diff --git "a/VideoMamba-Ti_30epoch_75.9/logs/log_rank0.txt" "b/VideoMamba-Ti_30epoch_75.9/logs/log_rank0.txt" new file mode 100644--- /dev/null +++ "b/VideoMamba-Ti_30epoch_75.9/logs/log_rank0.txt" @@ -0,0 +1,8075 @@ +[2024-12-10 11:21:26 root] (main.py 238): INFO Namespace(batch_size=128, epochs=30, model='RMeeTo_video_tiny', multi_reso=False, input_size=224, drop=0.0, drop_path=0.1, model_ema_decay=0.99996, model_ema_force_cpu=False, model_ema=False, opt='adamw', opt_eps=1e-08, opt_betas=None, clip_grad=None, momentum=0.9, weight_decay=0.0, sched='cosine', lr=2e-05, lr_noise=None, lr_noise_pct=0.67, lr_noise_std=1.0, warmup_lr=1e-06, min_lr=1e-06, decay_epochs=30, warmup_epochs=5, cooldown_epochs=10, patience_epochs=10, decay_rate=0.1, color_jitter=0.4, aa='rand-m9-mstd0.5-inc1', smoothing=0.1, train_interpolation='bicubic', repeated_aug=True, reprob=0.25, remode='pixel', recount=1, resplit=False, mixup=0.8, cutmix=1.0, cutmix_minmax=None, mixup_prob=1.0, mixup_switch_prob=0.5, mixup_mode='batch', finetune='', data_path='datasets/imagenet/', data_set='IMNET', inat_category='name', output_dir='exp/tab2/video_tiny', device='cuda', seed=0, resume='', autoresume=False, start_epoch=0, dist_eval=True, num_workers=10, pin_mem=True, world_size=4, port='15662', dist_url='env://', target_flops=3.0, granularity=4, load_compression_rate=False, warmup_compression_rate=False, distill='True', throughput=False, eval=False, merge_method='ToMe', merge_interval=2, if_pruning=False, num_prune='5', metric='X', distance='cosine', if_order=True, if_random=False, model_pth='/pretrained', if_merge_odd=False, merge_mode='sum', if_shuffle=False, shuffle_rate=0.0, choose='max', compare=11, data_ratio=1.0, data_seed=0, rank=0, gpu=0, distributed=True, dist_backend='nccl') +[2024-12-10 11:21:31 root] (main.py 304): INFO Creating model: RMeeTo_video_tiny +[2024-12-10 11:21:36 root] (main.py 393): INFO number of params: 7148008 +[2024-12-10 11:21:37 root] (main.py 513): INFO Start training for 30 epochs +[2024-12-10 11:21:45 root] (utils.py 283): INFO Epoch: [0] [ 0/2502] eta: 5:39:16 lr: 0.000020 loss_cls: 5.4397 (5.4397) grad_norm: 7.3125 (7.3125) time: 8.1360 data: 0.0014 max mem: 8334 +[2024-12-10 11:21:53 root] (utils.py 283): INFO Epoch: [0] [ 10/2502] eta: 1:01:07 lr: 0.000020 loss_cls: 5.4397 (5.2342) grad_norm: 7.2101 (7.1275) time: 1.4719 data: 0.0003 max mem: 8426 +[2024-12-10 11:22:01 root] (utils.py 283): INFO Epoch: [0] [ 20/2502] eta: 0:46:56 lr: 0.000020 loss_cls: 5.0471 (4.9936) grad_norm: 6.3423 (6.4148) time: 0.7849 data: 0.0002 max mem: 8426 +[2024-12-10 11:22:09 root] (utils.py 283): INFO Epoch: [0] [ 30/2502] eta: 0:41:59 lr: 0.000020 loss_cls: 4.9738 (4.9664) grad_norm: 4.8750 (5.8382) time: 0.7703 data: 0.0002 max mem: 8426 +[2024-12-10 11:22:16 root] (utils.py 283): INFO Epoch: [0] [ 40/2502] eta: 0:39:22 lr: 0.000020 loss_cls: 4.5874 (4.8390) grad_norm: 4.1990 (5.3962) time: 0.7754 data: 0.0002 max mem: 8426 +[2024-12-10 11:22:24 root] (utils.py 283): INFO Epoch: [0] [ 50/2502] eta: 0:37:42 lr: 0.000020 loss_cls: 4.5087 (4.7780) grad_norm: 3.8379 (5.0901) time: 0.7731 data: 0.0003 max mem: 8426 +[2024-12-10 11:22:32 root] (utils.py 283): INFO Epoch: [0] [ 60/2502] eta: 0:36:27 lr: 0.000020 loss_cls: 4.5056 (4.7071) grad_norm: 3.5692 (4.8179) time: 0.7651 data: 0.0003 max mem: 8426 +[2024-12-10 11:22:39 root] (utils.py 283): INFO Epoch: [0] [ 70/2502] eta: 0:35:35 lr: 0.000020 loss_cls: 4.4182 (4.6403) grad_norm: 3.4546 (4.6477) time: 0.7640 data: 0.0003 max mem: 8426 +[2024-12-10 11:22:47 root] (utils.py 283): INFO Epoch: [0] [ 80/2502] eta: 0:34:56 lr: 0.000020 loss_cls: 4.3567 (4.5985) grad_norm: 3.1409 (4.4575) time: 0.7741 data: 0.0003 max mem: 8426 +[2024-12-10 11:22:55 root] (utils.py 283): INFO Epoch: [0] [ 90/2502] eta: 0:34:20 lr: 0.000020 loss_cls: 4.3567 (4.5641) grad_norm: 2.9805 (4.3226) time: 0.7693 data: 0.0003 max mem: 8426 +[2024-12-10 11:23:02 root] (utils.py 283): INFO Epoch: [0] [ 100/2502] eta: 0:33:49 lr: 0.000020 loss_cls: 4.5846 (4.5617) grad_norm: 3.0934 (4.2033) time: 0.7606 data: 0.0003 max mem: 8426 +[2024-12-10 11:23:10 root] (utils.py 283): INFO Epoch: [0] [ 110/2502] eta: 0:33:22 lr: 0.000020 loss_cls: 4.6252 (4.5361) grad_norm: 3.0831 (4.1020) time: 0.7603 data: 0.0002 max mem: 8426 +[2024-12-10 11:23:18 root] (utils.py 283): INFO Epoch: [0] [ 120/2502] eta: 0:33:00 lr: 0.000020 loss_cls: 4.4315 (4.5258) grad_norm: 2.9423 (4.0072) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 11:23:25 root] (utils.py 283): INFO Epoch: [0] [ 130/2502] eta: 0:32:40 lr: 0.000020 loss_cls: 4.4237 (4.5039) grad_norm: 2.9162 (3.9181) time: 0.7674 data: 0.0002 max mem: 8426 +[2024-12-10 11:23:33 root] (utils.py 283): INFO Epoch: [0] [ 140/2502] eta: 0:32:25 lr: 0.000020 loss_cls: 4.4001 (4.4797) grad_norm: 2.8021 (3.8405) time: 0.7759 data: 0.0002 max mem: 8426 +[2024-12-10 11:23:41 root] (utils.py 283): INFO Epoch: [0] [ 150/2502] eta: 0:32:09 lr: 0.000020 loss_cls: 4.2619 (4.4597) grad_norm: 2.7989 (3.7782) time: 0.7812 data: 0.0002 max mem: 8426 +[2024-12-10 11:23:49 root] (utils.py 283): INFO Epoch: [0] [ 160/2502] eta: 0:31:56 lr: 0.000020 loss_cls: 4.2881 (4.4512) grad_norm: 2.7606 (3.7170) time: 0.7804 data: 0.0002 max mem: 8426 +[2024-12-10 11:23:57 root] (utils.py 283): INFO Epoch: [0] [ 170/2502] eta: 0:31:42 lr: 0.000020 loss_cls: 4.3098 (4.4363) grad_norm: 2.6984 (3.6665) time: 0.7809 data: 0.0003 max mem: 8426 +[2024-12-10 11:24:04 root] (utils.py 283): INFO Epoch: [0] [ 180/2502] eta: 0:31:29 lr: 0.000020 loss_cls: 4.1614 (4.4109) grad_norm: 2.7482 (3.6187) time: 0.7787 data: 0.0002 max mem: 8426 +[2024-12-10 11:24:12 root] (utils.py 283): INFO Epoch: [0] [ 190/2502] eta: 0:31:17 lr: 0.000020 loss_cls: 4.0435 (4.3997) grad_norm: 2.7482 (3.5726) time: 0.7779 data: 0.0002 max mem: 8426 +[2024-12-10 11:24:20 root] (utils.py 283): INFO Epoch: [0] [ 200/2502] eta: 0:31:05 lr: 0.000020 loss_cls: 4.0634 (4.3791) grad_norm: 2.7664 (3.5374) time: 0.7786 data: 0.0002 max mem: 8426 +[2024-12-10 11:24:28 root] (utils.py 283): INFO Epoch: [0] [ 210/2502] eta: 0:30:53 lr: 0.000020 loss_cls: 4.2052 (4.3662) grad_norm: 2.7664 (3.4983) time: 0.7788 data: 0.0002 max mem: 8426 +[2024-12-10 11:24:36 root] (utils.py 283): INFO Epoch: [0] [ 220/2502] eta: 0:30:42 lr: 0.000020 loss_cls: 4.2052 (4.3531) grad_norm: 2.6855 (3.4608) time: 0.7801 data: 0.0003 max mem: 8426 +[2024-12-10 11:24:43 root] (utils.py 283): INFO Epoch: [0] [ 230/2502] eta: 0:30:32 lr: 0.000020 loss_cls: 4.2403 (4.3448) grad_norm: 2.7370 (3.4326) time: 0.7815 data: 0.0003 max mem: 8426 +[2024-12-10 11:24:51 root] (utils.py 283): INFO Epoch: [0] [ 240/2502] eta: 0:30:21 lr: 0.000020 loss_cls: 4.3193 (4.3386) grad_norm: 2.7056 (3.4021) time: 0.7813 data: 0.0002 max mem: 8426 +[2024-12-10 11:24:59 root] (utils.py 283): INFO Epoch: [0] [ 250/2502] eta: 0:30:11 lr: 0.000020 loss_cls: 4.1482 (4.3212) grad_norm: 2.6285 (3.3728) time: 0.7806 data: 0.0002 max mem: 8426 +[2024-12-10 11:25:07 root] (utils.py 283): INFO Epoch: [0] [ 260/2502] eta: 0:30:01 lr: 0.000020 loss_cls: 3.9497 (4.3044) grad_norm: 2.6081 (3.3444) time: 0.7810 data: 0.0002 max mem: 8426 +[2024-12-10 11:25:15 root] (utils.py 283): INFO Epoch: [0] [ 270/2502] eta: 0:29:50 lr: 0.000020 loss_cls: 4.0672 (4.2951) grad_norm: 2.6747 (3.3217) time: 0.7755 data: 0.0002 max mem: 8426 +[2024-12-10 11:25:22 root] (utils.py 283): INFO Epoch: [0] [ 280/2502] eta: 0:29:39 lr: 0.000020 loss_cls: 4.2493 (4.2984) grad_norm: 2.6349 (3.2952) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 11:25:30 root] (utils.py 283): INFO Epoch: [0] [ 290/2502] eta: 0:29:28 lr: 0.000020 loss_cls: 4.2748 (4.2850) grad_norm: 2.6165 (3.2732) time: 0.7597 data: 0.0002 max mem: 8426 +[2024-12-10 11:25:37 root] (utils.py 283): INFO Epoch: [0] [ 300/2502] eta: 0:29:17 lr: 0.000020 loss_cls: 3.6946 (4.2718) grad_norm: 2.7181 (3.2548) time: 0.7627 data: 0.0003 max mem: 8426 +[2024-12-10 11:25:45 root] (utils.py 283): INFO Epoch: [0] [ 310/2502] eta: 0:29:07 lr: 0.000020 loss_cls: 4.0423 (4.2668) grad_norm: 2.7009 (3.2362) time: 0.7677 data: 0.0003 max mem: 8426 +[2024-12-10 11:25:53 root] (utils.py 283): INFO Epoch: [0] [ 320/2502] eta: 0:28:57 lr: 0.000020 loss_cls: 4.5231 (4.2778) grad_norm: 2.6286 (3.2187) time: 0.7676 data: 0.0003 max mem: 8426 +[2024-12-10 11:26:00 root] (utils.py 283): INFO Epoch: [0] [ 330/2502] eta: 0:28:47 lr: 0.000020 loss_cls: 4.5231 (4.2728) grad_norm: 2.5690 (3.1977) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-10 11:26:08 root] (utils.py 283): INFO Epoch: [0] [ 340/2502] eta: 0:28:37 lr: 0.000020 loss_cls: 4.4315 (4.2756) grad_norm: 2.5087 (3.1785) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 11:26:16 root] (utils.py 283): INFO Epoch: [0] [ 350/2502] eta: 0:28:27 lr: 0.000020 loss_cls: 4.3409 (4.2720) grad_norm: 2.5321 (3.1616) time: 0.7629 data: 0.0003 max mem: 8426 +[2024-12-10 11:26:23 root] (utils.py 283): INFO Epoch: [0] [ 360/2502] eta: 0:28:18 lr: 0.000020 loss_cls: 4.1761 (4.2682) grad_norm: 2.5683 (3.1469) time: 0.7645 data: 0.0003 max mem: 8426 +[2024-12-10 11:26:31 root] (utils.py 283): INFO Epoch: [0] [ 370/2502] eta: 0:28:08 lr: 0.000020 loss_cls: 4.0424 (4.2616) grad_norm: 2.5683 (3.1327) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 11:26:39 root] (utils.py 283): INFO Epoch: [0] [ 380/2502] eta: 0:27:59 lr: 0.000020 loss_cls: 4.3706 (4.2578) grad_norm: 2.5803 (3.1184) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 11:26:46 root] (utils.py 283): INFO Epoch: [0] [ 390/2502] eta: 0:27:50 lr: 0.000020 loss_cls: 4.2527 (4.2521) grad_norm: 2.5991 (3.1059) time: 0.7713 data: 0.0002 max mem: 8426 +[2024-12-10 11:26:54 root] (utils.py 283): INFO Epoch: [0] [ 400/2502] eta: 0:27:41 lr: 0.000020 loss_cls: 4.2527 (4.2544) grad_norm: 2.5991 (3.0936) time: 0.7729 data: 0.0002 max mem: 8426 +[2024-12-10 11:27:02 root] (utils.py 283): INFO Epoch: [0] [ 410/2502] eta: 0:27:32 lr: 0.000020 loss_cls: 4.3387 (4.2592) grad_norm: 2.5249 (3.0809) time: 0.7733 data: 0.0002 max mem: 8426 +[2024-12-10 11:27:10 root] (utils.py 283): INFO Epoch: [0] [ 420/2502] eta: 0:27:23 lr: 0.000020 loss_cls: 4.3339 (4.2580) grad_norm: 2.5179 (3.0679) time: 0.7711 data: 0.0003 max mem: 8426 +[2024-12-10 11:27:17 root] (utils.py 283): INFO Epoch: [0] [ 430/2502] eta: 0:27:14 lr: 0.000020 loss_cls: 4.4850 (4.2624) grad_norm: 2.5288 (3.0567) time: 0.7657 data: 0.0003 max mem: 8426 +[2024-12-10 11:27:25 root] (utils.py 283): INFO Epoch: [0] [ 440/2502] eta: 0:27:05 lr: 0.000020 loss_cls: 4.4850 (4.2657) grad_norm: 2.5204 (3.0462) time: 0.7632 data: 0.0003 max mem: 8426 +[2024-12-10 11:27:32 root] (utils.py 283): INFO Epoch: [0] [ 450/2502] eta: 0:26:56 lr: 0.000020 loss_cls: 4.2463 (4.2637) grad_norm: 2.5185 (3.0352) time: 0.7634 data: 0.0003 max mem: 8426 +[2024-12-10 11:27:40 root] (utils.py 283): INFO Epoch: [0] [ 460/2502] eta: 0:26:47 lr: 0.000020 loss_cls: 4.1929 (4.2561) grad_norm: 2.5675 (3.0256) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-10 11:27:48 root] (utils.py 283): INFO Epoch: [0] [ 470/2502] eta: 0:26:38 lr: 0.000020 loss_cls: 3.7622 (4.2439) grad_norm: 2.5775 (3.0163) time: 0.7652 data: 0.0003 max mem: 8426 +[2024-12-10 11:27:56 root] (utils.py 283): INFO Epoch: [0] [ 480/2502] eta: 0:26:31 lr: 0.000020 loss_cls: 3.7787 (4.2402) grad_norm: 2.5380 (3.0075) time: 0.7779 data: 0.0003 max mem: 8426 +[2024-12-10 11:28:03 root] (utils.py 283): INFO Epoch: [0] [ 490/2502] eta: 0:26:22 lr: 0.000020 loss_cls: 4.2222 (4.2442) grad_norm: 2.5082 (2.9974) time: 0.7749 data: 0.0002 max mem: 8426 +[2024-12-10 11:28:11 root] (utils.py 283): INFO Epoch: [0] [ 500/2502] eta: 0:26:13 lr: 0.000020 loss_cls: 4.2735 (4.2390) grad_norm: 2.5082 (2.9888) time: 0.7604 data: 0.0003 max mem: 8426 +[2024-12-10 11:28:19 root] (utils.py 283): INFO Epoch: [0] [ 510/2502] eta: 0:26:04 lr: 0.000020 loss_cls: 4.1847 (4.2419) grad_norm: 2.5546 (2.9802) time: 0.7625 data: 0.0002 max mem: 8426 +[2024-12-10 11:28:26 root] (utils.py 283): INFO Epoch: [0] [ 520/2502] eta: 0:25:55 lr: 0.000020 loss_cls: 4.3387 (4.2419) grad_norm: 2.5749 (2.9736) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 11:28:34 root] (utils.py 283): INFO Epoch: [0] [ 530/2502] eta: 0:25:47 lr: 0.000020 loss_cls: 4.1700 (4.2341) grad_norm: 2.5286 (2.9651) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 11:28:42 root] (utils.py 283): INFO Epoch: [0] [ 540/2502] eta: 0:25:39 lr: 0.000020 loss_cls: 4.1634 (4.2313) grad_norm: 2.5009 (2.9575) time: 0.7693 data: 0.0002 max mem: 8426 +[2024-12-10 11:28:49 root] (utils.py 283): INFO Epoch: [0] [ 550/2502] eta: 0:25:30 lr: 0.000020 loss_cls: 4.2519 (4.2301) grad_norm: 2.5990 (2.9524) time: 0.7736 data: 0.0002 max mem: 8426 +[2024-12-10 11:28:57 root] (utils.py 283): INFO Epoch: [0] [ 560/2502] eta: 0:25:22 lr: 0.000020 loss_cls: 4.5634 (4.2336) grad_norm: 2.6514 (2.9462) time: 0.7685 data: 0.0003 max mem: 8426 +[2024-12-10 11:29:05 root] (utils.py 283): INFO Epoch: [0] [ 570/2502] eta: 0:25:13 lr: 0.000020 loss_cls: 4.5053 (4.2353) grad_norm: 2.5055 (2.9380) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 11:29:12 root] (utils.py 283): INFO Epoch: [0] [ 580/2502] eta: 0:25:05 lr: 0.000020 loss_cls: 4.3028 (4.2360) grad_norm: 2.4744 (2.9298) time: 0.7653 data: 0.0003 max mem: 8426 +[2024-12-10 11:29:20 root] (utils.py 283): INFO Epoch: [0] [ 590/2502] eta: 0:24:57 lr: 0.000020 loss_cls: 4.4055 (4.2357) grad_norm: 2.4389 (2.9215) time: 0.7657 data: 0.0003 max mem: 8426 +[2024-12-10 11:29:28 root] (utils.py 283): INFO Epoch: [0] [ 600/2502] eta: 0:24:48 lr: 0.000020 loss_cls: 4.3640 (4.2323) grad_norm: 2.4888 (2.9159) time: 0.7715 data: 0.0003 max mem: 8426 +[2024-12-10 11:29:36 root] (utils.py 283): INFO Epoch: [0] [ 610/2502] eta: 0:24:41 lr: 0.000020 loss_cls: 4.0834 (4.2309) grad_norm: 2.6165 (2.9111) time: 0.7823 data: 0.0003 max mem: 8426 +[2024-12-10 11:29:43 root] (utils.py 283): INFO Epoch: [0] [ 620/2502] eta: 0:24:33 lr: 0.000020 loss_cls: 4.1690 (4.2319) grad_norm: 2.4971 (2.9038) time: 0.7877 data: 0.0003 max mem: 8426 +[2024-12-10 11:29:51 root] (utils.py 283): INFO Epoch: [0] [ 630/2502] eta: 0:24:25 lr: 0.000020 loss_cls: 4.2913 (4.2318) grad_norm: 2.4686 (2.8984) time: 0.7827 data: 0.0002 max mem: 8426 +[2024-12-10 11:29:59 root] (utils.py 283): INFO Epoch: [0] [ 640/2502] eta: 0:24:17 lr: 0.000020 loss_cls: 4.1563 (4.2289) grad_norm: 2.4778 (2.8928) time: 0.7698 data: 0.0003 max mem: 8426 +[2024-12-10 11:30:06 root] (utils.py 283): INFO Epoch: [0] [ 650/2502] eta: 0:24:08 lr: 0.000020 loss_cls: 4.0960 (4.2277) grad_norm: 2.4778 (2.8878) time: 0.7617 data: 0.0002 max mem: 8426 +[2024-12-10 11:30:14 root] (utils.py 283): INFO Epoch: [0] [ 660/2502] eta: 0:24:00 lr: 0.000020 loss_cls: 3.9717 (4.2207) grad_norm: 2.5602 (2.8841) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 11:30:22 root] (utils.py 283): INFO Epoch: [0] [ 670/2502] eta: 0:23:52 lr: 0.000020 loss_cls: 4.1794 (4.2240) grad_norm: 2.6220 (2.8800) time: 0.7673 data: 0.0002 max mem: 8426 +[2024-12-10 11:30:29 root] (utils.py 283): INFO Epoch: [0] [ 680/2502] eta: 0:23:44 lr: 0.000020 loss_cls: 4.4286 (4.2234) grad_norm: 2.4837 (2.8740) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-10 11:30:37 root] (utils.py 283): INFO Epoch: [0] [ 690/2502] eta: 0:23:36 lr: 0.000020 loss_cls: 3.8516 (4.2178) grad_norm: 2.4795 (2.8689) time: 0.7766 data: 0.0003 max mem: 8426 +[2024-12-10 11:30:45 root] (utils.py 283): INFO Epoch: [0] [ 700/2502] eta: 0:23:28 lr: 0.000020 loss_cls: 3.8516 (4.2139) grad_norm: 2.5365 (2.8640) time: 0.7753 data: 0.0002 max mem: 8426 +[2024-12-10 11:30:53 root] (utils.py 283): INFO Epoch: [0] [ 710/2502] eta: 0:23:19 lr: 0.000020 loss_cls: 4.0196 (4.2115) grad_norm: 2.4925 (2.8587) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 11:31:00 root] (utils.py 283): INFO Epoch: [0] [ 720/2502] eta: 0:23:11 lr: 0.000020 loss_cls: 4.0196 (4.2073) grad_norm: 2.4567 (2.8537) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 11:31:08 root] (utils.py 283): INFO Epoch: [0] [ 730/2502] eta: 0:23:03 lr: 0.000020 loss_cls: 4.1429 (4.2070) grad_norm: 2.4717 (2.8491) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 11:31:16 root] (utils.py 283): INFO Epoch: [0] [ 740/2502] eta: 0:22:55 lr: 0.000020 loss_cls: 4.4450 (4.2119) grad_norm: 2.5213 (2.8454) time: 0.7660 data: 0.0003 max mem: 8426 +[2024-12-10 11:31:23 root] (utils.py 283): INFO Epoch: [0] [ 750/2502] eta: 0:22:47 lr: 0.000020 loss_cls: 4.4450 (4.2087) grad_norm: 2.5371 (2.8411) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-10 11:31:31 root] (utils.py 283): INFO Epoch: [0] [ 760/2502] eta: 0:22:39 lr: 0.000020 loss_cls: 4.2809 (4.2080) grad_norm: 2.5435 (2.8375) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 11:31:39 root] (utils.py 283): INFO Epoch: [0] [ 770/2502] eta: 0:22:31 lr: 0.000020 loss_cls: 4.2809 (4.2083) grad_norm: 2.5441 (2.8332) time: 0.7685 data: 0.0002 max mem: 8426 +[2024-12-10 11:31:46 root] (utils.py 283): INFO Epoch: [0] [ 780/2502] eta: 0:22:22 lr: 0.000020 loss_cls: 4.2119 (4.2087) grad_norm: 2.5003 (2.8306) time: 0.7674 data: 0.0002 max mem: 8426 +[2024-12-10 11:31:54 root] (utils.py 283): INFO Epoch: [0] [ 790/2502] eta: 0:22:14 lr: 0.000020 loss_cls: 4.0606 (4.2053) grad_norm: 2.4892 (2.8261) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-10 11:32:02 root] (utils.py 283): INFO Epoch: [0] [ 800/2502] eta: 0:22:06 lr: 0.000020 loss_cls: 4.0606 (4.2047) grad_norm: 2.4315 (2.8216) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-10 11:32:09 root] (utils.py 283): INFO Epoch: [0] [ 810/2502] eta: 0:21:58 lr: 0.000020 loss_cls: 3.7264 (4.1958) grad_norm: 2.4315 (2.8172) time: 0.7673 data: 0.0002 max mem: 8426 +[2024-12-10 11:32:17 root] (utils.py 283): INFO Epoch: [0] [ 820/2502] eta: 0:21:50 lr: 0.000020 loss_cls: 3.7264 (4.1969) grad_norm: 2.4632 (2.8128) time: 0.7696 data: 0.0002 max mem: 8426 +[2024-12-10 11:32:25 root] (utils.py 283): INFO Epoch: [0] [ 830/2502] eta: 0:21:42 lr: 0.000020 loss_cls: 4.2518 (4.1912) grad_norm: 2.4838 (2.8094) time: 0.7729 data: 0.0002 max mem: 8426 +[2024-12-10 11:32:32 root] (utils.py 283): INFO Epoch: [0] [ 840/2502] eta: 0:21:34 lr: 0.000020 loss_cls: 3.7477 (4.1896) grad_norm: 2.5526 (2.8072) time: 0.7749 data: 0.0002 max mem: 8426 +[2024-12-10 11:32:40 root] (utils.py 283): INFO Epoch: [0] [ 850/2502] eta: 0:21:27 lr: 0.000020 loss_cls: 4.3434 (4.1871) grad_norm: 2.5488 (2.8046) time: 0.7729 data: 0.0002 max mem: 8426 +[2024-12-10 11:32:48 root] (utils.py 283): INFO Epoch: [0] [ 860/2502] eta: 0:21:18 lr: 0.000020 loss_cls: 4.1760 (4.1867) grad_norm: 2.4010 (2.8000) time: 0.7677 data: 0.0002 max mem: 8426 +[2024-12-10 11:32:55 root] (utils.py 283): INFO Epoch: [0] [ 870/2502] eta: 0:21:10 lr: 0.000020 loss_cls: 4.2631 (4.1855) grad_norm: 2.4677 (2.7967) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 11:33:03 root] (utils.py 283): INFO Epoch: [0] [ 880/2502] eta: 0:21:03 lr: 0.000020 loss_cls: 4.1733 (4.1821) grad_norm: 2.4738 (2.7929) time: 0.7704 data: 0.0002 max mem: 8426 +[2024-12-10 11:33:11 root] (utils.py 283): INFO Epoch: [0] [ 890/2502] eta: 0:20:54 lr: 0.000020 loss_cls: 3.9606 (4.1784) grad_norm: 2.4593 (2.7894) time: 0.7694 data: 0.0003 max mem: 8426 +[2024-12-10 11:33:18 root] (utils.py 283): INFO Epoch: [0] [ 900/2502] eta: 0:20:46 lr: 0.000020 loss_cls: 4.0889 (4.1781) grad_norm: 2.4863 (2.7864) time: 0.7602 data: 0.0002 max mem: 8426 +[2024-12-10 11:33:26 root] (utils.py 283): INFO Epoch: [0] [ 910/2502] eta: 0:20:38 lr: 0.000020 loss_cls: 4.4407 (4.1780) grad_norm: 2.4989 (2.7827) time: 0.7601 data: 0.0002 max mem: 8426 +[2024-12-10 11:33:34 root] (utils.py 283): INFO Epoch: [0] [ 920/2502] eta: 0:20:30 lr: 0.000020 loss_cls: 4.4815 (4.1791) grad_norm: 2.4280 (2.7790) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 11:33:41 root] (utils.py 283): INFO Epoch: [0] [ 930/2502] eta: 0:20:22 lr: 0.000020 loss_cls: 4.4903 (4.1815) grad_norm: 2.4406 (2.7755) time: 0.7707 data: 0.0002 max mem: 8426 +[2024-12-10 11:33:49 root] (utils.py 283): INFO Epoch: [0] [ 940/2502] eta: 0:20:14 lr: 0.000020 loss_cls: 4.2820 (4.1787) grad_norm: 2.4650 (2.7721) time: 0.7676 data: 0.0002 max mem: 8426 +[2024-12-10 11:33:57 root] (utils.py 283): INFO Epoch: [0] [ 950/2502] eta: 0:20:06 lr: 0.000020 loss_cls: 3.5044 (4.1730) grad_norm: 2.4720 (2.7698) time: 0.7595 data: 0.0002 max mem: 8426 +[2024-12-10 11:34:05 root] (utils.py 283): INFO Epoch: [0] [ 960/2502] eta: 0:19:59 lr: 0.000020 loss_cls: 3.9590 (4.1727) grad_norm: 2.4263 (2.7668) time: 0.7750 data: 0.0003 max mem: 8426 +[2024-12-10 11:34:12 root] (utils.py 283): INFO Epoch: [0] [ 970/2502] eta: 0:19:51 lr: 0.000020 loss_cls: 4.1463 (4.1726) grad_norm: 2.3914 (2.7627) time: 0.7879 data: 0.0003 max mem: 8426 +[2024-12-10 11:34:20 root] (utils.py 283): INFO Epoch: [0] [ 980/2502] eta: 0:19:43 lr: 0.000020 loss_cls: 4.1463 (4.1717) grad_norm: 2.3601 (2.7594) time: 0.7801 data: 0.0003 max mem: 8426 +[2024-12-10 11:34:28 root] (utils.py 283): INFO Epoch: [0] [ 990/2502] eta: 0:19:35 lr: 0.000020 loss_cls: 4.4096 (4.1716) grad_norm: 2.4882 (2.7573) time: 0.7704 data: 0.0003 max mem: 8426 +[2024-12-10 11:34:36 root] (utils.py 283): INFO Epoch: [0] [1000/2502] eta: 0:19:27 lr: 0.000020 loss_cls: 4.2559 (4.1696) grad_norm: 2.4882 (2.7548) time: 0.7667 data: 0.0003 max mem: 8426 +[2024-12-10 11:34:43 root] (utils.py 283): INFO Epoch: [0] [1010/2502] eta: 0:19:19 lr: 0.000020 loss_cls: 4.1908 (4.1702) grad_norm: 2.4850 (2.7521) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-10 11:34:51 root] (utils.py 283): INFO Epoch: [0] [1020/2502] eta: 0:19:11 lr: 0.000020 loss_cls: 4.2187 (4.1693) grad_norm: 2.4930 (2.7493) time: 0.7671 data: 0.0002 max mem: 8426 +[2024-12-10 11:34:58 root] (utils.py 283): INFO Epoch: [0] [1030/2502] eta: 0:19:03 lr: 0.000020 loss_cls: 3.8572 (4.1650) grad_norm: 2.5191 (2.7473) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 11:35:06 root] (utils.py 283): INFO Epoch: [0] [1040/2502] eta: 0:18:56 lr: 0.000020 loss_cls: 4.2787 (4.1673) grad_norm: 2.5455 (2.7460) time: 0.7671 data: 0.0002 max mem: 8426 +[2024-12-10 11:35:14 root] (utils.py 283): INFO Epoch: [0] [1050/2502] eta: 0:18:48 lr: 0.000020 loss_cls: 4.3164 (4.1658) grad_norm: 2.5455 (2.7449) time: 0.7707 data: 0.0002 max mem: 8426 +[2024-12-10 11:35:22 root] (utils.py 283): INFO Epoch: [0] [1060/2502] eta: 0:18:40 lr: 0.000020 loss_cls: 4.1424 (4.1665) grad_norm: 2.5274 (2.7429) time: 0.7692 data: 0.0002 max mem: 8426 +[2024-12-10 11:35:29 root] (utils.py 283): INFO Epoch: [0] [1070/2502] eta: 0:18:32 lr: 0.000020 loss_cls: 4.1424 (4.1657) grad_norm: 2.4737 (2.7408) time: 0.7741 data: 0.0002 max mem: 8426 +[2024-12-10 11:35:37 root] (utils.py 283): INFO Epoch: [0] [1080/2502] eta: 0:18:24 lr: 0.000020 loss_cls: 4.1228 (4.1645) grad_norm: 2.4732 (2.7388) time: 0.7744 data: 0.0002 max mem: 8426 +[2024-12-10 11:35:45 root] (utils.py 283): INFO Epoch: [0] [1090/2502] eta: 0:18:16 lr: 0.000020 loss_cls: 4.0185 (4.1628) grad_norm: 2.4641 (2.7364) time: 0.7666 data: 0.0002 max mem: 8426 +[2024-12-10 11:35:52 root] (utils.py 283): INFO Epoch: [0] [1100/2502] eta: 0:18:08 lr: 0.000020 loss_cls: 4.0185 (4.1619) grad_norm: 2.4188 (2.7344) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 11:36:00 root] (utils.py 283): INFO Epoch: [0] [1110/2502] eta: 0:18:00 lr: 0.000020 loss_cls: 4.0521 (4.1613) grad_norm: 2.3840 (2.7317) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 11:36:08 root] (utils.py 283): INFO Epoch: [0] [1120/2502] eta: 0:17:52 lr: 0.000020 loss_cls: 4.2604 (4.1627) grad_norm: 2.4506 (2.7297) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 11:36:15 root] (utils.py 283): INFO Epoch: [0] [1130/2502] eta: 0:17:45 lr: 0.000020 loss_cls: 4.3998 (4.1642) grad_norm: 2.4520 (2.7272) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-10 11:36:23 root] (utils.py 283): INFO Epoch: [0] [1140/2502] eta: 0:17:37 lr: 0.000020 loss_cls: 4.3108 (4.1622) grad_norm: 2.4479 (2.7249) time: 0.7668 data: 0.0002 max mem: 8426 +[2024-12-10 11:36:31 root] (utils.py 283): INFO Epoch: [0] [1150/2502] eta: 0:17:29 lr: 0.000020 loss_cls: 4.0657 (4.1607) grad_norm: 2.4374 (2.7222) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 11:36:38 root] (utils.py 283): INFO Epoch: [0] [1160/2502] eta: 0:17:21 lr: 0.000020 loss_cls: 4.0924 (4.1600) grad_norm: 2.4374 (2.7200) time: 0.7707 data: 0.0002 max mem: 8426 +[2024-12-10 11:36:46 root] (utils.py 283): INFO Epoch: [0] [1170/2502] eta: 0:17:13 lr: 0.000020 loss_cls: 4.0262 (4.1587) grad_norm: 2.4850 (2.7180) time: 0.7741 data: 0.0002 max mem: 8426 +[2024-12-10 11:36:54 root] (utils.py 283): INFO Epoch: [0] [1180/2502] eta: 0:17:05 lr: 0.000020 loss_cls: 4.3859 (4.1614) grad_norm: 2.4940 (2.7160) time: 0.7668 data: 0.0002 max mem: 8426 +[2024-12-10 11:37:01 root] (utils.py 283): INFO Epoch: [0] [1190/2502] eta: 0:16:57 lr: 0.000020 loss_cls: 4.4411 (4.1632) grad_norm: 2.4029 (2.7135) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 11:37:09 root] (utils.py 283): INFO Epoch: [0] [1200/2502] eta: 0:16:50 lr: 0.000020 loss_cls: 4.3730 (4.1638) grad_norm: 2.3823 (2.7112) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 11:37:17 root] (utils.py 283): INFO Epoch: [0] [1210/2502] eta: 0:16:42 lr: 0.000020 loss_cls: 4.1340 (4.1627) grad_norm: 2.4421 (2.7089) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 11:37:24 root] (utils.py 283): INFO Epoch: [0] [1220/2502] eta: 0:16:34 lr: 0.000020 loss_cls: 4.1340 (4.1621) grad_norm: 2.4759 (2.7074) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 11:37:32 root] (utils.py 283): INFO Epoch: [0] [1230/2502] eta: 0:16:26 lr: 0.000020 loss_cls: 4.1909 (4.1614) grad_norm: 2.4935 (2.7053) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 11:37:40 root] (utils.py 283): INFO Epoch: [0] [1240/2502] eta: 0:16:18 lr: 0.000020 loss_cls: 4.2267 (4.1613) grad_norm: 2.4379 (2.7034) time: 0.7664 data: 0.0003 max mem: 8426 +[2024-12-10 11:37:47 root] (utils.py 283): INFO Epoch: [0] [1250/2502] eta: 0:16:10 lr: 0.000020 loss_cls: 4.1679 (4.1593) grad_norm: 2.4616 (2.7013) time: 0.7609 data: 0.0003 max mem: 8426 +[2024-12-10 11:37:55 root] (utils.py 283): INFO Epoch: [0] [1260/2502] eta: 0:16:02 lr: 0.000020 loss_cls: 4.2360 (4.1599) grad_norm: 2.4757 (2.7001) time: 0.7666 data: 0.0002 max mem: 8426 +[2024-12-10 11:38:03 root] (utils.py 283): INFO Epoch: [0] [1270/2502] eta: 0:15:55 lr: 0.000020 loss_cls: 4.3093 (4.1604) grad_norm: 2.4879 (2.6980) time: 0.7716 data: 0.0002 max mem: 8426 +[2024-12-10 11:38:10 root] (utils.py 283): INFO Epoch: [0] [1280/2502] eta: 0:15:47 lr: 0.000020 loss_cls: 4.4528 (4.1622) grad_norm: 2.4341 (2.6964) time: 0.7682 data: 0.0002 max mem: 8426 +[2024-12-10 11:38:18 root] (utils.py 283): INFO Epoch: [0] [1290/2502] eta: 0:15:39 lr: 0.000020 loss_cls: 4.4528 (4.1632) grad_norm: 2.4500 (2.6948) time: 0.7713 data: 0.0002 max mem: 8426 +[2024-12-10 11:38:26 root] (utils.py 283): INFO Epoch: [0] [1300/2502] eta: 0:15:31 lr: 0.000020 loss_cls: 4.2611 (4.1632) grad_norm: 2.4569 (2.6927) time: 0.7724 data: 0.0002 max mem: 8426 +[2024-12-10 11:38:33 root] (utils.py 283): INFO Epoch: [0] [1310/2502] eta: 0:15:23 lr: 0.000020 loss_cls: 4.0436 (4.1612) grad_norm: 2.4442 (2.6909) time: 0.7659 data: 0.0002 max mem: 8426 +[2024-12-10 11:38:41 root] (utils.py 283): INFO Epoch: [0] [1320/2502] eta: 0:15:16 lr: 0.000020 loss_cls: 3.8708 (4.1584) grad_norm: 2.4442 (2.6897) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 11:38:49 root] (utils.py 283): INFO Epoch: [0] [1330/2502] eta: 0:15:08 lr: 0.000020 loss_cls: 4.1532 (4.1591) grad_norm: 2.4697 (2.6883) time: 0.7716 data: 0.0003 max mem: 8426 +[2024-12-10 11:38:56 root] (utils.py 283): INFO Epoch: [0] [1340/2502] eta: 0:15:00 lr: 0.000020 loss_cls: 4.4162 (4.1595) grad_norm: 2.4041 (2.6864) time: 0.7709 data: 0.0003 max mem: 8426 +[2024-12-10 11:39:04 root] (utils.py 283): INFO Epoch: [0] [1350/2502] eta: 0:14:52 lr: 0.000020 loss_cls: 4.2022 (4.1585) grad_norm: 2.3506 (2.6841) time: 0.7630 data: 0.0002 max mem: 8426 +[2024-12-10 11:39:12 root] (utils.py 283): INFO Epoch: [0] [1360/2502] eta: 0:14:44 lr: 0.000020 loss_cls: 4.2022 (4.1599) grad_norm: 2.3970 (2.6824) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 11:39:19 root] (utils.py 283): INFO Epoch: [0] [1370/2502] eta: 0:14:36 lr: 0.000020 loss_cls: 4.3531 (4.1605) grad_norm: 2.4487 (2.6812) time: 0.7685 data: 0.0002 max mem: 8426 +[2024-12-10 11:39:27 root] (utils.py 283): INFO Epoch: [0] [1380/2502] eta: 0:14:29 lr: 0.000020 loss_cls: 4.3531 (4.1614) grad_norm: 2.4763 (2.6795) time: 0.7666 data: 0.0002 max mem: 8426 +[2024-12-10 11:39:35 root] (utils.py 283): INFO Epoch: [0] [1390/2502] eta: 0:14:21 lr: 0.000020 loss_cls: 4.3828 (4.1618) grad_norm: 2.4786 (2.6784) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 11:39:42 root] (utils.py 283): INFO Epoch: [0] [1400/2502] eta: 0:14:13 lr: 0.000020 loss_cls: 4.3001 (4.1615) grad_norm: 2.4684 (2.6765) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 11:39:50 root] (utils.py 283): INFO Epoch: [0] [1410/2502] eta: 0:14:05 lr: 0.000020 loss_cls: 4.3157 (4.1636) grad_norm: 2.4598 (2.6755) time: 0.7682 data: 0.0002 max mem: 8426 +[2024-12-10 11:39:58 root] (utils.py 283): INFO Epoch: [0] [1420/2502] eta: 0:13:57 lr: 0.000020 loss_cls: 4.1919 (4.1621) grad_norm: 2.4366 (2.6738) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-10 11:40:05 root] (utils.py 283): INFO Epoch: [0] [1430/2502] eta: 0:13:50 lr: 0.000020 loss_cls: 4.0719 (4.1621) grad_norm: 2.3842 (2.6724) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 11:40:13 root] (utils.py 283): INFO Epoch: [0] [1440/2502] eta: 0:13:42 lr: 0.000020 loss_cls: 4.2856 (4.1606) grad_norm: 2.4627 (2.6712) time: 0.7602 data: 0.0002 max mem: 8426 +[2024-12-10 11:40:21 root] (utils.py 283): INFO Epoch: [0] [1450/2502] eta: 0:13:34 lr: 0.000020 loss_cls: 4.2889 (4.1610) grad_norm: 2.4647 (2.6701) time: 0.7600 data: 0.0002 max mem: 8426 +[2024-12-10 11:40:28 root] (utils.py 283): INFO Epoch: [0] [1460/2502] eta: 0:13:26 lr: 0.000020 loss_cls: 4.3292 (4.1616) grad_norm: 2.4551 (2.6683) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 11:40:36 root] (utils.py 283): INFO Epoch: [0] [1470/2502] eta: 0:13:18 lr: 0.000020 loss_cls: 4.3292 (4.1603) grad_norm: 2.3524 (2.6666) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-10 11:40:44 root] (utils.py 283): INFO Epoch: [0] [1480/2502] eta: 0:13:11 lr: 0.000020 loss_cls: 3.8302 (4.1573) grad_norm: 2.3350 (2.6647) time: 0.7731 data: 0.0002 max mem: 8426 +[2024-12-10 11:40:51 root] (utils.py 283): INFO Epoch: [0] [1490/2502] eta: 0:13:03 lr: 0.000020 loss_cls: 3.6022 (4.1534) grad_norm: 2.4332 (2.6635) time: 0.7703 data: 0.0002 max mem: 8426 +[2024-12-10 11:40:59 root] (utils.py 283): INFO Epoch: [0] [1500/2502] eta: 0:12:55 lr: 0.000020 loss_cls: 3.7266 (4.1527) grad_norm: 2.4375 (2.6619) time: 0.7697 data: 0.0002 max mem: 8426 +[2024-12-10 11:41:07 root] (utils.py 283): INFO Epoch: [0] [1510/2502] eta: 0:12:47 lr: 0.000020 loss_cls: 4.2332 (4.1528) grad_norm: 2.3752 (2.6602) time: 0.7789 data: 0.0002 max mem: 8426 +[2024-12-10 11:41:15 root] (utils.py 283): INFO Epoch: [0] [1520/2502] eta: 0:12:40 lr: 0.000020 loss_cls: 4.2332 (4.1536) grad_norm: 2.3898 (2.6590) time: 0.7836 data: 0.0002 max mem: 8426 +[2024-12-10 11:41:22 root] (utils.py 283): INFO Epoch: [0] [1530/2502] eta: 0:12:32 lr: 0.000020 loss_cls: 4.1751 (4.1528) grad_norm: 2.4824 (2.6578) time: 0.7804 data: 0.0002 max mem: 8426 +[2024-12-10 11:41:30 root] (utils.py 283): INFO Epoch: [0] [1540/2502] eta: 0:12:24 lr: 0.000020 loss_cls: 4.1586 (4.1524) grad_norm: 2.4509 (2.6561) time: 0.7705 data: 0.0002 max mem: 8426 +[2024-12-10 11:41:38 root] (utils.py 283): INFO Epoch: [0] [1550/2502] eta: 0:12:16 lr: 0.000020 loss_cls: 4.1241 (4.1503) grad_norm: 2.4374 (2.6546) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 11:41:45 root] (utils.py 283): INFO Epoch: [0] [1560/2502] eta: 0:12:09 lr: 0.000020 loss_cls: 3.9014 (4.1488) grad_norm: 2.4000 (2.6531) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 11:41:53 root] (utils.py 283): INFO Epoch: [0] [1570/2502] eta: 0:12:01 lr: 0.000020 loss_cls: 3.9014 (4.1460) grad_norm: 2.4000 (2.6519) time: 0.7609 data: 0.0002 max mem: 8426 +[2024-12-10 11:42:01 root] (utils.py 283): INFO Epoch: [0] [1580/2502] eta: 0:11:53 lr: 0.000020 loss_cls: 3.9709 (4.1450) grad_norm: 2.4222 (2.6505) time: 0.7612 data: 0.0002 max mem: 8426 +[2024-12-10 11:42:08 root] (utils.py 283): INFO Epoch: [0] [1590/2502] eta: 0:11:45 lr: 0.000020 loss_cls: 4.3211 (4.1463) grad_norm: 2.4245 (2.6496) time: 0.7623 data: 0.0002 max mem: 8426 +[2024-12-10 11:42:16 root] (utils.py 283): INFO Epoch: [0] [1600/2502] eta: 0:11:37 lr: 0.000020 loss_cls: 4.3433 (4.1476) grad_norm: 2.4117 (2.6480) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 11:42:23 root] (utils.py 283): INFO Epoch: [0] [1610/2502] eta: 0:11:29 lr: 0.000020 loss_cls: 4.2282 (4.1475) grad_norm: 2.3755 (2.6468) time: 0.7623 data: 0.0003 max mem: 8426 +[2024-12-10 11:42:31 root] (utils.py 283): INFO Epoch: [0] [1620/2502] eta: 0:11:22 lr: 0.000020 loss_cls: 4.1159 (4.1476) grad_norm: 2.4872 (2.6459) time: 0.7607 data: 0.0002 max mem: 8426 +[2024-12-10 11:42:39 root] (utils.py 283): INFO Epoch: [0] [1630/2502] eta: 0:11:14 lr: 0.000020 loss_cls: 4.2167 (4.1476) grad_norm: 2.4682 (2.6447) time: 0.7601 data: 0.0002 max mem: 8426 +[2024-12-10 11:42:46 root] (utils.py 283): INFO Epoch: [0] [1640/2502] eta: 0:11:06 lr: 0.000020 loss_cls: 4.2978 (4.1470) grad_norm: 2.4165 (2.6438) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 11:42:54 root] (utils.py 283): INFO Epoch: [0] [1650/2502] eta: 0:10:58 lr: 0.000020 loss_cls: 3.9557 (4.1435) grad_norm: 2.4165 (2.6423) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 11:43:02 root] (utils.py 283): INFO Epoch: [0] [1660/2502] eta: 0:10:51 lr: 0.000020 loss_cls: 3.5112 (4.1405) grad_norm: 2.4080 (2.6410) time: 0.7700 data: 0.0002 max mem: 8426 +[2024-12-10 11:43:09 root] (utils.py 283): INFO Epoch: [0] [1670/2502] eta: 0:10:43 lr: 0.000020 loss_cls: 3.8048 (4.1402) grad_norm: 2.4080 (2.6396) time: 0.7687 data: 0.0002 max mem: 8426 +[2024-12-10 11:43:17 root] (utils.py 283): INFO Epoch: [0] [1680/2502] eta: 0:10:35 lr: 0.000020 loss_cls: 4.3514 (4.1393) grad_norm: 2.3979 (2.6381) time: 0.7632 data: 0.0002 max mem: 8426 +[2024-12-10 11:43:25 root] (utils.py 283): INFO Epoch: [0] [1690/2502] eta: 0:10:27 lr: 0.000020 loss_cls: 4.1906 (4.1386) grad_norm: 2.3978 (2.6369) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-10 11:43:32 root] (utils.py 283): INFO Epoch: [0] [1700/2502] eta: 0:10:19 lr: 0.000020 loss_cls: 4.3280 (4.1409) grad_norm: 2.4296 (2.6358) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-10 11:43:40 root] (utils.py 283): INFO Epoch: [0] [1710/2502] eta: 0:10:12 lr: 0.000020 loss_cls: 4.2491 (4.1402) grad_norm: 2.4626 (2.6349) time: 0.7625 data: 0.0002 max mem: 8426 +[2024-12-10 11:43:47 root] (utils.py 283): INFO Epoch: [0] [1720/2502] eta: 0:10:04 lr: 0.000020 loss_cls: 3.9478 (4.1393) grad_norm: 2.4626 (2.6337) time: 0.7612 data: 0.0003 max mem: 8426 +[2024-12-10 11:43:55 root] (utils.py 283): INFO Epoch: [0] [1730/2502] eta: 0:09:56 lr: 0.000020 loss_cls: 4.3280 (4.1401) grad_norm: 2.4014 (2.6323) time: 0.7614 data: 0.0002 max mem: 8426 +[2024-12-10 11:44:03 root] (utils.py 283): INFO Epoch: [0] [1740/2502] eta: 0:09:48 lr: 0.000020 loss_cls: 4.2793 (4.1391) grad_norm: 2.4013 (2.6315) time: 0.7666 data: 0.0002 max mem: 8426 +[2024-12-10 11:44:11 root] (utils.py 283): INFO Epoch: [0] [1750/2502] eta: 0:09:41 lr: 0.000020 loss_cls: 3.8130 (4.1376) grad_norm: 2.4207 (2.6306) time: 0.7708 data: 0.0002 max mem: 8426 +[2024-12-10 11:44:18 root] (utils.py 283): INFO Epoch: [0] [1760/2502] eta: 0:09:33 lr: 0.000020 loss_cls: 3.7418 (4.1370) grad_norm: 2.3920 (2.6292) time: 0.7665 data: 0.0002 max mem: 8426 +[2024-12-10 11:44:26 root] (utils.py 283): INFO Epoch: [0] [1770/2502] eta: 0:09:25 lr: 0.000020 loss_cls: 4.2796 (4.1373) grad_norm: 2.3646 (2.6282) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-10 11:44:34 root] (utils.py 283): INFO Epoch: [0] [1780/2502] eta: 0:09:17 lr: 0.000020 loss_cls: 4.2855 (4.1367) grad_norm: 2.4046 (2.6270) time: 0.7687 data: 0.0002 max mem: 8426 +[2024-12-10 11:44:41 root] (utils.py 283): INFO Epoch: [0] [1790/2502] eta: 0:09:10 lr: 0.000020 loss_cls: 4.1301 (4.1356) grad_norm: 2.4233 (2.6260) time: 0.7709 data: 0.0002 max mem: 8426 +[2024-12-10 11:44:49 root] (utils.py 283): INFO Epoch: [0] [1800/2502] eta: 0:09:02 lr: 0.000020 loss_cls: 4.1925 (4.1367) grad_norm: 2.4293 (2.6251) time: 0.7680 data: 0.0002 max mem: 8426 +[2024-12-10 11:44:57 root] (utils.py 283): INFO Epoch: [0] [1810/2502] eta: 0:08:54 lr: 0.000020 loss_cls: 4.1925 (4.1358) grad_norm: 2.4545 (2.6240) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 11:45:04 root] (utils.py 283): INFO Epoch: [0] [1820/2502] eta: 0:08:46 lr: 0.000020 loss_cls: 4.0656 (4.1357) grad_norm: 2.4194 (2.6226) time: 0.7661 data: 0.0002 max mem: 8426 +[2024-12-10 11:45:12 root] (utils.py 283): INFO Epoch: [0] [1830/2502] eta: 0:08:39 lr: 0.000020 loss_cls: 4.3577 (4.1373) grad_norm: 2.4305 (2.6215) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 11:45:20 root] (utils.py 283): INFO Epoch: [0] [1840/2502] eta: 0:08:31 lr: 0.000020 loss_cls: 4.2752 (4.1363) grad_norm: 2.4470 (2.6206) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 11:45:27 root] (utils.py 283): INFO Epoch: [0] [1850/2502] eta: 0:08:23 lr: 0.000020 loss_cls: 3.9597 (4.1351) grad_norm: 2.4235 (2.6200) time: 0.7711 data: 0.0002 max mem: 8426 +[2024-12-10 11:45:35 root] (utils.py 283): INFO Epoch: [0] [1860/2502] eta: 0:08:16 lr: 0.000020 loss_cls: 3.9573 (4.1343) grad_norm: 2.4235 (2.6189) time: 0.7800 data: 0.0002 max mem: 8426 +[2024-12-10 11:45:43 root] (utils.py 283): INFO Epoch: [0] [1870/2502] eta: 0:08:08 lr: 0.000020 loss_cls: 4.1006 (4.1328) grad_norm: 2.4460 (2.6183) time: 0.7843 data: 0.0002 max mem: 8426 +[2024-12-10 11:45:51 root] (utils.py 283): INFO Epoch: [0] [1880/2502] eta: 0:08:00 lr: 0.000020 loss_cls: 4.3441 (4.1342) grad_norm: 2.4860 (2.6176) time: 0.7828 data: 0.0002 max mem: 8426 +[2024-12-10 11:45:59 root] (utils.py 283): INFO Epoch: [0] [1890/2502] eta: 0:07:52 lr: 0.000020 loss_cls: 4.3441 (4.1330) grad_norm: 2.4796 (2.6170) time: 0.7896 data: 0.0002 max mem: 8426 +[2024-12-10 11:46:07 root] (utils.py 283): INFO Epoch: [0] [1900/2502] eta: 0:07:45 lr: 0.000020 loss_cls: 3.9394 (4.1321) grad_norm: 2.4279 (2.6162) time: 0.7903 data: 0.0002 max mem: 8426 +[2024-12-10 11:46:14 root] (utils.py 283): INFO Epoch: [0] [1910/2502] eta: 0:07:37 lr: 0.000020 loss_cls: 3.9461 (4.1322) grad_norm: 2.3917 (2.6147) time: 0.7837 data: 0.0002 max mem: 8426 +[2024-12-10 11:46:22 root] (utils.py 283): INFO Epoch: [0] [1920/2502] eta: 0:07:29 lr: 0.000020 loss_cls: 3.8028 (4.1293) grad_norm: 2.3681 (2.6136) time: 0.7775 data: 0.0002 max mem: 8426 +[2024-12-10 11:46:30 root] (utils.py 283): INFO Epoch: [0] [1930/2502] eta: 0:07:22 lr: 0.000020 loss_cls: 3.6932 (4.1284) grad_norm: 2.4290 (2.6129) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 11:46:37 root] (utils.py 283): INFO Epoch: [0] [1940/2502] eta: 0:07:14 lr: 0.000020 loss_cls: 4.0593 (4.1272) grad_norm: 2.4343 (2.6120) time: 0.7608 data: 0.0002 max mem: 8426 +[2024-12-10 11:46:45 root] (utils.py 283): INFO Epoch: [0] [1950/2502] eta: 0:07:06 lr: 0.000020 loss_cls: 4.0069 (4.1259) grad_norm: 2.3710 (2.6110) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 11:46:53 root] (utils.py 283): INFO Epoch: [0] [1960/2502] eta: 0:06:58 lr: 0.000020 loss_cls: 3.9885 (4.1247) grad_norm: 2.3718 (2.6097) time: 0.7701 data: 0.0002 max mem: 8426 +[2024-12-10 11:47:00 root] (utils.py 283): INFO Epoch: [0] [1970/2502] eta: 0:06:51 lr: 0.000020 loss_cls: 3.8871 (4.1230) grad_norm: 2.3388 (2.6084) time: 0.7701 data: 0.0002 max mem: 8426 +[2024-12-10 11:47:08 root] (utils.py 283): INFO Epoch: [0] [1980/2502] eta: 0:06:43 lr: 0.000020 loss_cls: 3.7151 (4.1222) grad_norm: 2.3817 (2.6077) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 11:47:16 root] (utils.py 283): INFO Epoch: [0] [1990/2502] eta: 0:06:35 lr: 0.000020 loss_cls: 4.2202 (4.1215) grad_norm: 2.4387 (2.6068) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 11:47:24 root] (utils.py 283): INFO Epoch: [0] [2000/2502] eta: 0:06:27 lr: 0.000020 loss_cls: 4.0711 (4.1205) grad_norm: 2.3880 (2.6056) time: 0.7735 data: 0.0002 max mem: 8426 +[2024-12-10 11:47:31 root] (utils.py 283): INFO Epoch: [0] [2010/2502] eta: 0:06:20 lr: 0.000020 loss_cls: 4.2745 (4.1217) grad_norm: 2.3880 (2.6048) time: 0.7702 data: 0.0002 max mem: 8426 +[2024-12-10 11:47:39 root] (utils.py 283): INFO Epoch: [0] [2020/2502] eta: 0:06:12 lr: 0.000020 loss_cls: 4.2170 (4.1209) grad_norm: 2.4094 (2.6035) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 11:47:46 root] (utils.py 283): INFO Epoch: [0] [2030/2502] eta: 0:06:04 lr: 0.000020 loss_cls: 4.2170 (4.1217) grad_norm: 2.3530 (2.6025) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-10 11:47:54 root] (utils.py 283): INFO Epoch: [0] [2040/2502] eta: 0:05:56 lr: 0.000020 loss_cls: 4.3947 (4.1222) grad_norm: 2.3626 (2.6014) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 11:48:02 root] (utils.py 283): INFO Epoch: [0] [2050/2502] eta: 0:05:49 lr: 0.000020 loss_cls: 4.1043 (4.1210) grad_norm: 2.3626 (2.6003) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 11:48:09 root] (utils.py 283): INFO Epoch: [0] [2060/2502] eta: 0:05:41 lr: 0.000020 loss_cls: 3.8346 (4.1203) grad_norm: 2.4113 (2.5999) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 11:48:17 root] (utils.py 283): INFO Epoch: [0] [2070/2502] eta: 0:05:33 lr: 0.000020 loss_cls: 4.1514 (4.1206) grad_norm: 2.3833 (2.5988) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 11:48:25 root] (utils.py 283): INFO Epoch: [0] [2080/2502] eta: 0:05:25 lr: 0.000020 loss_cls: 4.2081 (4.1199) grad_norm: 2.3567 (2.5977) time: 0.7745 data: 0.0002 max mem: 8426 +[2024-12-10 11:48:32 root] (utils.py 283): INFO Epoch: [0] [2090/2502] eta: 0:05:18 lr: 0.000020 loss_cls: 4.2081 (4.1205) grad_norm: 2.3662 (2.5967) time: 0.7718 data: 0.0002 max mem: 8426 +[2024-12-10 11:48:40 root] (utils.py 283): INFO Epoch: [0] [2100/2502] eta: 0:05:10 lr: 0.000020 loss_cls: 4.2984 (4.1194) grad_norm: 2.3901 (2.5960) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 11:48:48 root] (utils.py 283): INFO Epoch: [0] [2110/2502] eta: 0:05:02 lr: 0.000020 loss_cls: 4.3849 (4.1201) grad_norm: 2.3901 (2.5951) time: 0.7683 data: 0.0002 max mem: 8426 +[2024-12-10 11:48:55 root] (utils.py 283): INFO Epoch: [0] [2120/2502] eta: 0:04:55 lr: 0.000020 loss_cls: 4.3849 (4.1196) grad_norm: 2.3742 (2.5939) time: 0.7670 data: 0.0002 max mem: 8426 +[2024-12-10 11:49:03 root] (utils.py 283): INFO Epoch: [0] [2130/2502] eta: 0:04:47 lr: 0.000020 loss_cls: 4.2522 (4.1195) grad_norm: 2.4066 (2.5931) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 11:49:11 root] (utils.py 283): INFO Epoch: [0] [2140/2502] eta: 0:04:39 lr: 0.000020 loss_cls: 4.3309 (4.1206) grad_norm: 2.3481 (2.5921) time: 0.7628 data: 0.0003 max mem: 8426 +[2024-12-10 11:49:18 root] (utils.py 283): INFO Epoch: [0] [2150/2502] eta: 0:04:31 lr: 0.000020 loss_cls: 4.4104 (4.1202) grad_norm: 2.3364 (2.5911) time: 0.7636 data: 0.0003 max mem: 8426 +[2024-12-10 11:49:26 root] (utils.py 283): INFO Epoch: [0] [2160/2502] eta: 0:04:24 lr: 0.000020 loss_cls: 4.3568 (4.1207) grad_norm: 2.3190 (2.5897) time: 0.7659 data: 0.0002 max mem: 8426 +[2024-12-10 11:49:34 root] (utils.py 283): INFO Epoch: [0] [2170/2502] eta: 0:04:16 lr: 0.000020 loss_cls: 4.4471 (4.1216) grad_norm: 2.3442 (2.5888) time: 0.7654 data: 0.0002 max mem: 8426 +[2024-12-10 11:49:41 root] (utils.py 283): INFO Epoch: [0] [2180/2502] eta: 0:04:08 lr: 0.000020 loss_cls: 4.2073 (4.1209) grad_norm: 2.3979 (2.5883) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 11:49:49 root] (utils.py 283): INFO Epoch: [0] [2190/2502] eta: 0:04:00 lr: 0.000020 loss_cls: 4.3316 (4.1220) grad_norm: 2.4077 (2.5876) time: 0.7602 data: 0.0002 max mem: 8426 +[2024-12-10 11:49:57 root] (utils.py 283): INFO Epoch: [0] [2200/2502] eta: 0:03:53 lr: 0.000020 loss_cls: 4.1751 (4.1201) grad_norm: 2.4184 (2.5870) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 11:50:04 root] (utils.py 283): INFO Epoch: [0] [2210/2502] eta: 0:03:45 lr: 0.000020 loss_cls: 3.8376 (4.1197) grad_norm: 2.4174 (2.5862) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-10 11:50:12 root] (utils.py 283): INFO Epoch: [0] [2220/2502] eta: 0:03:37 lr: 0.000020 loss_cls: 4.1629 (4.1191) grad_norm: 2.4000 (2.5857) time: 0.7675 data: 0.0003 max mem: 8426 +[2024-12-10 11:50:20 root] (utils.py 283): INFO Epoch: [0] [2230/2502] eta: 0:03:29 lr: 0.000020 loss_cls: 3.9666 (4.1180) grad_norm: 2.4111 (2.5849) time: 0.7668 data: 0.0003 max mem: 8426 +[2024-12-10 11:50:27 root] (utils.py 283): INFO Epoch: [0] [2240/2502] eta: 0:03:22 lr: 0.000020 loss_cls: 3.9656 (4.1170) grad_norm: 2.4499 (2.5845) time: 0.7721 data: 0.0002 max mem: 8426 +[2024-12-10 11:50:35 root] (utils.py 283): INFO Epoch: [0] [2250/2502] eta: 0:03:14 lr: 0.000020 loss_cls: 3.8151 (4.1157) grad_norm: 2.4318 (2.5836) time: 0.7813 data: 0.0002 max mem: 8426 +[2024-12-10 11:50:43 root] (utils.py 283): INFO Epoch: [0] [2260/2502] eta: 0:03:06 lr: 0.000020 loss_cls: 3.7289 (4.1151) grad_norm: 2.4071 (2.5832) time: 0.7800 data: 0.0002 max mem: 8426 +[2024-12-10 11:50:51 root] (utils.py 283): INFO Epoch: [0] [2270/2502] eta: 0:02:59 lr: 0.000020 loss_cls: 4.0364 (4.1156) grad_norm: 2.4786 (2.5828) time: 0.7737 data: 0.0002 max mem: 8426 +[2024-12-10 11:50:58 root] (utils.py 283): INFO Epoch: [0] [2280/2502] eta: 0:02:51 lr: 0.000020 loss_cls: 4.1532 (4.1165) grad_norm: 2.4706 (2.5822) time: 0.7694 data: 0.0002 max mem: 8426 +[2024-12-10 11:51:06 root] (utils.py 283): INFO Epoch: [0] [2290/2502] eta: 0:02:43 lr: 0.000020 loss_cls: 4.3060 (4.1163) grad_norm: 2.3934 (2.5816) time: 0.7676 data: 0.0002 max mem: 8426 +[2024-12-10 11:51:14 root] (utils.py 283): INFO Epoch: [0] [2300/2502] eta: 0:02:35 lr: 0.000020 loss_cls: 3.8266 (4.1154) grad_norm: 2.4340 (2.5810) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-10 11:51:21 root] (utils.py 283): INFO Epoch: [0] [2310/2502] eta: 0:02:28 lr: 0.000020 loss_cls: 3.8266 (4.1141) grad_norm: 2.4340 (2.5804) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 11:51:29 root] (utils.py 283): INFO Epoch: [0] [2320/2502] eta: 0:02:20 lr: 0.000020 loss_cls: 3.9956 (4.1145) grad_norm: 2.3371 (2.5794) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-10 11:51:37 root] (utils.py 283): INFO Epoch: [0] [2330/2502] eta: 0:02:12 lr: 0.000020 loss_cls: 4.4328 (4.1159) grad_norm: 2.3629 (2.5787) time: 0.7662 data: 0.0002 max mem: 8426 +[2024-12-10 11:51:44 root] (utils.py 283): INFO Epoch: [0] [2340/2502] eta: 0:02:05 lr: 0.000020 loss_cls: 4.4714 (4.1157) grad_norm: 2.5080 (2.5789) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-10 11:51:52 root] (utils.py 283): INFO Epoch: [0] [2350/2502] eta: 0:01:57 lr: 0.000020 loss_cls: 4.0786 (4.1156) grad_norm: 2.4795 (2.5776) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 11:52:00 root] (utils.py 283): INFO Epoch: [0] [2360/2502] eta: 0:01:49 lr: 0.000020 loss_cls: 3.9681 (4.1146) grad_norm: 2.3028 (2.5768) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 11:52:07 root] (utils.py 283): INFO Epoch: [0] [2370/2502] eta: 0:01:41 lr: 0.000020 loss_cls: 3.9340 (4.1141) grad_norm: 2.3954 (2.5764) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 11:52:15 root] (utils.py 283): INFO Epoch: [0] [2380/2502] eta: 0:01:34 lr: 0.000020 loss_cls: 4.1556 (4.1146) grad_norm: 2.4223 (2.5757) time: 0.7630 data: 0.0002 max mem: 8426 +[2024-12-10 11:52:23 root] (utils.py 283): INFO Epoch: [0] [2390/2502] eta: 0:01:26 lr: 0.000020 loss_cls: 4.0820 (4.1134) grad_norm: 2.3521 (2.5749) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 11:52:30 root] (utils.py 283): INFO Epoch: [0] [2400/2502] eta: 0:01:18 lr: 0.000020 loss_cls: 4.0164 (4.1129) grad_norm: 2.3168 (2.5740) time: 0.7659 data: 0.0002 max mem: 8426 +[2024-12-10 11:52:38 root] (utils.py 283): INFO Epoch: [0] [2410/2502] eta: 0:01:10 lr: 0.000020 loss_cls: 4.2786 (4.1135) grad_norm: 2.3494 (2.5734) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 11:52:45 root] (utils.py 283): INFO Epoch: [0] [2420/2502] eta: 0:01:03 lr: 0.000020 loss_cls: 4.1675 (4.1126) grad_norm: 2.4505 (2.5729) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 11:52:53 root] (utils.py 283): INFO Epoch: [0] [2430/2502] eta: 0:00:55 lr: 0.000020 loss_cls: 3.7248 (4.1125) grad_norm: 2.4625 (2.5724) time: 0.7607 data: 0.0002 max mem: 8426 +[2024-12-10 11:53:01 root] (utils.py 283): INFO Epoch: [0] [2440/2502] eta: 0:00:47 lr: 0.000020 loss_cls: 4.2543 (4.1127) grad_norm: 2.4452 (2.5720) time: 0.7571 data: 0.0002 max mem: 8426 +[2024-12-10 11:53:08 root] (utils.py 283): INFO Epoch: [0] [2450/2502] eta: 0:00:40 lr: 0.000020 loss_cls: 4.1955 (4.1132) grad_norm: 2.3887 (2.5713) time: 0.7543 data: 0.0002 max mem: 8426 +[2024-12-10 11:53:16 root] (utils.py 283): INFO Epoch: [0] [2460/2502] eta: 0:00:32 lr: 0.000020 loss_cls: 4.2702 (4.1132) grad_norm: 2.3709 (2.5706) time: 0.7618 data: 0.0002 max mem: 8426 +[2024-12-10 11:53:23 root] (utils.py 283): INFO Epoch: [0] [2470/2502] eta: 0:00:24 lr: 0.000020 loss_cls: 4.3026 (4.1123) grad_norm: 2.3791 (2.5701) time: 0.7609 data: 0.0002 max mem: 8426 +[2024-12-10 11:53:31 root] (utils.py 283): INFO Epoch: [0] [2480/2502] eta: 0:00:16 lr: 0.000020 loss_cls: 3.7322 (4.1113) grad_norm: 2.4095 (2.5695) time: 0.7553 data: 0.0002 max mem: 8426 +[2024-12-10 11:53:39 root] (utils.py 283): INFO Epoch: [0] [2490/2502] eta: 0:00:09 lr: 0.000020 loss_cls: 3.9736 (4.1121) grad_norm: 2.4325 (2.5689) time: 0.7822 data: 0.0226 max mem: 8426 +[2024-12-10 11:53:47 root] (utils.py 283): INFO Epoch: [0] [2500/2502] eta: 0:00:01 lr: 0.000020 loss_cls: 3.9736 (4.1114) grad_norm: 2.3822 (2.5682) time: 0.7796 data: 0.0226 max mem: 8426 +[2024-12-10 11:53:47 root] (utils.py 283): INFO Epoch: [0] [2501/2502] eta: 0:00:00 lr: 0.000020 loss_cls: 3.9736 (4.1109) grad_norm: 2.3822 (2.5682) time: 0.7793 data: 0.0226 max mem: 8426 +[2024-12-10 11:53:47 root] (utils.py 297): INFO Epoch: [0] Total time: 0:32:10 (0.7715 s / it) +[2024-12-10 11:53:47 root] (engine.py 179): INFO Averaged stats:lr: 0.000020 loss_cls: 3.9736 (4.1097) grad_norm: 2.3822 (2.5682) +[2024-12-10 11:53:48 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:13 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.6165 (0.6165) acc1: 87.5000 (87.5000) acc3: 96.8750 (96.8750) acc5: 100.0000 (100.0000) time: 0.1382 data: 0.0004 max mem: 8426 +[2024-12-10 11:53:49 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7311 (0.8134) acc1: 86.7188 (82.3864) acc3: 94.5312 (93.3239) acc5: 96.8750 (96.8040) time: 0.1337 data: 0.0003 max mem: 8426 +[2024-12-10 11:53:50 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8519 (0.8677) acc1: 80.4688 (80.9896) acc3: 92.1875 (92.8199) acc5: 95.3125 (95.7961) time: 0.1312 data: 0.0004 max mem: 8426 +[2024-12-10 11:53:52 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9532 (0.8787) acc1: 78.9062 (80.1915) acc3: 92.1875 (93.0444) acc5: 95.3125 (95.8417) time: 0.1289 data: 0.0004 max mem: 8426 +[2024-12-10 11:53:54 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8134 (0.8687) acc1: 79.6875 (80.5450) acc3: 93.7500 (93.1021) acc5: 96.0938 (95.7698) time: 0.1590 data: 0.0311 max mem: 8426 +[2024-12-10 11:53:55 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0738 (0.9592) acc1: 75.7812 (78.4620) acc3: 87.5000 (91.5901) acc5: 91.4062 (94.5772) time: 0.1587 data: 0.0312 max mem: 8426 +[2024-12-10 11:53:56 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2880 (1.0045) acc1: 71.0938 (77.6639) acc3: 85.1562 (90.7531) acc5: 89.0625 (93.7756) time: 0.1346 data: 0.0066 max mem: 8426 +[2024-12-10 11:53:58 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:04 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2260 (1.0465) acc1: 73.4375 (76.7826) acc3: 85.9375 (90.1188) acc5: 89.8438 (93.2768) time: 0.1472 data: 0.0174 max mem: 8426 +[2024-12-10 11:53:59 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2355 (1.0816) acc1: 72.6562 (75.9838) acc3: 85.9375 (89.5255) acc5: 89.8438 (92.7662) time: 0.1461 data: 0.0165 max mem: 8426 +[2024-12-10 11:54:00 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2581 (1.1095) acc1: 68.7500 (75.2318) acc3: 85.1562 (89.0968) acc5: 89.8438 (92.4880) time: 0.1358 data: 0.0061 max mem: 8426 +[2024-12-10 11:54:03 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1284 (1.0980) acc1: 71.0938 (75.4480) acc3: 88.2812 (89.3120) acc5: 90.6250 (92.6720) time: 0.1950 data: 0.0118 max mem: 8426 +[2024-12-10 11:54:03 root] (utils.py 297): INFO Test: Total time: 0:00:14 (0.1530 s / it) +[2024-12-10 11:54:03 root] (engine.py 264): INFO * Acc@1 75.414 Acc@3 89.380 Acc@5 92.672 loss 1.096 flops 1.285 layer_flops 1.251 +[2024-12-10 11:54:03 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 75.4% +[2024-12-10 11:54:03 root] (main.py 576): INFO Max accuracy: 75.41% +[2024-12-10 11:54:05 root] (utils.py 283): INFO Epoch: [1] [ 0/2502] eta: 0:46:39 lr: 0.000020 loss_cls: 3.6827 (3.6827) grad_norm: 2.4670 (2.4670) time: 1.1191 data: 0.0005 max mem: 8426 +[2024-12-10 11:54:12 root] (utils.py 283): INFO Epoch: [1] [ 10/2502] eta: 0:33:02 lr: 0.000020 loss_cls: 3.7315 (3.9655) grad_norm: 2.5051 (2.5241) time: 0.7957 data: 0.0002 max mem: 8426 +[2024-12-10 11:54:20 root] (utils.py 283): INFO Epoch: [1] [ 20/2502] eta: 0:32:17 lr: 0.000020 loss_cls: 4.2001 (4.0530) grad_norm: 2.3915 (2.4613) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 11:54:27 root] (utils.py 283): INFO Epoch: [1] [ 30/2502] eta: 0:31:51 lr: 0.000020 loss_cls: 4.3226 (4.1189) grad_norm: 2.3681 (2.4540) time: 0.7612 data: 0.0002 max mem: 8426 +[2024-12-10 11:54:35 root] (utils.py 283): INFO Epoch: [1] [ 40/2502] eta: 0:31:33 lr: 0.000020 loss_cls: 4.3226 (4.1189) grad_norm: 2.4270 (2.4469) time: 0.7569 data: 0.0002 max mem: 8426 +[2024-12-10 11:54:43 root] (utils.py 283): INFO Epoch: [1] [ 50/2502] eta: 0:31:20 lr: 0.000020 loss_cls: 4.2397 (4.1376) grad_norm: 2.3921 (2.4438) time: 0.7564 data: 0.0002 max mem: 8426 +[2024-12-10 11:54:50 root] (utils.py 283): INFO Epoch: [1] [ 60/2502] eta: 0:31:09 lr: 0.000020 loss_cls: 4.3569 (4.1802) grad_norm: 2.3822 (2.4335) time: 0.7585 data: 0.0002 max mem: 8426 +[2024-12-10 11:54:58 root] (utils.py 283): INFO Epoch: [1] [ 70/2502] eta: 0:31:00 lr: 0.000020 loss_cls: 4.3170 (4.1909) grad_norm: 2.3639 (2.4326) time: 0.7604 data: 0.0003 max mem: 8426 +[2024-12-10 11:55:05 root] (utils.py 283): INFO Epoch: [1] [ 80/2502] eta: 0:30:51 lr: 0.000020 loss_cls: 4.1165 (4.1757) grad_norm: 2.3587 (2.4229) time: 0.7601 data: 0.0003 max mem: 8426 +[2024-12-10 11:55:13 root] (utils.py 283): INFO Epoch: [1] [ 90/2502] eta: 0:30:42 lr: 0.000020 loss_cls: 4.0443 (4.1480) grad_norm: 2.3059 (2.4099) time: 0.7600 data: 0.0002 max mem: 8426 +[2024-12-10 11:55:21 root] (utils.py 283): INFO Epoch: [1] [ 100/2502] eta: 0:30:34 lr: 0.000020 loss_cls: 4.2226 (4.1510) grad_norm: 2.3428 (2.4142) time: 0.7619 data: 0.0003 max mem: 8426 +[2024-12-10 11:55:28 root] (utils.py 283): INFO Epoch: [1] [ 110/2502] eta: 0:30:26 lr: 0.000020 loss_cls: 4.1391 (4.1137) grad_norm: 2.3733 (2.4127) time: 0.7628 data: 0.0003 max mem: 8426 +[2024-12-10 11:55:36 root] (utils.py 283): INFO Epoch: [1] [ 120/2502] eta: 0:30:20 lr: 0.000020 loss_cls: 3.8846 (4.1196) grad_norm: 2.4786 (2.4184) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-10 11:55:44 root] (utils.py 283): INFO Epoch: [1] [ 130/2502] eta: 0:30:12 lr: 0.000020 loss_cls: 4.2087 (4.1178) grad_norm: 2.4657 (2.4187) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-10 11:55:51 root] (utils.py 283): INFO Epoch: [1] [ 140/2502] eta: 0:30:04 lr: 0.000020 loss_cls: 4.1580 (4.1254) grad_norm: 2.4026 (2.4175) time: 0.7617 data: 0.0002 max mem: 8426 +[2024-12-10 11:55:59 root] (utils.py 283): INFO Epoch: [1] [ 150/2502] eta: 0:29:56 lr: 0.000020 loss_cls: 4.0902 (4.1099) grad_norm: 2.3151 (2.4115) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 11:56:06 root] (utils.py 283): INFO Epoch: [1] [ 160/2502] eta: 0:29:49 lr: 0.000020 loss_cls: 4.3644 (4.1155) grad_norm: 2.2883 (2.4040) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 11:56:14 root] (utils.py 283): INFO Epoch: [1] [ 170/2502] eta: 0:29:41 lr: 0.000020 loss_cls: 4.3344 (4.1172) grad_norm: 2.2798 (2.3998) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 11:56:22 root] (utils.py 283): INFO Epoch: [1] [ 180/2502] eta: 0:29:34 lr: 0.000020 loss_cls: 4.3912 (4.1259) grad_norm: 2.3393 (2.3995) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 11:56:29 root] (utils.py 283): INFO Epoch: [1] [ 190/2502] eta: 0:29:26 lr: 0.000020 loss_cls: 4.3912 (4.1283) grad_norm: 2.4496 (2.4029) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 11:56:37 root] (utils.py 283): INFO Epoch: [1] [ 200/2502] eta: 0:29:18 lr: 0.000020 loss_cls: 4.3341 (4.1398) grad_norm: 2.4305 (2.4017) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 11:56:45 root] (utils.py 283): INFO Epoch: [1] [ 210/2502] eta: 0:29:10 lr: 0.000020 loss_cls: 4.3341 (4.1498) grad_norm: 2.3763 (2.4014) time: 0.7630 data: 0.0002 max mem: 8426 +[2024-12-10 11:56:52 root] (utils.py 283): INFO Epoch: [1] [ 220/2502] eta: 0:29:03 lr: 0.000020 loss_cls: 4.1522 (4.1419) grad_norm: 2.3872 (2.4006) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 11:57:00 root] (utils.py 283): INFO Epoch: [1] [ 230/2502] eta: 0:28:55 lr: 0.000020 loss_cls: 4.3031 (4.1561) grad_norm: 2.3872 (2.4032) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 11:57:08 root] (utils.py 283): INFO Epoch: [1] [ 240/2502] eta: 0:28:48 lr: 0.000020 loss_cls: 4.1641 (4.1357) grad_norm: 2.3825 (2.4034) time: 0.7649 data: 0.0003 max mem: 8426 +[2024-12-10 11:57:15 root] (utils.py 283): INFO Epoch: [1] [ 250/2502] eta: 0:28:41 lr: 0.000020 loss_cls: 3.8133 (4.1262) grad_norm: 2.3756 (2.4008) time: 0.7719 data: 0.0002 max mem: 8426 +[2024-12-10 11:57:23 root] (utils.py 283): INFO Epoch: [1] [ 260/2502] eta: 0:28:33 lr: 0.000020 loss_cls: 4.1449 (4.1187) grad_norm: 2.3440 (2.4000) time: 0.7689 data: 0.0003 max mem: 8426 +[2024-12-10 11:57:31 root] (utils.py 283): INFO Epoch: [1] [ 270/2502] eta: 0:28:25 lr: 0.000020 loss_cls: 4.3771 (4.1252) grad_norm: 2.3251 (2.3954) time: 0.7619 data: 0.0003 max mem: 8426 +[2024-12-10 11:57:38 root] (utils.py 283): INFO Epoch: [1] [ 280/2502] eta: 0:28:19 lr: 0.000020 loss_cls: 4.3212 (4.1198) grad_norm: 2.2852 (2.3917) time: 0.7719 data: 0.0002 max mem: 8426 +[2024-12-10 11:57:46 root] (utils.py 283): INFO Epoch: [1] [ 290/2502] eta: 0:28:12 lr: 0.000020 loss_cls: 3.6804 (4.1005) grad_norm: 2.3088 (2.3900) time: 0.7748 data: 0.0002 max mem: 8426 +[2024-12-10 11:57:54 root] (utils.py 283): INFO Epoch: [1] [ 300/2502] eta: 0:28:05 lr: 0.000020 loss_cls: 3.6603 (4.1002) grad_norm: 2.4046 (2.3920) time: 0.7753 data: 0.0002 max mem: 8426 +[2024-12-10 11:58:02 root] (utils.py 283): INFO Epoch: [1] [ 310/2502] eta: 0:27:58 lr: 0.000020 loss_cls: 4.1866 (4.1002) grad_norm: 2.4374 (2.3914) time: 0.7725 data: 0.0003 max mem: 8426 +[2024-12-10 11:58:09 root] (utils.py 283): INFO Epoch: [1] [ 320/2502] eta: 0:27:50 lr: 0.000020 loss_cls: 4.0821 (4.0986) grad_norm: 2.4127 (2.3920) time: 0.7649 data: 0.0003 max mem: 8426 +[2024-12-10 11:58:17 root] (utils.py 283): INFO Epoch: [1] [ 330/2502] eta: 0:27:42 lr: 0.000020 loss_cls: 4.0821 (4.0943) grad_norm: 2.4048 (2.3914) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 11:58:24 root] (utils.py 283): INFO Epoch: [1] [ 340/2502] eta: 0:27:34 lr: 0.000020 loss_cls: 3.8311 (4.0889) grad_norm: 2.3390 (2.3916) time: 0.7615 data: 0.0002 max mem: 8426 +[2024-12-10 11:58:32 root] (utils.py 283): INFO Epoch: [1] [ 350/2502] eta: 0:27:26 lr: 0.000020 loss_cls: 3.8311 (4.0845) grad_norm: 2.3999 (2.3939) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 11:58:40 root] (utils.py 283): INFO Epoch: [1] [ 360/2502] eta: 0:27:19 lr: 0.000020 loss_cls: 3.4936 (4.0648) grad_norm: 2.4108 (2.3946) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 11:58:47 root] (utils.py 283): INFO Epoch: [1] [ 370/2502] eta: 0:27:11 lr: 0.000020 loss_cls: 3.4763 (4.0604) grad_norm: 2.3732 (2.3943) time: 0.7623 data: 0.0002 max mem: 8426 +[2024-12-10 11:58:55 root] (utils.py 283): INFO Epoch: [1] [ 380/2502] eta: 0:27:03 lr: 0.000020 loss_cls: 4.1279 (4.0599) grad_norm: 2.3379 (2.3935) time: 0.7621 data: 0.0003 max mem: 8426 +[2024-12-10 11:59:03 root] (utils.py 283): INFO Epoch: [1] [ 390/2502] eta: 0:26:55 lr: 0.000020 loss_cls: 4.1279 (4.0547) grad_norm: 2.3571 (2.3971) time: 0.7650 data: 0.0003 max mem: 8426 +[2024-12-10 11:59:10 root] (utils.py 283): INFO Epoch: [1] [ 400/2502] eta: 0:26:48 lr: 0.000020 loss_cls: 4.0445 (4.0533) grad_norm: 2.4122 (2.3961) time: 0.7652 data: 0.0003 max mem: 8426 +[2024-12-10 11:59:18 root] (utils.py 283): INFO Epoch: [1] [ 410/2502] eta: 0:26:40 lr: 0.000020 loss_cls: 4.0445 (4.0510) grad_norm: 2.3823 (2.3960) time: 0.7637 data: 0.0003 max mem: 8426 +[2024-12-10 11:59:26 root] (utils.py 283): INFO Epoch: [1] [ 420/2502] eta: 0:26:33 lr: 0.000020 loss_cls: 3.9517 (4.0514) grad_norm: 2.3796 (2.3950) time: 0.7668 data: 0.0003 max mem: 8426 +[2024-12-10 11:59:33 root] (utils.py 283): INFO Epoch: [1] [ 430/2502] eta: 0:26:25 lr: 0.000020 loss_cls: 4.2601 (4.0561) grad_norm: 2.3474 (2.3938) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 11:59:41 root] (utils.py 283): INFO Epoch: [1] [ 440/2502] eta: 0:26:17 lr: 0.000020 loss_cls: 4.3643 (4.0613) grad_norm: 2.3697 (2.3940) time: 0.7604 data: 0.0002 max mem: 8426 +[2024-12-10 11:59:48 root] (utils.py 283): INFO Epoch: [1] [ 450/2502] eta: 0:26:09 lr: 0.000020 loss_cls: 4.0475 (4.0525) grad_norm: 2.3835 (2.3947) time: 0.7608 data: 0.0002 max mem: 8426 +[2024-12-10 11:59:56 root] (utils.py 283): INFO Epoch: [1] [ 460/2502] eta: 0:26:01 lr: 0.000020 loss_cls: 3.8282 (4.0508) grad_norm: 2.3996 (2.3969) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 12:00:04 root] (utils.py 283): INFO Epoch: [1] [ 470/2502] eta: 0:25:53 lr: 0.000020 loss_cls: 4.1259 (4.0533) grad_norm: 2.3917 (2.3964) time: 0.7624 data: 0.0003 max mem: 8426 +[2024-12-10 12:00:11 root] (utils.py 283): INFO Epoch: [1] [ 480/2502] eta: 0:25:46 lr: 0.000020 loss_cls: 4.1259 (4.0491) grad_norm: 2.3804 (2.3970) time: 0.7632 data: 0.0003 max mem: 8426 +[2024-12-10 12:00:19 root] (utils.py 283): INFO Epoch: [1] [ 490/2502] eta: 0:25:38 lr: 0.000020 loss_cls: 4.2671 (4.0538) grad_norm: 2.4069 (2.3984) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 12:00:27 root] (utils.py 283): INFO Epoch: [1] [ 500/2502] eta: 0:25:31 lr: 0.000020 loss_cls: 4.3104 (4.0519) grad_norm: 2.4024 (2.3984) time: 0.7701 data: 0.0002 max mem: 8426 +[2024-12-10 12:00:34 root] (utils.py 283): INFO Epoch: [1] [ 510/2502] eta: 0:25:23 lr: 0.000020 loss_cls: 4.0079 (4.0509) grad_norm: 2.4420 (2.4013) time: 0.7702 data: 0.0003 max mem: 8426 +[2024-12-10 12:00:42 root] (utils.py 283): INFO Epoch: [1] [ 520/2502] eta: 0:25:16 lr: 0.000020 loss_cls: 4.0079 (4.0496) grad_norm: 2.4251 (2.4015) time: 0.7637 data: 0.0003 max mem: 8426 +[2024-12-10 12:00:50 root] (utils.py 283): INFO Epoch: [1] [ 530/2502] eta: 0:25:08 lr: 0.000020 loss_cls: 3.9546 (4.0472) grad_norm: 2.3934 (2.4015) time: 0.7623 data: 0.0003 max mem: 8426 +[2024-12-10 12:00:57 root] (utils.py 283): INFO Epoch: [1] [ 540/2502] eta: 0:25:01 lr: 0.000020 loss_cls: 4.1593 (4.0465) grad_norm: 2.3842 (2.4019) time: 0.7692 data: 0.0003 max mem: 8426 +[2024-12-10 12:01:05 root] (utils.py 283): INFO Epoch: [1] [ 550/2502] eta: 0:24:53 lr: 0.000020 loss_cls: 4.2423 (4.0495) grad_norm: 2.3915 (2.4012) time: 0.7788 data: 0.0003 max mem: 8426 +[2024-12-10 12:01:13 root] (utils.py 283): INFO Epoch: [1] [ 560/2502] eta: 0:24:46 lr: 0.000020 loss_cls: 4.2181 (4.0473) grad_norm: 2.3715 (2.4011) time: 0.7743 data: 0.0003 max mem: 8426 +[2024-12-10 12:01:21 root] (utils.py 283): INFO Epoch: [1] [ 570/2502] eta: 0:24:38 lr: 0.000020 loss_cls: 3.8557 (4.0466) grad_norm: 2.3455 (2.4004) time: 0.7648 data: 0.0003 max mem: 8426 +[2024-12-10 12:01:28 root] (utils.py 283): INFO Epoch: [1] [ 580/2502] eta: 0:24:31 lr: 0.000020 loss_cls: 3.8087 (4.0417) grad_norm: 2.2997 (2.3986) time: 0.7661 data: 0.0003 max mem: 8426 +[2024-12-10 12:01:36 root] (utils.py 283): INFO Epoch: [1] [ 590/2502] eta: 0:24:23 lr: 0.000020 loss_cls: 4.1673 (4.0413) grad_norm: 2.3083 (2.3989) time: 0.7686 data: 0.0002 max mem: 8426 +[2024-12-10 12:01:44 root] (utils.py 283): INFO Epoch: [1] [ 600/2502] eta: 0:24:15 lr: 0.000020 loss_cls: 4.3035 (4.0415) grad_norm: 2.3678 (2.3982) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-10 12:01:51 root] (utils.py 283): INFO Epoch: [1] [ 610/2502] eta: 0:24:08 lr: 0.000020 loss_cls: 4.2883 (4.0414) grad_norm: 2.3205 (2.3973) time: 0.7630 data: 0.0003 max mem: 8426 +[2024-12-10 12:01:59 root] (utils.py 283): INFO Epoch: [1] [ 620/2502] eta: 0:24:00 lr: 0.000020 loss_cls: 4.1264 (4.0430) grad_norm: 2.3134 (2.3959) time: 0.7607 data: 0.0003 max mem: 8426 +[2024-12-10 12:02:06 root] (utils.py 283): INFO Epoch: [1] [ 630/2502] eta: 0:23:52 lr: 0.000020 loss_cls: 4.1264 (4.0452) grad_norm: 2.3372 (2.3964) time: 0.7602 data: 0.0002 max mem: 8426 +[2024-12-10 12:02:14 root] (utils.py 283): INFO Epoch: [1] [ 640/2502] eta: 0:23:44 lr: 0.000020 loss_cls: 3.9191 (4.0387) grad_norm: 2.4210 (2.3972) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 12:02:22 root] (utils.py 283): INFO Epoch: [1] [ 650/2502] eta: 0:23:37 lr: 0.000020 loss_cls: 4.0530 (4.0394) grad_norm: 2.4337 (2.3978) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 12:02:29 root] (utils.py 283): INFO Epoch: [1] [ 660/2502] eta: 0:23:29 lr: 0.000020 loss_cls: 4.2972 (4.0380) grad_norm: 2.4337 (2.3989) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 12:02:37 root] (utils.py 283): INFO Epoch: [1] [ 670/2502] eta: 0:23:21 lr: 0.000020 loss_cls: 4.1923 (4.0387) grad_norm: 2.3785 (2.3988) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-10 12:02:45 root] (utils.py 283): INFO Epoch: [1] [ 680/2502] eta: 0:23:14 lr: 0.000020 loss_cls: 4.2463 (4.0390) grad_norm: 2.3369 (2.3977) time: 0.7633 data: 0.0003 max mem: 8426 +[2024-12-10 12:02:52 root] (utils.py 283): INFO Epoch: [1] [ 690/2502] eta: 0:23:06 lr: 0.000020 loss_cls: 3.9604 (4.0359) grad_norm: 2.3312 (2.3980) time: 0.7635 data: 0.0003 max mem: 8426 +[2024-12-10 12:03:00 root] (utils.py 283): INFO Epoch: [1] [ 700/2502] eta: 0:22:58 lr: 0.000020 loss_cls: 4.0986 (4.0372) grad_norm: 2.3375 (2.3979) time: 0.7675 data: 0.0002 max mem: 8426 +[2024-12-10 12:03:08 root] (utils.py 283): INFO Epoch: [1] [ 710/2502] eta: 0:22:51 lr: 0.000020 loss_cls: 4.3533 (4.0395) grad_norm: 2.4019 (2.3984) time: 0.7671 data: 0.0002 max mem: 8426 +[2024-12-10 12:03:15 root] (utils.py 283): INFO Epoch: [1] [ 720/2502] eta: 0:22:43 lr: 0.000020 loss_cls: 4.3107 (4.0379) grad_norm: 2.4134 (2.3982) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 12:03:23 root] (utils.py 283): INFO Epoch: [1] [ 730/2502] eta: 0:22:35 lr: 0.000020 loss_cls: 4.2787 (4.0421) grad_norm: 2.3272 (2.3975) time: 0.7668 data: 0.0003 max mem: 8426 +[2024-12-10 12:03:31 root] (utils.py 283): INFO Epoch: [1] [ 740/2502] eta: 0:22:28 lr: 0.000020 loss_cls: 4.3699 (4.0466) grad_norm: 2.3272 (2.3970) time: 0.7662 data: 0.0002 max mem: 8426 +[2024-12-10 12:03:38 root] (utils.py 283): INFO Epoch: [1] [ 750/2502] eta: 0:22:20 lr: 0.000020 loss_cls: 4.3142 (4.0466) grad_norm: 2.3682 (2.3976) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 12:03:46 root] (utils.py 283): INFO Epoch: [1] [ 760/2502] eta: 0:22:12 lr: 0.000020 loss_cls: 4.0590 (4.0474) grad_norm: 2.4219 (2.3983) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 12:03:54 root] (utils.py 283): INFO Epoch: [1] [ 770/2502] eta: 0:22:05 lr: 0.000020 loss_cls: 3.9378 (4.0438) grad_norm: 2.3547 (2.3977) time: 0.7708 data: 0.0002 max mem: 8426 +[2024-12-10 12:04:01 root] (utils.py 283): INFO Epoch: [1] [ 780/2502] eta: 0:21:57 lr: 0.000020 loss_cls: 3.7707 (4.0425) grad_norm: 2.3547 (2.3978) time: 0.7659 data: 0.0002 max mem: 8426 +[2024-12-10 12:04:09 root] (utils.py 283): INFO Epoch: [1] [ 790/2502] eta: 0:21:49 lr: 0.000020 loss_cls: 3.9023 (4.0426) grad_norm: 2.3860 (2.3978) time: 0.7574 data: 0.0002 max mem: 8426 +[2024-12-10 12:04:16 root] (utils.py 283): INFO Epoch: [1] [ 800/2502] eta: 0:21:42 lr: 0.000020 loss_cls: 4.1346 (4.0441) grad_norm: 2.3729 (2.3976) time: 0.7630 data: 0.0003 max mem: 8426 +[2024-12-10 12:04:24 root] (utils.py 283): INFO Epoch: [1] [ 810/2502] eta: 0:21:34 lr: 0.000020 loss_cls: 4.2623 (4.0446) grad_norm: 2.4123 (2.3980) time: 0.7699 data: 0.0002 max mem: 8426 +[2024-12-10 12:04:32 root] (utils.py 283): INFO Epoch: [1] [ 820/2502] eta: 0:21:27 lr: 0.000020 loss_cls: 4.2257 (4.0445) grad_norm: 2.3837 (2.3977) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-10 12:04:40 root] (utils.py 283): INFO Epoch: [1] [ 830/2502] eta: 0:21:19 lr: 0.000020 loss_cls: 4.0950 (4.0421) grad_norm: 2.3326 (2.3970) time: 0.7683 data: 0.0002 max mem: 8426 +[2024-12-10 12:04:47 root] (utils.py 283): INFO Epoch: [1] [ 840/2502] eta: 0:21:12 lr: 0.000020 loss_cls: 3.9267 (4.0387) grad_norm: 2.3145 (2.3962) time: 0.7721 data: 0.0002 max mem: 8426 +[2024-12-10 12:04:55 root] (utils.py 283): INFO Epoch: [1] [ 850/2502] eta: 0:21:04 lr: 0.000020 loss_cls: 4.0580 (4.0403) grad_norm: 2.2695 (2.3955) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-10 12:05:03 root] (utils.py 283): INFO Epoch: [1] [ 860/2502] eta: 0:20:56 lr: 0.000020 loss_cls: 4.1544 (4.0396) grad_norm: 2.3429 (2.3960) time: 0.7642 data: 0.0003 max mem: 8426 +[2024-12-10 12:05:10 root] (utils.py 283): INFO Epoch: [1] [ 870/2502] eta: 0:20:48 lr: 0.000020 loss_cls: 3.9682 (4.0374) grad_norm: 2.4289 (2.3964) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 12:05:18 root] (utils.py 283): INFO Epoch: [1] [ 880/2502] eta: 0:20:41 lr: 0.000020 loss_cls: 4.2436 (4.0402) grad_norm: 2.4356 (2.3970) time: 0.7597 data: 0.0002 max mem: 8426 +[2024-12-10 12:05:25 root] (utils.py 283): INFO Epoch: [1] [ 890/2502] eta: 0:20:33 lr: 0.000020 loss_cls: 4.3318 (4.0420) grad_norm: 2.3974 (2.3964) time: 0.7609 data: 0.0002 max mem: 8426 +[2024-12-10 12:05:33 root] (utils.py 283): INFO Epoch: [1] [ 900/2502] eta: 0:20:25 lr: 0.000020 loss_cls: 4.0638 (4.0402) grad_norm: 2.3772 (2.3962) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 12:05:41 root] (utils.py 283): INFO Epoch: [1] [ 910/2502] eta: 0:20:18 lr: 0.000020 loss_cls: 4.0856 (4.0411) grad_norm: 2.3772 (2.3961) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-10 12:05:48 root] (utils.py 283): INFO Epoch: [1] [ 920/2502] eta: 0:20:10 lr: 0.000020 loss_cls: 4.3376 (4.0429) grad_norm: 2.3185 (2.3964) time: 0.7630 data: 0.0002 max mem: 8426 +[2024-12-10 12:05:56 root] (utils.py 283): INFO Epoch: [1] [ 930/2502] eta: 0:20:02 lr: 0.000020 loss_cls: 4.0419 (4.0410) grad_norm: 2.2671 (2.3953) time: 0.7659 data: 0.0002 max mem: 8426 +[2024-12-10 12:06:04 root] (utils.py 283): INFO Epoch: [1] [ 940/2502] eta: 0:19:55 lr: 0.000020 loss_cls: 3.7698 (4.0395) grad_norm: 2.3430 (2.3954) time: 0.7651 data: 0.0003 max mem: 8426 +[2024-12-10 12:06:11 root] (utils.py 283): INFO Epoch: [1] [ 950/2502] eta: 0:19:47 lr: 0.000020 loss_cls: 3.8523 (4.0392) grad_norm: 2.3991 (2.3956) time: 0.7623 data: 0.0002 max mem: 8426 +[2024-12-10 12:06:19 root] (utils.py 283): INFO Epoch: [1] [ 960/2502] eta: 0:19:39 lr: 0.000020 loss_cls: 4.2578 (4.0399) grad_norm: 2.3728 (2.3952) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 12:06:26 root] (utils.py 283): INFO Epoch: [1] [ 970/2502] eta: 0:19:32 lr: 0.000020 loss_cls: 3.7623 (4.0357) grad_norm: 2.3665 (2.3952) time: 0.7634 data: 0.0003 max mem: 8426 +[2024-12-10 12:06:34 root] (utils.py 283): INFO Epoch: [1] [ 980/2502] eta: 0:19:24 lr: 0.000020 loss_cls: 3.8074 (4.0352) grad_norm: 2.4164 (2.3958) time: 0.7708 data: 0.0002 max mem: 8426 +[2024-12-10 12:06:42 root] (utils.py 283): INFO Epoch: [1] [ 990/2502] eta: 0:19:17 lr: 0.000020 loss_cls: 3.9818 (4.0360) grad_norm: 2.4047 (2.3958) time: 0.7798 data: 0.0002 max mem: 8426 +[2024-12-10 12:06:50 root] (utils.py 283): INFO Epoch: [1] [1000/2502] eta: 0:19:09 lr: 0.000020 loss_cls: 3.9034 (4.0344) grad_norm: 2.3199 (2.3951) time: 0.7761 data: 0.0002 max mem: 8426 +[2024-12-10 12:06:57 root] (utils.py 283): INFO Epoch: [1] [1010/2502] eta: 0:19:02 lr: 0.000020 loss_cls: 4.0443 (4.0344) grad_norm: 2.3246 (2.3948) time: 0.7671 data: 0.0002 max mem: 8426 +[2024-12-10 12:07:05 root] (utils.py 283): INFO Epoch: [1] [1020/2502] eta: 0:18:54 lr: 0.000020 loss_cls: 4.0443 (4.0325) grad_norm: 2.3813 (2.3948) time: 0.7609 data: 0.0003 max mem: 8426 +[2024-12-10 12:07:13 root] (utils.py 283): INFO Epoch: [1] [1030/2502] eta: 0:18:46 lr: 0.000020 loss_cls: 3.9735 (4.0328) grad_norm: 2.3354 (2.3939) time: 0.7600 data: 0.0002 max mem: 8426 +[2024-12-10 12:07:20 root] (utils.py 283): INFO Epoch: [1] [1040/2502] eta: 0:18:38 lr: 0.000020 loss_cls: 3.8298 (4.0298) grad_norm: 2.3031 (2.3936) time: 0.7592 data: 0.0002 max mem: 8426 +[2024-12-10 12:07:28 root] (utils.py 283): INFO Epoch: [1] [1050/2502] eta: 0:18:31 lr: 0.000020 loss_cls: 3.8992 (4.0297) grad_norm: 2.3253 (2.3932) time: 0.7604 data: 0.0003 max mem: 8426 +[2024-12-10 12:07:35 root] (utils.py 283): INFO Epoch: [1] [1060/2502] eta: 0:18:23 lr: 0.000020 loss_cls: 4.1813 (4.0322) grad_norm: 2.3503 (2.3933) time: 0.7629 data: 0.0003 max mem: 8426 +[2024-12-10 12:07:43 root] (utils.py 283): INFO Epoch: [1] [1070/2502] eta: 0:18:15 lr: 0.000020 loss_cls: 4.5063 (4.0330) grad_norm: 2.3868 (2.3929) time: 0.7640 data: 0.0003 max mem: 8426 +[2024-12-10 12:07:51 root] (utils.py 283): INFO Epoch: [1] [1080/2502] eta: 0:18:08 lr: 0.000020 loss_cls: 4.2804 (4.0345) grad_norm: 2.4314 (2.3933) time: 0.7643 data: 0.0003 max mem: 8426 +[2024-12-10 12:07:58 root] (utils.py 283): INFO Epoch: [1] [1090/2502] eta: 0:18:00 lr: 0.000020 loss_cls: 4.2804 (4.0358) grad_norm: 2.3836 (2.3930) time: 0.7652 data: 0.0003 max mem: 8426 +[2024-12-10 12:08:06 root] (utils.py 283): INFO Epoch: [1] [1100/2502] eta: 0:17:52 lr: 0.000020 loss_cls: 4.2375 (4.0343) grad_norm: 2.3258 (2.3926) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 12:08:14 root] (utils.py 283): INFO Epoch: [1] [1110/2502] eta: 0:17:45 lr: 0.000020 loss_cls: 4.2384 (4.0365) grad_norm: 2.3412 (2.3929) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 12:08:21 root] (utils.py 283): INFO Epoch: [1] [1120/2502] eta: 0:17:37 lr: 0.000020 loss_cls: 4.2384 (4.0374) grad_norm: 2.3789 (2.3927) time: 0.7597 data: 0.0003 max mem: 8426 +[2024-12-10 12:08:29 root] (utils.py 283): INFO Epoch: [1] [1130/2502] eta: 0:17:29 lr: 0.000020 loss_cls: 4.2525 (4.0395) grad_norm: 2.3659 (2.3930) time: 0.7575 data: 0.0003 max mem: 8426 +[2024-12-10 12:08:36 root] (utils.py 283): INFO Epoch: [1] [1140/2502] eta: 0:17:21 lr: 0.000020 loss_cls: 4.2568 (4.0397) grad_norm: 2.3395 (2.3924) time: 0.7602 data: 0.0003 max mem: 8426 +[2024-12-10 12:08:44 root] (utils.py 283): INFO Epoch: [1] [1150/2502] eta: 0:17:14 lr: 0.000020 loss_cls: 4.2176 (4.0413) grad_norm: 2.3020 (2.3919) time: 0.7611 data: 0.0003 max mem: 8426 +[2024-12-10 12:08:52 root] (utils.py 283): INFO Epoch: [1] [1160/2502] eta: 0:17:06 lr: 0.000020 loss_cls: 4.2504 (4.0414) grad_norm: 2.3142 (2.3916) time: 0.7603 data: 0.0003 max mem: 8426 +[2024-12-10 12:08:59 root] (utils.py 283): INFO Epoch: [1] [1170/2502] eta: 0:16:58 lr: 0.000020 loss_cls: 4.2791 (4.0420) grad_norm: 2.3116 (2.3909) time: 0.7584 data: 0.0002 max mem: 8426 +[2024-12-10 12:09:07 root] (utils.py 283): INFO Epoch: [1] [1180/2502] eta: 0:16:51 lr: 0.000020 loss_cls: 4.2473 (4.0427) grad_norm: 2.3108 (2.3907) time: 0.7581 data: 0.0002 max mem: 8426 +[2024-12-10 12:09:14 root] (utils.py 283): INFO Epoch: [1] [1190/2502] eta: 0:16:43 lr: 0.000020 loss_cls: 4.1446 (4.0418) grad_norm: 2.3130 (2.3901) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-10 12:09:22 root] (utils.py 283): INFO Epoch: [1] [1200/2502] eta: 0:16:35 lr: 0.000020 loss_cls: 4.0852 (4.0413) grad_norm: 2.3352 (2.3899) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 12:09:30 root] (utils.py 283): INFO Epoch: [1] [1210/2502] eta: 0:16:28 lr: 0.000020 loss_cls: 4.0565 (4.0406) grad_norm: 2.3352 (2.3896) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 12:09:37 root] (utils.py 283): INFO Epoch: [1] [1220/2502] eta: 0:16:20 lr: 0.000020 loss_cls: 4.2059 (4.0425) grad_norm: 2.3793 (2.3901) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 12:09:45 root] (utils.py 283): INFO Epoch: [1] [1230/2502] eta: 0:16:12 lr: 0.000020 loss_cls: 4.2059 (4.0431) grad_norm: 2.4148 (2.3901) time: 0.7672 data: 0.0002 max mem: 8426 +[2024-12-10 12:09:53 root] (utils.py 283): INFO Epoch: [1] [1240/2502] eta: 0:16:05 lr: 0.000020 loss_cls: 4.1651 (4.0446) grad_norm: 2.3618 (2.3898) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-10 12:10:00 root] (utils.py 283): INFO Epoch: [1] [1250/2502] eta: 0:15:57 lr: 0.000020 loss_cls: 4.1857 (4.0431) grad_norm: 2.3307 (2.3895) time: 0.7672 data: 0.0002 max mem: 8426 +[2024-12-10 12:10:08 root] (utils.py 283): INFO Epoch: [1] [1260/2502] eta: 0:15:49 lr: 0.000020 loss_cls: 4.1857 (4.0444) grad_norm: 2.3048 (2.3895) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 12:10:16 root] (utils.py 283): INFO Epoch: [1] [1270/2502] eta: 0:15:42 lr: 0.000020 loss_cls: 4.2175 (4.0438) grad_norm: 2.3331 (2.3895) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 12:10:23 root] (utils.py 283): INFO Epoch: [1] [1280/2502] eta: 0:15:34 lr: 0.000020 loss_cls: 4.1747 (4.0431) grad_norm: 2.3549 (2.3892) time: 0.7601 data: 0.0002 max mem: 8426 +[2024-12-10 12:10:31 root] (utils.py 283): INFO Epoch: [1] [1290/2502] eta: 0:15:26 lr: 0.000020 loss_cls: 4.1971 (4.0443) grad_norm: 2.3571 (2.3896) time: 0.7621 data: 0.0003 max mem: 8426 +[2024-12-10 12:10:38 root] (utils.py 283): INFO Epoch: [1] [1300/2502] eta: 0:15:19 lr: 0.000020 loss_cls: 4.1988 (4.0445) grad_norm: 2.3511 (2.3895) time: 0.7598 data: 0.0003 max mem: 8426 +[2024-12-10 12:10:46 root] (utils.py 283): INFO Epoch: [1] [1310/2502] eta: 0:15:11 lr: 0.000020 loss_cls: 3.9067 (4.0429) grad_norm: 2.3378 (2.3895) time: 0.7562 data: 0.0003 max mem: 8426 +[2024-12-10 12:10:54 root] (utils.py 283): INFO Epoch: [1] [1320/2502] eta: 0:15:03 lr: 0.000020 loss_cls: 4.0779 (4.0429) grad_norm: 2.3320 (2.3894) time: 0.7573 data: 0.0002 max mem: 8426 +[2024-12-10 12:11:01 root] (utils.py 283): INFO Epoch: [1] [1330/2502] eta: 0:14:56 lr: 0.000020 loss_cls: 4.3959 (4.0440) grad_norm: 2.3326 (2.3891) time: 0.7584 data: 0.0002 max mem: 8426 +[2024-12-10 12:11:09 root] (utils.py 283): INFO Epoch: [1] [1340/2502] eta: 0:14:48 lr: 0.000020 loss_cls: 4.3640 (4.0421) grad_norm: 2.3825 (2.3900) time: 0.7556 data: 0.0002 max mem: 8426 +[2024-12-10 12:11:16 root] (utils.py 283): INFO Epoch: [1] [1350/2502] eta: 0:14:40 lr: 0.000020 loss_cls: 4.2139 (4.0440) grad_norm: 2.3952 (2.3898) time: 0.7546 data: 0.0002 max mem: 8426 +[2024-12-10 12:11:24 root] (utils.py 283): INFO Epoch: [1] [1360/2502] eta: 0:14:32 lr: 0.000020 loss_cls: 4.1498 (4.0421) grad_norm: 2.3424 (2.3894) time: 0.7564 data: 0.0002 max mem: 8426 +[2024-12-10 12:11:31 root] (utils.py 283): INFO Epoch: [1] [1370/2502] eta: 0:14:25 lr: 0.000020 loss_cls: 4.0092 (4.0411) grad_norm: 2.3463 (2.3890) time: 0.7575 data: 0.0003 max mem: 8426 +[2024-12-10 12:11:39 root] (utils.py 283): INFO Epoch: [1] [1380/2502] eta: 0:14:17 lr: 0.000020 loss_cls: 3.9980 (4.0393) grad_norm: 2.3855 (2.3887) time: 0.7653 data: 0.0003 max mem: 8426 +[2024-12-10 12:11:47 root] (utils.py 283): INFO Epoch: [1] [1390/2502] eta: 0:14:09 lr: 0.000020 loss_cls: 3.9980 (4.0388) grad_norm: 2.3293 (2.3883) time: 0.7629 data: 0.0003 max mem: 8426 +[2024-12-10 12:11:54 root] (utils.py 283): INFO Epoch: [1] [1400/2502] eta: 0:14:02 lr: 0.000020 loss_cls: 4.1618 (4.0376) grad_norm: 2.3369 (2.3879) time: 0.7569 data: 0.0002 max mem: 8426 +[2024-12-10 12:12:02 root] (utils.py 283): INFO Epoch: [1] [1410/2502] eta: 0:13:54 lr: 0.000020 loss_cls: 3.9857 (4.0376) grad_norm: 2.4028 (2.3884) time: 0.7600 data: 0.0002 max mem: 8426 +[2024-12-10 12:12:10 root] (utils.py 283): INFO Epoch: [1] [1420/2502] eta: 0:13:47 lr: 0.000020 loss_cls: 3.9857 (4.0380) grad_norm: 2.4406 (2.3889) time: 0.7711 data: 0.0002 max mem: 8426 +[2024-12-10 12:12:17 root] (utils.py 283): INFO Epoch: [1] [1430/2502] eta: 0:13:39 lr: 0.000020 loss_cls: 4.1417 (4.0374) grad_norm: 2.3602 (2.3888) time: 0.7706 data: 0.0002 max mem: 8426 +[2024-12-10 12:12:25 root] (utils.py 283): INFO Epoch: [1] [1440/2502] eta: 0:13:31 lr: 0.000020 loss_cls: 3.6322 (4.0352) grad_norm: 2.3217 (2.3885) time: 0.7574 data: 0.0002 max mem: 8426 +[2024-12-10 12:12:32 root] (utils.py 283): INFO Epoch: [1] [1450/2502] eta: 0:13:23 lr: 0.000020 loss_cls: 3.6322 (4.0333) grad_norm: 2.4006 (2.3891) time: 0.7552 data: 0.0002 max mem: 8426 +[2024-12-10 12:12:40 root] (utils.py 283): INFO Epoch: [1] [1460/2502] eta: 0:13:16 lr: 0.000020 loss_cls: 3.9543 (4.0338) grad_norm: 2.4263 (2.3890) time: 0.7572 data: 0.0002 max mem: 8426 +[2024-12-10 12:12:48 root] (utils.py 283): INFO Epoch: [1] [1470/2502] eta: 0:13:08 lr: 0.000020 loss_cls: 4.2743 (4.0360) grad_norm: 2.3276 (2.3888) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-10 12:12:55 root] (utils.py 283): INFO Epoch: [1] [1480/2502] eta: 0:13:00 lr: 0.000020 loss_cls: 4.2741 (4.0358) grad_norm: 2.3525 (2.3890) time: 0.7659 data: 0.0002 max mem: 8426 +[2024-12-10 12:13:03 root] (utils.py 283): INFO Epoch: [1] [1490/2502] eta: 0:12:53 lr: 0.000020 loss_cls: 3.9124 (4.0356) grad_norm: 2.3508 (2.3887) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 12:13:11 root] (utils.py 283): INFO Epoch: [1] [1500/2502] eta: 0:12:45 lr: 0.000020 loss_cls: 3.9124 (4.0342) grad_norm: 2.3498 (2.3889) time: 0.7643 data: 0.0003 max mem: 8426 +[2024-12-10 12:13:18 root] (utils.py 283): INFO Epoch: [1] [1510/2502] eta: 0:12:38 lr: 0.000020 loss_cls: 3.7846 (4.0331) grad_norm: 2.4025 (2.3887) time: 0.7629 data: 0.0003 max mem: 8426 +[2024-12-10 12:13:26 root] (utils.py 283): INFO Epoch: [1] [1520/2502] eta: 0:12:30 lr: 0.000020 loss_cls: 4.1271 (4.0337) grad_norm: 2.3657 (2.3886) time: 0.7642 data: 0.0003 max mem: 8426 +[2024-12-10 12:13:34 root] (utils.py 283): INFO Epoch: [1] [1530/2502] eta: 0:12:22 lr: 0.000020 loss_cls: 3.9895 (4.0328) grad_norm: 2.4273 (2.3890) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 12:13:41 root] (utils.py 283): INFO Epoch: [1] [1540/2502] eta: 0:12:15 lr: 0.000020 loss_cls: 3.9895 (4.0340) grad_norm: 2.4369 (2.3895) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-10 12:13:49 root] (utils.py 283): INFO Epoch: [1] [1550/2502] eta: 0:12:07 lr: 0.000020 loss_cls: 4.3891 (4.0362) grad_norm: 2.3747 (2.3892) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 12:13:57 root] (utils.py 283): INFO Epoch: [1] [1560/2502] eta: 0:11:59 lr: 0.000020 loss_cls: 4.3521 (4.0370) grad_norm: 2.3167 (2.3897) time: 0.7690 data: 0.0002 max mem: 8426 +[2024-12-10 12:14:04 root] (utils.py 283): INFO Epoch: [1] [1570/2502] eta: 0:11:52 lr: 0.000020 loss_cls: 3.7424 (4.0338) grad_norm: 2.3631 (2.3894) time: 0.7727 data: 0.0002 max mem: 8426 +[2024-12-10 12:14:12 root] (utils.py 283): INFO Epoch: [1] [1580/2502] eta: 0:11:44 lr: 0.000020 loss_cls: 3.4940 (4.0318) grad_norm: 2.3631 (2.3894) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 12:14:20 root] (utils.py 283): INFO Epoch: [1] [1590/2502] eta: 0:11:36 lr: 0.000020 loss_cls: 3.6789 (4.0307) grad_norm: 2.3528 (2.3892) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 12:14:27 root] (utils.py 283): INFO Epoch: [1] [1600/2502] eta: 0:11:29 lr: 0.000020 loss_cls: 3.8193 (4.0285) grad_norm: 2.3528 (2.3896) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 12:14:35 root] (utils.py 283): INFO Epoch: [1] [1610/2502] eta: 0:11:21 lr: 0.000020 loss_cls: 3.8976 (4.0285) grad_norm: 2.4173 (2.3897) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 12:14:43 root] (utils.py 283): INFO Epoch: [1] [1620/2502] eta: 0:11:14 lr: 0.000020 loss_cls: 3.9608 (4.0281) grad_norm: 2.4465 (2.3900) time: 0.7665 data: 0.0002 max mem: 8426 +[2024-12-10 12:14:50 root] (utils.py 283): INFO Epoch: [1] [1630/2502] eta: 0:11:06 lr: 0.000020 loss_cls: 3.9838 (4.0277) grad_norm: 2.4086 (2.3900) time: 0.7747 data: 0.0002 max mem: 8426 +[2024-12-10 12:14:58 root] (utils.py 283): INFO Epoch: [1] [1640/2502] eta: 0:10:58 lr: 0.000020 loss_cls: 4.1223 (4.0281) grad_norm: 2.4068 (2.3902) time: 0.7766 data: 0.0002 max mem: 8426 +[2024-12-10 12:15:06 root] (utils.py 283): INFO Epoch: [1] [1650/2502] eta: 0:10:51 lr: 0.000020 loss_cls: 3.9869 (4.0274) grad_norm: 2.4068 (2.3902) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 12:15:13 root] (utils.py 283): INFO Epoch: [1] [1660/2502] eta: 0:10:43 lr: 0.000020 loss_cls: 3.9221 (4.0261) grad_norm: 2.4064 (2.3901) time: 0.7613 data: 0.0002 max mem: 8426 +[2024-12-10 12:15:21 root] (utils.py 283): INFO Epoch: [1] [1670/2502] eta: 0:10:35 lr: 0.000020 loss_cls: 4.1118 (4.0277) grad_norm: 2.3854 (2.3899) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 12:15:29 root] (utils.py 283): INFO Epoch: [1] [1680/2502] eta: 0:10:28 lr: 0.000020 loss_cls: 4.1227 (4.0286) grad_norm: 2.3203 (2.3896) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 12:15:36 root] (utils.py 283): INFO Epoch: [1] [1690/2502] eta: 0:10:20 lr: 0.000020 loss_cls: 3.9043 (4.0268) grad_norm: 2.3218 (2.3893) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 12:15:44 root] (utils.py 283): INFO Epoch: [1] [1700/2502] eta: 0:10:13 lr: 0.000020 loss_cls: 4.0124 (4.0273) grad_norm: 2.4007 (2.3896) time: 0.7629 data: 0.0003 max mem: 8426 +[2024-12-10 12:15:51 root] (utils.py 283): INFO Epoch: [1] [1710/2502] eta: 0:10:05 lr: 0.000020 loss_cls: 4.0944 (4.0272) grad_norm: 2.4126 (2.3895) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 12:15:59 root] (utils.py 283): INFO Epoch: [1] [1720/2502] eta: 0:09:57 lr: 0.000020 loss_cls: 4.2423 (4.0282) grad_norm: 2.3460 (2.3893) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-10 12:16:07 root] (utils.py 283): INFO Epoch: [1] [1730/2502] eta: 0:09:50 lr: 0.000020 loss_cls: 4.2521 (4.0262) grad_norm: 2.3731 (2.3895) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-10 12:16:14 root] (utils.py 283): INFO Epoch: [1] [1740/2502] eta: 0:09:42 lr: 0.000020 loss_cls: 3.7820 (4.0265) grad_norm: 2.3977 (2.3893) time: 0.7685 data: 0.0002 max mem: 8426 +[2024-12-10 12:16:22 root] (utils.py 283): INFO Epoch: [1] [1750/2502] eta: 0:09:34 lr: 0.000020 loss_cls: 4.1591 (4.0271) grad_norm: 2.3977 (2.3897) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 12:16:30 root] (utils.py 283): INFO Epoch: [1] [1760/2502] eta: 0:09:27 lr: 0.000020 loss_cls: 4.1591 (4.0271) grad_norm: 2.4025 (2.3897) time: 0.7679 data: 0.0002 max mem: 8426 +[2024-12-10 12:16:37 root] (utils.py 283): INFO Epoch: [1] [1770/2502] eta: 0:09:19 lr: 0.000020 loss_cls: 3.9819 (4.0260) grad_norm: 2.3816 (2.3894) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-10 12:16:45 root] (utils.py 283): INFO Epoch: [1] [1780/2502] eta: 0:09:11 lr: 0.000020 loss_cls: 3.9819 (4.0254) grad_norm: 2.3695 (2.3896) time: 0.7587 data: 0.0002 max mem: 8426 +[2024-12-10 12:16:53 root] (utils.py 283): INFO Epoch: [1] [1790/2502] eta: 0:09:04 lr: 0.000020 loss_cls: 4.0032 (4.0251) grad_norm: 2.4064 (2.3895) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 12:17:00 root] (utils.py 283): INFO Epoch: [1] [1800/2502] eta: 0:08:56 lr: 0.000020 loss_cls: 3.9378 (4.0244) grad_norm: 2.4064 (2.3896) time: 0.7721 data: 0.0002 max mem: 8426 +[2024-12-10 12:17:08 root] (utils.py 283): INFO Epoch: [1] [1810/2502] eta: 0:08:49 lr: 0.000020 loss_cls: 4.2311 (4.0256) grad_norm: 2.3948 (2.3894) time: 0.7793 data: 0.0002 max mem: 8426 +[2024-12-10 12:17:16 root] (utils.py 283): INFO Epoch: [1] [1820/2502] eta: 0:08:41 lr: 0.000020 loss_cls: 4.2714 (4.0260) grad_norm: 2.3018 (2.3889) time: 0.7798 data: 0.0002 max mem: 8426 +[2024-12-10 12:17:24 root] (utils.py 283): INFO Epoch: [1] [1830/2502] eta: 0:08:33 lr: 0.000020 loss_cls: 4.0567 (4.0256) grad_norm: 2.2799 (2.3885) time: 0.7801 data: 0.0002 max mem: 8426 +[2024-12-10 12:17:32 root] (utils.py 283): INFO Epoch: [1] [1840/2502] eta: 0:08:26 lr: 0.000020 loss_cls: 4.0567 (4.0240) grad_norm: 2.3011 (2.3881) time: 0.7800 data: 0.0002 max mem: 8426 +[2024-12-10 12:17:39 root] (utils.py 283): INFO Epoch: [1] [1850/2502] eta: 0:08:18 lr: 0.000020 loss_cls: 4.0849 (4.0238) grad_norm: 2.3218 (2.3880) time: 0.7796 data: 0.0002 max mem: 8426 +[2024-12-10 12:17:47 root] (utils.py 283): INFO Epoch: [1] [1860/2502] eta: 0:08:11 lr: 0.000020 loss_cls: 4.1592 (4.0237) grad_norm: 2.3735 (2.3883) time: 0.7795 data: 0.0002 max mem: 8426 +[2024-12-10 12:17:55 root] (utils.py 283): INFO Epoch: [1] [1870/2502] eta: 0:08:03 lr: 0.000020 loss_cls: 4.2829 (4.0246) grad_norm: 2.3735 (2.3882) time: 0.7760 data: 0.0002 max mem: 8426 +[2024-12-10 12:18:03 root] (utils.py 283): INFO Epoch: [1] [1880/2502] eta: 0:07:55 lr: 0.000020 loss_cls: 4.2438 (4.0241) grad_norm: 2.3767 (2.3881) time: 0.7684 data: 0.0002 max mem: 8426 +[2024-12-10 12:18:10 root] (utils.py 283): INFO Epoch: [1] [1890/2502] eta: 0:07:48 lr: 0.000020 loss_cls: 4.0789 (4.0245) grad_norm: 2.3767 (2.3880) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 12:18:18 root] (utils.py 283): INFO Epoch: [1] [1900/2502] eta: 0:07:40 lr: 0.000020 loss_cls: 4.2557 (4.0256) grad_norm: 2.3617 (2.3880) time: 0.7625 data: 0.0002 max mem: 8426 +[2024-12-10 12:18:26 root] (utils.py 283): INFO Epoch: [1] [1910/2502] eta: 0:07:32 lr: 0.000020 loss_cls: 4.2601 (4.0255) grad_norm: 2.3617 (2.3878) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 12:18:33 root] (utils.py 283): INFO Epoch: [1] [1920/2502] eta: 0:07:25 lr: 0.000020 loss_cls: 4.2595 (4.0259) grad_norm: 2.3798 (2.3879) time: 0.7712 data: 0.0002 max mem: 8426 +[2024-12-10 12:18:41 root] (utils.py 283): INFO Epoch: [1] [1930/2502] eta: 0:07:17 lr: 0.000020 loss_cls: 4.1476 (4.0246) grad_norm: 2.3651 (2.3880) time: 0.7820 data: 0.0002 max mem: 8426 +[2024-12-10 12:18:49 root] (utils.py 283): INFO Epoch: [1] [1940/2502] eta: 0:07:10 lr: 0.000020 loss_cls: 4.2271 (4.0255) grad_norm: 2.3583 (2.3881) time: 0.7759 data: 0.0002 max mem: 8426 +[2024-12-10 12:18:56 root] (utils.py 283): INFO Epoch: [1] [1950/2502] eta: 0:07:02 lr: 0.000020 loss_cls: 4.3658 (4.0257) grad_norm: 2.4177 (2.3883) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 12:19:04 root] (utils.py 283): INFO Epoch: [1] [1960/2502] eta: 0:06:54 lr: 0.000020 loss_cls: 4.0967 (4.0241) grad_norm: 2.3800 (2.3880) time: 0.7612 data: 0.0002 max mem: 8426 +[2024-12-10 12:19:12 root] (utils.py 283): INFO Epoch: [1] [1970/2502] eta: 0:06:47 lr: 0.000020 loss_cls: 4.1790 (4.0264) grad_norm: 2.3508 (2.3882) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 12:19:19 root] (utils.py 283): INFO Epoch: [1] [1980/2502] eta: 0:06:39 lr: 0.000020 loss_cls: 4.4491 (4.0265) grad_norm: 2.3680 (2.3885) time: 0.7643 data: 0.0003 max mem: 8426 +[2024-12-10 12:19:27 root] (utils.py 283): INFO Epoch: [1] [1990/2502] eta: 0:06:31 lr: 0.000020 loss_cls: 3.8179 (4.0242) grad_norm: 2.3680 (2.3884) time: 0.7642 data: 0.0003 max mem: 8426 +[2024-12-10 12:19:35 root] (utils.py 283): INFO Epoch: [1] [2000/2502] eta: 0:06:24 lr: 0.000020 loss_cls: 4.0424 (4.0248) grad_norm: 2.3505 (2.3883) time: 0.7654 data: 0.0002 max mem: 8426 +[2024-12-10 12:19:42 root] (utils.py 283): INFO Epoch: [1] [2010/2502] eta: 0:06:16 lr: 0.000020 loss_cls: 4.2426 (4.0254) grad_norm: 2.3196 (2.3878) time: 0.7642 data: 0.0003 max mem: 8426 +[2024-12-10 12:19:50 root] (utils.py 283): INFO Epoch: [1] [2020/2502] eta: 0:06:08 lr: 0.000020 loss_cls: 4.1561 (4.0251) grad_norm: 2.2964 (2.3875) time: 0.7633 data: 0.0003 max mem: 8426 +[2024-12-10 12:19:58 root] (utils.py 283): INFO Epoch: [1] [2030/2502] eta: 0:06:01 lr: 0.000020 loss_cls: 4.0884 (4.0255) grad_norm: 2.3226 (2.3873) time: 0.7705 data: 0.0002 max mem: 8426 +[2024-12-10 12:20:05 root] (utils.py 283): INFO Epoch: [1] [2040/2502] eta: 0:05:53 lr: 0.000020 loss_cls: 4.2549 (4.0249) grad_norm: 2.3518 (2.3872) time: 0.7727 data: 0.0002 max mem: 8426 +[2024-12-10 12:20:13 root] (utils.py 283): INFO Epoch: [1] [2050/2502] eta: 0:05:45 lr: 0.000020 loss_cls: 4.0332 (4.0255) grad_norm: 2.3628 (2.3870) time: 0.7659 data: 0.0002 max mem: 8426 +[2024-12-10 12:20:21 root] (utils.py 283): INFO Epoch: [1] [2060/2502] eta: 0:05:38 lr: 0.000020 loss_cls: 4.3700 (4.0265) grad_norm: 2.3046 (2.3869) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 12:20:28 root] (utils.py 283): INFO Epoch: [1] [2070/2502] eta: 0:05:30 lr: 0.000020 loss_cls: 4.3954 (4.0266) grad_norm: 2.2852 (2.3865) time: 0.7632 data: 0.0002 max mem: 8426 +[2024-12-10 12:20:36 root] (utils.py 283): INFO Epoch: [1] [2080/2502] eta: 0:05:22 lr: 0.000020 loss_cls: 4.3337 (4.0273) grad_norm: 2.2971 (2.3865) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 12:20:44 root] (utils.py 283): INFO Epoch: [1] [2090/2502] eta: 0:05:15 lr: 0.000020 loss_cls: 4.3337 (4.0279) grad_norm: 2.3595 (2.3865) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 12:20:51 root] (utils.py 283): INFO Epoch: [1] [2100/2502] eta: 0:05:07 lr: 0.000020 loss_cls: 4.0811 (4.0280) grad_norm: 2.3375 (2.3862) time: 0.7614 data: 0.0002 max mem: 8426 +[2024-12-10 12:20:59 root] (utils.py 283): INFO Epoch: [1] [2110/2502] eta: 0:04:59 lr: 0.000020 loss_cls: 3.9154 (4.0267) grad_norm: 2.2885 (2.3858) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 12:21:06 root] (utils.py 283): INFO Epoch: [1] [2120/2502] eta: 0:04:52 lr: 0.000020 loss_cls: 4.0879 (4.0276) grad_norm: 2.2736 (2.3856) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 12:21:14 root] (utils.py 283): INFO Epoch: [1] [2130/2502] eta: 0:04:44 lr: 0.000020 loss_cls: 4.3088 (4.0276) grad_norm: 2.3273 (2.3856) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 12:21:22 root] (utils.py 283): INFO Epoch: [1] [2140/2502] eta: 0:04:36 lr: 0.000020 loss_cls: 4.1673 (4.0288) grad_norm: 2.3119 (2.3854) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 12:21:29 root] (utils.py 283): INFO Epoch: [1] [2150/2502] eta: 0:04:29 lr: 0.000020 loss_cls: 4.1568 (4.0290) grad_norm: 2.2789 (2.3851) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 12:21:37 root] (utils.py 283): INFO Epoch: [1] [2160/2502] eta: 0:04:21 lr: 0.000020 loss_cls: 4.1587 (4.0302) grad_norm: 2.2652 (2.3847) time: 0.7630 data: 0.0002 max mem: 8426 +[2024-12-10 12:21:45 root] (utils.py 283): INFO Epoch: [1] [2170/2502] eta: 0:04:13 lr: 0.000020 loss_cls: 4.1979 (4.0302) grad_norm: 2.2951 (2.3845) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 12:21:52 root] (utils.py 283): INFO Epoch: [1] [2180/2502] eta: 0:04:06 lr: 0.000020 loss_cls: 4.1979 (4.0307) grad_norm: 2.4026 (2.3848) time: 0.7610 data: 0.0002 max mem: 8426 +[2024-12-10 12:22:00 root] (utils.py 283): INFO Epoch: [1] [2190/2502] eta: 0:03:58 lr: 0.000020 loss_cls: 4.3390 (4.0322) grad_norm: 2.3495 (2.3845) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-10 12:22:08 root] (utils.py 283): INFO Epoch: [1] [2200/2502] eta: 0:03:51 lr: 0.000020 loss_cls: 4.3390 (4.0333) grad_norm: 2.2809 (2.3843) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-10 12:22:15 root] (utils.py 283): INFO Epoch: [1] [2210/2502] eta: 0:03:43 lr: 0.000020 loss_cls: 4.0709 (4.0331) grad_norm: 2.3647 (2.3846) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-10 12:22:23 root] (utils.py 283): INFO Epoch: [1] [2220/2502] eta: 0:03:35 lr: 0.000020 loss_cls: 4.2465 (4.0343) grad_norm: 2.4665 (2.3848) time: 0.7592 data: 0.0002 max mem: 8426 +[2024-12-10 12:22:30 root] (utils.py 283): INFO Epoch: [1] [2230/2502] eta: 0:03:28 lr: 0.000020 loss_cls: 4.2465 (4.0332) grad_norm: 2.3664 (2.3849) time: 0.7588 data: 0.0002 max mem: 8426 +[2024-12-10 12:22:38 root] (utils.py 283): INFO Epoch: [1] [2240/2502] eta: 0:03:20 lr: 0.000020 loss_cls: 4.2453 (4.0343) grad_norm: 2.3177 (2.3845) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 12:22:46 root] (utils.py 283): INFO Epoch: [1] [2250/2502] eta: 0:03:12 lr: 0.000020 loss_cls: 4.2453 (4.0334) grad_norm: 2.3128 (2.3844) time: 0.7630 data: 0.0002 max mem: 8426 +[2024-12-10 12:22:53 root] (utils.py 283): INFO Epoch: [1] [2260/2502] eta: 0:03:05 lr: 0.000020 loss_cls: 4.0438 (4.0334) grad_norm: 2.3085 (2.3841) time: 0.7651 data: 0.0003 max mem: 8426 +[2024-12-10 12:23:01 root] (utils.py 283): INFO Epoch: [1] [2270/2502] eta: 0:02:57 lr: 0.000020 loss_cls: 3.9916 (4.0323) grad_norm: 2.3052 (2.3839) time: 0.7660 data: 0.0003 max mem: 8426 +[2024-12-10 12:23:09 root] (utils.py 283): INFO Epoch: [1] [2280/2502] eta: 0:02:49 lr: 0.000020 loss_cls: 3.9461 (4.0321) grad_norm: 2.3556 (2.3839) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 12:23:16 root] (utils.py 283): INFO Epoch: [1] [2290/2502] eta: 0:02:42 lr: 0.000020 loss_cls: 4.1294 (4.0324) grad_norm: 2.3541 (2.3837) time: 0.7659 data: 0.0002 max mem: 8426 +[2024-12-10 12:23:24 root] (utils.py 283): INFO Epoch: [1] [2300/2502] eta: 0:02:34 lr: 0.000020 loss_cls: 4.2241 (4.0328) grad_norm: 2.3228 (2.3836) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 12:23:31 root] (utils.py 283): INFO Epoch: [1] [2310/2502] eta: 0:02:26 lr: 0.000020 loss_cls: 4.2171 (4.0329) grad_norm: 2.2601 (2.3833) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 12:23:39 root] (utils.py 283): INFO Epoch: [1] [2320/2502] eta: 0:02:19 lr: 0.000020 loss_cls: 4.0195 (4.0318) grad_norm: 2.2779 (2.3831) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 12:23:47 root] (utils.py 283): INFO Epoch: [1] [2330/2502] eta: 0:02:11 lr: 0.000020 loss_cls: 4.0690 (4.0324) grad_norm: 2.3020 (2.3830) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-10 12:23:54 root] (utils.py 283): INFO Epoch: [1] [2340/2502] eta: 0:02:03 lr: 0.000020 loss_cls: 4.2609 (4.0328) grad_norm: 2.3727 (2.3828) time: 0.7661 data: 0.0002 max mem: 8426 +[2024-12-10 12:24:02 root] (utils.py 283): INFO Epoch: [1] [2350/2502] eta: 0:01:56 lr: 0.000020 loss_cls: 4.2658 (4.0321) grad_norm: 2.3727 (2.3830) time: 0.7717 data: 0.0002 max mem: 8426 +[2024-12-10 12:24:10 root] (utils.py 283): INFO Epoch: [1] [2360/2502] eta: 0:01:48 lr: 0.000020 loss_cls: 3.8754 (4.0307) grad_norm: 2.4022 (2.3832) time: 0.7704 data: 0.0002 max mem: 8426 +[2024-12-10 12:24:18 root] (utils.py 283): INFO Epoch: [1] [2370/2502] eta: 0:01:40 lr: 0.000020 loss_cls: 3.6820 (4.0291) grad_norm: 2.4392 (2.3833) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 12:24:25 root] (utils.py 283): INFO Epoch: [1] [2380/2502] eta: 0:01:33 lr: 0.000020 loss_cls: 3.8994 (4.0294) grad_norm: 2.3038 (2.3828) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 12:24:33 root] (utils.py 283): INFO Epoch: [1] [2390/2502] eta: 0:01:25 lr: 0.000020 loss_cls: 4.0446 (4.0292) grad_norm: 2.2483 (2.3825) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 12:24:40 root] (utils.py 283): INFO Epoch: [1] [2400/2502] eta: 0:01:18 lr: 0.000020 loss_cls: 3.9089 (4.0285) grad_norm: 2.2925 (2.3825) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-10 12:24:48 root] (utils.py 283): INFO Epoch: [1] [2410/2502] eta: 0:01:10 lr: 0.000020 loss_cls: 4.0380 (4.0284) grad_norm: 2.3264 (2.3822) time: 0.7625 data: 0.0002 max mem: 8426 +[2024-12-10 12:24:56 root] (utils.py 283): INFO Epoch: [1] [2420/2502] eta: 0:01:02 lr: 0.000020 loss_cls: 4.0646 (4.0288) grad_norm: 2.3012 (2.3819) time: 0.7547 data: 0.0003 max mem: 8426 +[2024-12-10 12:25:03 root] (utils.py 283): INFO Epoch: [1] [2430/2502] eta: 0:00:55 lr: 0.000020 loss_cls: 4.2920 (4.0293) grad_norm: 2.3348 (2.3819) time: 0.7550 data: 0.0002 max mem: 8426 +[2024-12-10 12:25:11 root] (utils.py 283): INFO Epoch: [1] [2440/2502] eta: 0:00:47 lr: 0.000020 loss_cls: 4.4112 (4.0304) grad_norm: 2.3420 (2.3817) time: 0.7538 data: 0.0002 max mem: 8426 +[2024-12-10 12:25:18 root] (utils.py 283): INFO Epoch: [1] [2450/2502] eta: 0:00:39 lr: 0.000020 loss_cls: 4.3396 (4.0306) grad_norm: 2.3461 (2.3821) time: 0.7499 data: 0.0002 max mem: 8426 +[2024-12-10 12:25:26 root] (utils.py 283): INFO Epoch: [1] [2460/2502] eta: 0:00:32 lr: 0.000020 loss_cls: 4.0413 (4.0294) grad_norm: 2.3572 (2.3819) time: 0.7494 data: 0.0002 max mem: 8426 +[2024-12-10 12:25:33 root] (utils.py 283): INFO Epoch: [1] [2470/2502] eta: 0:00:24 lr: 0.000020 loss_cls: 4.1074 (4.0299) grad_norm: 2.3572 (2.3819) time: 0.7500 data: 0.0002 max mem: 8426 +[2024-12-10 12:25:41 root] (utils.py 283): INFO Epoch: [1] [2480/2502] eta: 0:00:16 lr: 0.000020 loss_cls: 4.3093 (4.0316) grad_norm: 2.3321 (2.3819) time: 0.7498 data: 0.0002 max mem: 8426 +[2024-12-10 12:25:49 root] (utils.py 283): INFO Epoch: [1] [2490/2502] eta: 0:00:09 lr: 0.000020 loss_cls: 4.2133 (4.0317) grad_norm: 2.2822 (2.3817) time: 0.7722 data: 0.0226 max mem: 8426 +[2024-12-10 12:25:56 root] (utils.py 283): INFO Epoch: [1] [2500/2502] eta: 0:00:01 lr: 0.000020 loss_cls: 4.1897 (4.0322) grad_norm: 2.3239 (2.3815) time: 0.7721 data: 0.0226 max mem: 8426 +[2024-12-10 12:25:57 root] (utils.py 283): INFO Epoch: [1] [2501/2502] eta: 0:00:00 lr: 0.000020 loss_cls: 4.1969 (4.0326) grad_norm: 2.3239 (2.3816) time: 0.7722 data: 0.0226 max mem: 8426 +[2024-12-10 12:25:57 root] (utils.py 297): INFO Epoch: [1] Total time: 0:31:53 (0.7647 s / it) +[2024-12-10 12:25:57 root] (engine.py 179): INFO Averaged stats:lr: 0.000020 loss_cls: 4.1969 (4.0306) grad_norm: 2.3239 (2.3816) +[2024-12-10 12:25:57 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.6410 (0.6410) acc1: 87.5000 (87.5000) acc3: 96.8750 (96.8750) acc5: 96.8750 (96.8750) time: 0.1274 data: 0.0005 max mem: 8426 +[2024-12-10 12:25:59 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7669 (0.8210) acc1: 85.9375 (82.3153) acc3: 95.3125 (93.4659) acc5: 96.8750 (96.3778) time: 0.1278 data: 0.0003 max mem: 8426 +[2024-12-10 12:26:00 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8415 (0.8710) acc1: 80.4688 (81.1756) acc3: 92.1875 (93.0432) acc5: 95.3125 (95.6845) time: 0.1279 data: 0.0003 max mem: 8426 +[2024-12-10 12:26:01 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9647 (0.8845) acc1: 78.9062 (80.3679) acc3: 92.1875 (93.0444) acc5: 96.0938 (95.6401) time: 0.1284 data: 0.0004 max mem: 8426 +[2024-12-10 12:26:02 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8130 (0.8720) acc1: 79.6875 (80.7736) acc3: 93.7500 (93.1784) acc5: 96.0938 (95.7127) time: 0.1285 data: 0.0004 max mem: 8426 +[2024-12-10 12:26:04 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0368 (0.9603) acc1: 75.0000 (78.5539) acc3: 88.2812 (91.7586) acc5: 92.1875 (94.5006) time: 0.1282 data: 0.0004 max mem: 8426 +[2024-12-10 12:26:05 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2625 (1.0065) acc1: 69.5312 (77.6895) acc3: 85.9375 (90.8043) acc5: 89.0625 (93.7116) time: 0.1468 data: 0.0190 max mem: 8426 +[2024-12-10 12:26:07 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2441 (1.0481) acc1: 72.6562 (76.6505) acc3: 86.7188 (90.2619) acc5: 89.8438 (93.2438) time: 0.1764 data: 0.0486 max mem: 8426 +[2024-12-10 12:26:09 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2549 (1.0844) acc1: 72.6562 (75.9645) acc3: 85.1562 (89.5930) acc5: 89.8438 (92.7180) time: 0.1587 data: 0.0302 max mem: 8426 +[2024-12-10 12:26:10 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3057 (1.1114) acc1: 69.5312 (75.2576) acc3: 85.1562 (89.2256) acc5: 89.8438 (92.4536) time: 0.1551 data: 0.0249 max mem: 8426 +[2024-12-10 12:26:11 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1016 (1.0995) acc1: 73.4375 (75.5040) acc3: 88.2812 (89.4320) acc5: 91.4062 (92.6320) time: 0.1524 data: 0.0248 max mem: 8426 +[2024-12-10 12:26:11 root] (utils.py 297): INFO Test: Total time: 0:00:14 (0.1433 s / it) +[2024-12-10 12:26:11 root] (engine.py 264): INFO * Acc@1 75.412 Acc@3 89.456 Acc@5 92.688 loss 1.097 flops 1.285 layer_flops 1.251 +[2024-12-10 12:26:11 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 75.4% +[2024-12-10 12:26:11 root] (main.py 576): INFO Max accuracy: 75.41% +[2024-12-10 12:26:12 root] (utils.py 283): INFO Epoch: [2] [ 0/2502] eta: 0:34:16 lr: 0.000020 loss_cls: 4.6078 (4.6078) grad_norm: 2.2774 (2.2774) time: 0.8221 data: 0.0002 max mem: 8426 +[2024-12-10 12:26:20 root] (utils.py 283): INFO Epoch: [2] [ 10/2502] eta: 0:31:48 lr: 0.000020 loss_cls: 4.0818 (4.0699) grad_norm: 2.3387 (2.3387) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-10 12:26:27 root] (utils.py 283): INFO Epoch: [2] [ 20/2502] eta: 0:31:40 lr: 0.000020 loss_cls: 4.0168 (3.9785) grad_norm: 2.3387 (2.3313) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 12:26:35 root] (utils.py 283): INFO Epoch: [2] [ 30/2502] eta: 0:31:33 lr: 0.000020 loss_cls: 3.8246 (3.8812) grad_norm: 2.3343 (2.3355) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 12:26:43 root] (utils.py 283): INFO Epoch: [2] [ 40/2502] eta: 0:31:34 lr: 0.000020 loss_cls: 4.0427 (3.9296) grad_norm: 2.3343 (2.3303) time: 0.7739 data: 0.0002 max mem: 8426 +[2024-12-10 12:26:51 root] (utils.py 283): INFO Epoch: [2] [ 50/2502] eta: 0:31:31 lr: 0.000020 loss_cls: 4.0752 (3.9377) grad_norm: 2.3381 (2.3482) time: 0.7802 data: 0.0002 max mem: 8426 +[2024-12-10 12:26:58 root] (utils.py 283): INFO Epoch: [2] [ 60/2502] eta: 0:31:26 lr: 0.000020 loss_cls: 4.2203 (3.9672) grad_norm: 2.3381 (2.3539) time: 0.7789 data: 0.0002 max mem: 8426 +[2024-12-10 12:27:06 root] (utils.py 283): INFO Epoch: [2] [ 70/2502] eta: 0:31:14 lr: 0.000020 loss_cls: 4.0841 (3.9448) grad_norm: 2.3323 (2.3496) time: 0.7693 data: 0.0002 max mem: 8426 +[2024-12-10 12:27:14 root] (utils.py 283): INFO Epoch: [2] [ 80/2502] eta: 0:31:05 lr: 0.000020 loss_cls: 3.8360 (3.9457) grad_norm: 2.3505 (2.3518) time: 0.7626 data: 0.0003 max mem: 8426 +[2024-12-10 12:27:21 root] (utils.py 283): INFO Epoch: [2] [ 90/2502] eta: 0:30:56 lr: 0.000020 loss_cls: 3.9189 (3.9368) grad_norm: 2.3770 (2.3604) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 12:27:29 root] (utils.py 283): INFO Epoch: [2] [ 100/2502] eta: 0:30:47 lr: 0.000020 loss_cls: 3.9759 (3.9737) grad_norm: 2.3770 (2.3645) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-10 12:27:37 root] (utils.py 283): INFO Epoch: [2] [ 110/2502] eta: 0:30:39 lr: 0.000020 loss_cls: 4.1942 (3.9756) grad_norm: 2.3714 (2.3643) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 12:27:44 root] (utils.py 283): INFO Epoch: [2] [ 120/2502] eta: 0:30:30 lr: 0.000020 loss_cls: 3.8226 (3.9676) grad_norm: 2.3509 (2.3634) time: 0.7667 data: 0.0003 max mem: 8426 +[2024-12-10 12:27:52 root] (utils.py 283): INFO Epoch: [2] [ 130/2502] eta: 0:30:23 lr: 0.000020 loss_cls: 4.0952 (3.9791) grad_norm: 2.3657 (2.3680) time: 0.7673 data: 0.0002 max mem: 8426 +[2024-12-10 12:28:00 root] (utils.py 283): INFO Epoch: [2] [ 140/2502] eta: 0:30:14 lr: 0.000020 loss_cls: 4.1757 (3.9927) grad_norm: 2.4371 (2.3751) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 12:28:07 root] (utils.py 283): INFO Epoch: [2] [ 150/2502] eta: 0:30:05 lr: 0.000020 loss_cls: 4.1632 (3.9796) grad_norm: 2.4068 (2.3729) time: 0.7605 data: 0.0002 max mem: 8426 +[2024-12-10 12:28:15 root] (utils.py 283): INFO Epoch: [2] [ 160/2502] eta: 0:29:56 lr: 0.000020 loss_cls: 4.1632 (3.9964) grad_norm: 2.3529 (2.3733) time: 0.7615 data: 0.0002 max mem: 8426 +[2024-12-10 12:28:22 root] (utils.py 283): INFO Epoch: [2] [ 170/2502] eta: 0:29:48 lr: 0.000020 loss_cls: 4.2099 (3.9926) grad_norm: 2.3529 (2.3728) time: 0.7633 data: 0.0002 max mem: 8426 +[2024-12-10 12:28:30 root] (utils.py 283): INFO Epoch: [2] [ 180/2502] eta: 0:29:40 lr: 0.000020 loss_cls: 4.0994 (4.0052) grad_norm: 2.3559 (2.3759) time: 0.7630 data: 0.0003 max mem: 8426 +[2024-12-10 12:28:38 root] (utils.py 283): INFO Epoch: [2] [ 190/2502] eta: 0:29:32 lr: 0.000020 loss_cls: 4.0527 (3.9955) grad_norm: 2.3471 (2.3710) time: 0.7615 data: 0.0003 max mem: 8426 +[2024-12-10 12:28:45 root] (utils.py 283): INFO Epoch: [2] [ 200/2502] eta: 0:29:23 lr: 0.000020 loss_cls: 4.1424 (4.0037) grad_norm: 2.3106 (2.3705) time: 0.7609 data: 0.0002 max mem: 8426 +[2024-12-10 12:28:53 root] (utils.py 283): INFO Epoch: [2] [ 210/2502] eta: 0:29:17 lr: 0.000020 loss_cls: 4.3334 (4.0177) grad_norm: 2.3543 (2.3722) time: 0.7685 data: 0.0002 max mem: 8426 +[2024-12-10 12:29:01 root] (utils.py 283): INFO Epoch: [2] [ 220/2502] eta: 0:29:11 lr: 0.000020 loss_cls: 4.2732 (4.0243) grad_norm: 2.2981 (2.3656) time: 0.7795 data: 0.0002 max mem: 8426 +[2024-12-10 12:29:09 root] (utils.py 283): INFO Epoch: [2] [ 230/2502] eta: 0:29:05 lr: 0.000020 loss_cls: 4.1737 (4.0153) grad_norm: 2.2527 (2.3620) time: 0.7833 data: 0.0002 max mem: 8426 +[2024-12-10 12:29:17 root] (utils.py 283): INFO Epoch: [2] [ 240/2502] eta: 0:28:59 lr: 0.000020 loss_cls: 4.2693 (4.0305) grad_norm: 2.2856 (2.3619) time: 0.7889 data: 0.0002 max mem: 8426 +[2024-12-10 12:29:24 root] (utils.py 283): INFO Epoch: [2] [ 250/2502] eta: 0:28:51 lr: 0.000020 loss_cls: 4.3529 (4.0316) grad_norm: 2.3268 (2.3609) time: 0.7771 data: 0.0002 max mem: 8426 +[2024-12-10 12:29:32 root] (utils.py 283): INFO Epoch: [2] [ 260/2502] eta: 0:28:42 lr: 0.000020 loss_cls: 4.1090 (4.0329) grad_norm: 2.3829 (2.3623) time: 0.7585 data: 0.0002 max mem: 8426 +[2024-12-10 12:29:40 root] (utils.py 283): INFO Epoch: [2] [ 270/2502] eta: 0:28:34 lr: 0.000020 loss_cls: 4.0188 (4.0317) grad_norm: 2.3894 (2.3613) time: 0.7625 data: 0.0002 max mem: 8426 +[2024-12-10 12:29:47 root] (utils.py 283): INFO Epoch: [2] [ 280/2502] eta: 0:28:26 lr: 0.000020 loss_cls: 4.0945 (4.0378) grad_norm: 2.3894 (2.3639) time: 0.7632 data: 0.0002 max mem: 8426 +[2024-12-10 12:29:55 root] (utils.py 283): INFO Epoch: [2] [ 290/2502] eta: 0:28:18 lr: 0.000020 loss_cls: 4.2166 (4.0382) grad_norm: 2.4158 (2.3646) time: 0.7607 data: 0.0002 max mem: 8426 +[2024-12-10 12:30:02 root] (utils.py 283): INFO Epoch: [2] [ 300/2502] eta: 0:28:10 lr: 0.000020 loss_cls: 4.2226 (4.0475) grad_norm: 2.3243 (2.3635) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 12:30:10 root] (utils.py 283): INFO Epoch: [2] [ 310/2502] eta: 0:28:01 lr: 0.000020 loss_cls: 4.0680 (4.0394) grad_norm: 2.2957 (2.3636) time: 0.7599 data: 0.0003 max mem: 8426 +[2024-12-10 12:30:18 root] (utils.py 283): INFO Epoch: [2] [ 320/2502] eta: 0:27:53 lr: 0.000020 loss_cls: 3.7568 (4.0375) grad_norm: 2.3666 (2.3640) time: 0.7583 data: 0.0002 max mem: 8426 +[2024-12-10 12:30:25 root] (utils.py 283): INFO Epoch: [2] [ 330/2502] eta: 0:27:45 lr: 0.000020 loss_cls: 3.7568 (4.0307) grad_norm: 2.3282 (2.3631) time: 0.7580 data: 0.0002 max mem: 8426 +[2024-12-10 12:30:33 root] (utils.py 283): INFO Epoch: [2] [ 340/2502] eta: 0:27:37 lr: 0.000020 loss_cls: 4.1335 (4.0367) grad_norm: 2.3560 (2.3678) time: 0.7576 data: 0.0002 max mem: 8426 +[2024-12-10 12:30:40 root] (utils.py 283): INFO Epoch: [2] [ 350/2502] eta: 0:27:29 lr: 0.000020 loss_cls: 4.3651 (4.0392) grad_norm: 2.3873 (2.3680) time: 0.7592 data: 0.0002 max mem: 8426 +[2024-12-10 12:30:48 root] (utils.py 283): INFO Epoch: [2] [ 360/2502] eta: 0:27:21 lr: 0.000020 loss_cls: 4.0880 (4.0374) grad_norm: 2.3746 (2.3713) time: 0.7623 data: 0.0003 max mem: 8426 +[2024-12-10 12:30:56 root] (utils.py 283): INFO Epoch: [2] [ 370/2502] eta: 0:27:13 lr: 0.000020 loss_cls: 3.8645 (4.0331) grad_norm: 2.3259 (2.3699) time: 0.7650 data: 0.0003 max mem: 8426 +[2024-12-10 12:31:03 root] (utils.py 283): INFO Epoch: [2] [ 380/2502] eta: 0:27:05 lr: 0.000020 loss_cls: 4.0403 (4.0285) grad_norm: 2.2749 (2.3690) time: 0.7665 data: 0.0002 max mem: 8426 +[2024-12-10 12:31:11 root] (utils.py 283): INFO Epoch: [2] [ 390/2502] eta: 0:26:58 lr: 0.000020 loss_cls: 4.0403 (4.0272) grad_norm: 2.2745 (2.3675) time: 0.7670 data: 0.0002 max mem: 8426 +[2024-12-10 12:31:19 root] (utils.py 283): INFO Epoch: [2] [ 400/2502] eta: 0:26:50 lr: 0.000020 loss_cls: 4.1643 (4.0316) grad_norm: 2.2984 (2.3671) time: 0.7668 data: 0.0002 max mem: 8426 +[2024-12-10 12:31:26 root] (utils.py 283): INFO Epoch: [2] [ 410/2502] eta: 0:26:42 lr: 0.000020 loss_cls: 4.1602 (4.0314) grad_norm: 2.3371 (2.3680) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 12:31:34 root] (utils.py 283): INFO Epoch: [2] [ 420/2502] eta: 0:26:34 lr: 0.000020 loss_cls: 3.8485 (4.0235) grad_norm: 2.3305 (2.3661) time: 0.7597 data: 0.0002 max mem: 8426 +[2024-12-10 12:31:42 root] (utils.py 283): INFO Epoch: [2] [ 430/2502] eta: 0:26:27 lr: 0.000020 loss_cls: 4.0645 (4.0235) grad_norm: 2.2932 (2.3650) time: 0.7708 data: 0.0002 max mem: 8426 +[2024-12-10 12:31:49 root] (utils.py 283): INFO Epoch: [2] [ 440/2502] eta: 0:26:20 lr: 0.000020 loss_cls: 3.9558 (4.0227) grad_norm: 2.3349 (2.3653) time: 0.7808 data: 0.0002 max mem: 8426 +[2024-12-10 12:31:57 root] (utils.py 283): INFO Epoch: [2] [ 450/2502] eta: 0:26:13 lr: 0.000020 loss_cls: 4.0212 (4.0237) grad_norm: 2.3546 (2.3655) time: 0.7802 data: 0.0002 max mem: 8426 +[2024-12-10 12:32:05 root] (utils.py 283): INFO Epoch: [2] [ 460/2502] eta: 0:26:06 lr: 0.000020 loss_cls: 4.2243 (4.0274) grad_norm: 2.3824 (2.3654) time: 0.7806 data: 0.0002 max mem: 8426 +[2024-12-10 12:32:13 root] (utils.py 283): INFO Epoch: [2] [ 470/2502] eta: 0:25:58 lr: 0.000020 loss_cls: 4.3458 (4.0309) grad_norm: 2.3208 (2.3652) time: 0.7707 data: 0.0002 max mem: 8426 +[2024-12-10 12:32:20 root] (utils.py 283): INFO Epoch: [2] [ 480/2502] eta: 0:25:51 lr: 0.000020 loss_cls: 3.9890 (4.0255) grad_norm: 2.3284 (2.3652) time: 0.7673 data: 0.0003 max mem: 8426 +[2024-12-10 12:32:28 root] (utils.py 283): INFO Epoch: [2] [ 490/2502] eta: 0:25:43 lr: 0.000020 loss_cls: 4.0171 (4.0300) grad_norm: 2.3445 (2.3650) time: 0.7684 data: 0.0002 max mem: 8426 +[2024-12-10 12:32:36 root] (utils.py 283): INFO Epoch: [2] [ 500/2502] eta: 0:25:35 lr: 0.000020 loss_cls: 4.3528 (4.0305) grad_norm: 2.3152 (2.3631) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 12:32:43 root] (utils.py 283): INFO Epoch: [2] [ 510/2502] eta: 0:25:27 lr: 0.000020 loss_cls: 3.8488 (4.0270) grad_norm: 2.2684 (2.3622) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 12:32:51 root] (utils.py 283): INFO Epoch: [2] [ 520/2502] eta: 0:25:20 lr: 0.000020 loss_cls: 4.1417 (4.0317) grad_norm: 2.3696 (2.3623) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 12:32:59 root] (utils.py 283): INFO Epoch: [2] [ 530/2502] eta: 0:25:12 lr: 0.000020 loss_cls: 3.9654 (4.0241) grad_norm: 2.3616 (2.3620) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 12:33:06 root] (utils.py 283): INFO Epoch: [2] [ 540/2502] eta: 0:25:05 lr: 0.000020 loss_cls: 3.9063 (4.0220) grad_norm: 2.3412 (2.3615) time: 0.7693 data: 0.0002 max mem: 8426 +[2024-12-10 12:33:14 root] (utils.py 283): INFO Epoch: [2] [ 550/2502] eta: 0:24:57 lr: 0.000020 loss_cls: 4.0060 (4.0196) grad_norm: 2.3296 (2.3605) time: 0.7748 data: 0.0002 max mem: 8426 +[2024-12-10 12:33:22 root] (utils.py 283): INFO Epoch: [2] [ 560/2502] eta: 0:24:49 lr: 0.000020 loss_cls: 4.0760 (4.0204) grad_norm: 2.3017 (2.3598) time: 0.7668 data: 0.0002 max mem: 8426 +[2024-12-10 12:33:29 root] (utils.py 283): INFO Epoch: [2] [ 570/2502] eta: 0:24:41 lr: 0.000020 loss_cls: 4.0760 (4.0168) grad_norm: 2.2688 (2.3590) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 12:33:37 root] (utils.py 283): INFO Epoch: [2] [ 580/2502] eta: 0:24:34 lr: 0.000020 loss_cls: 3.9786 (4.0159) grad_norm: 2.2618 (2.3587) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-10 12:33:45 root] (utils.py 283): INFO Epoch: [2] [ 590/2502] eta: 0:24:26 lr: 0.000020 loss_cls: 4.2647 (4.0205) grad_norm: 2.3091 (2.3583) time: 0.7717 data: 0.0002 max mem: 8426 +[2024-12-10 12:33:52 root] (utils.py 283): INFO Epoch: [2] [ 600/2502] eta: 0:24:19 lr: 0.000020 loss_cls: 4.2647 (4.0153) grad_norm: 2.3521 (2.3583) time: 0.7716 data: 0.0002 max mem: 8426 +[2024-12-10 12:34:00 root] (utils.py 283): INFO Epoch: [2] [ 610/2502] eta: 0:24:11 lr: 0.000020 loss_cls: 3.7889 (4.0148) grad_norm: 2.3438 (2.3575) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 12:34:08 root] (utils.py 283): INFO Epoch: [2] [ 620/2502] eta: 0:24:03 lr: 0.000020 loss_cls: 4.1927 (4.0159) grad_norm: 2.3232 (2.3572) time: 0.7629 data: 0.0003 max mem: 8426 +[2024-12-10 12:34:16 root] (utils.py 283): INFO Epoch: [2] [ 630/2502] eta: 0:23:56 lr: 0.000020 loss_cls: 4.2823 (4.0223) grad_norm: 2.3753 (2.3585) time: 0.7719 data: 0.0003 max mem: 8426 +[2024-12-10 12:34:23 root] (utils.py 283): INFO Epoch: [2] [ 640/2502] eta: 0:23:48 lr: 0.000020 loss_cls: 4.2077 (4.0199) grad_norm: 2.3400 (2.3581) time: 0.7727 data: 0.0002 max mem: 8426 +[2024-12-10 12:34:31 root] (utils.py 283): INFO Epoch: [2] [ 650/2502] eta: 0:23:41 lr: 0.000020 loss_cls: 4.1357 (4.0191) grad_norm: 2.3129 (2.3575) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 12:34:39 root] (utils.py 283): INFO Epoch: [2] [ 660/2502] eta: 0:23:33 lr: 0.000020 loss_cls: 3.9559 (4.0179) grad_norm: 2.3351 (2.3583) time: 0.7667 data: 0.0003 max mem: 8426 +[2024-12-10 12:34:46 root] (utils.py 283): INFO Epoch: [2] [ 670/2502] eta: 0:23:25 lr: 0.000020 loss_cls: 3.7627 (4.0181) grad_norm: 2.4703 (2.3602) time: 0.7704 data: 0.0003 max mem: 8426 +[2024-12-10 12:34:54 root] (utils.py 283): INFO Epoch: [2] [ 680/2502] eta: 0:23:18 lr: 0.000020 loss_cls: 4.0923 (4.0201) grad_norm: 2.3505 (2.3602) time: 0.7711 data: 0.0002 max mem: 8426 +[2024-12-10 12:35:02 root] (utils.py 283): INFO Epoch: [2] [ 690/2502] eta: 0:23:10 lr: 0.000020 loss_cls: 4.2399 (4.0219) grad_norm: 2.3115 (2.3598) time: 0.7667 data: 0.0003 max mem: 8426 +[2024-12-10 12:35:09 root] (utils.py 283): INFO Epoch: [2] [ 700/2502] eta: 0:23:02 lr: 0.000020 loss_cls: 4.2653 (4.0237) grad_norm: 2.3370 (2.3604) time: 0.7660 data: 0.0003 max mem: 8426 +[2024-12-10 12:35:17 root] (utils.py 283): INFO Epoch: [2] [ 710/2502] eta: 0:22:54 lr: 0.000020 loss_cls: 4.2584 (4.0213) grad_norm: 2.3409 (2.3596) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 12:35:25 root] (utils.py 283): INFO Epoch: [2] [ 720/2502] eta: 0:22:47 lr: 0.000020 loss_cls: 3.8301 (4.0182) grad_norm: 2.3336 (2.3599) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 12:35:32 root] (utils.py 283): INFO Epoch: [2] [ 730/2502] eta: 0:22:39 lr: 0.000020 loss_cls: 3.8970 (4.0165) grad_norm: 2.3282 (2.3589) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 12:35:40 root] (utils.py 283): INFO Epoch: [2] [ 740/2502] eta: 0:22:31 lr: 0.000020 loss_cls: 3.9829 (4.0154) grad_norm: 2.2959 (2.3583) time: 0.7623 data: 0.0002 max mem: 8426 +[2024-12-10 12:35:47 root] (utils.py 283): INFO Epoch: [2] [ 750/2502] eta: 0:22:23 lr: 0.000020 loss_cls: 4.1379 (4.0179) grad_norm: 2.2959 (2.3574) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 12:35:55 root] (utils.py 283): INFO Epoch: [2] [ 760/2502] eta: 0:22:16 lr: 0.000020 loss_cls: 4.1764 (4.0207) grad_norm: 2.3056 (2.3574) time: 0.7618 data: 0.0003 max mem: 8426 +[2024-12-10 12:36:03 root] (utils.py 283): INFO Epoch: [2] [ 770/2502] eta: 0:22:08 lr: 0.000020 loss_cls: 4.1555 (4.0192) grad_norm: 2.3548 (2.3579) time: 0.7586 data: 0.0003 max mem: 8426 +[2024-12-10 12:36:10 root] (utils.py 283): INFO Epoch: [2] [ 780/2502] eta: 0:22:00 lr: 0.000020 loss_cls: 4.1150 (4.0203) grad_norm: 2.3505 (2.3580) time: 0.7587 data: 0.0002 max mem: 8426 +[2024-12-10 12:36:18 root] (utils.py 283): INFO Epoch: [2] [ 790/2502] eta: 0:21:52 lr: 0.000020 loss_cls: 4.1150 (4.0184) grad_norm: 2.3455 (2.3587) time: 0.7614 data: 0.0002 max mem: 8426 +[2024-12-10 12:36:26 root] (utils.py 283): INFO Epoch: [2] [ 800/2502] eta: 0:21:44 lr: 0.000020 loss_cls: 3.9131 (4.0185) grad_norm: 2.3455 (2.3584) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 12:36:33 root] (utils.py 283): INFO Epoch: [2] [ 810/2502] eta: 0:21:37 lr: 0.000020 loss_cls: 3.8955 (4.0182) grad_norm: 2.3237 (2.3585) time: 0.7699 data: 0.0003 max mem: 8426 +[2024-12-10 12:36:41 root] (utils.py 283): INFO Epoch: [2] [ 820/2502] eta: 0:21:29 lr: 0.000020 loss_cls: 4.3067 (4.0204) grad_norm: 2.3586 (2.3590) time: 0.7707 data: 0.0002 max mem: 8426 +[2024-12-10 12:36:49 root] (utils.py 283): INFO Epoch: [2] [ 830/2502] eta: 0:21:22 lr: 0.000020 loss_cls: 4.3067 (4.0215) grad_norm: 2.3537 (2.3590) time: 0.7654 data: 0.0002 max mem: 8426 +[2024-12-10 12:36:56 root] (utils.py 283): INFO Epoch: [2] [ 840/2502] eta: 0:21:14 lr: 0.000020 loss_cls: 4.1608 (4.0213) grad_norm: 2.3332 (2.3591) time: 0.7625 data: 0.0003 max mem: 8426 +[2024-12-10 12:37:04 root] (utils.py 283): INFO Epoch: [2] [ 850/2502] eta: 0:21:06 lr: 0.000020 loss_cls: 4.0697 (4.0196) grad_norm: 2.3450 (2.3588) time: 0.7596 data: 0.0002 max mem: 8426 +[2024-12-10 12:37:11 root] (utils.py 283): INFO Epoch: [2] [ 860/2502] eta: 0:20:58 lr: 0.000020 loss_cls: 4.2078 (4.0242) grad_norm: 2.2892 (2.3576) time: 0.7612 data: 0.0002 max mem: 8426 +[2024-12-10 12:37:19 root] (utils.py 283): INFO Epoch: [2] [ 870/2502] eta: 0:20:50 lr: 0.000020 loss_cls: 4.4799 (4.0294) grad_norm: 2.2892 (2.3573) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 12:37:27 root] (utils.py 283): INFO Epoch: [2] [ 880/2502] eta: 0:20:43 lr: 0.000020 loss_cls: 4.3853 (4.0312) grad_norm: 2.3297 (2.3570) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 12:37:34 root] (utils.py 283): INFO Epoch: [2] [ 890/2502] eta: 0:20:35 lr: 0.000020 loss_cls: 4.1327 (4.0290) grad_norm: 2.3598 (2.3573) time: 0.7671 data: 0.0003 max mem: 8426 +[2024-12-10 12:37:42 root] (utils.py 283): INFO Epoch: [2] [ 900/2502] eta: 0:20:27 lr: 0.000020 loss_cls: 3.8388 (4.0261) grad_norm: 2.3598 (2.3572) time: 0.7666 data: 0.0003 max mem: 8426 +[2024-12-10 12:37:50 root] (utils.py 283): INFO Epoch: [2] [ 910/2502] eta: 0:20:20 lr: 0.000020 loss_cls: 4.0725 (4.0278) grad_norm: 2.3753 (2.3584) time: 0.7616 data: 0.0003 max mem: 8426 +[2024-12-10 12:37:57 root] (utils.py 283): INFO Epoch: [2] [ 920/2502] eta: 0:20:12 lr: 0.000020 loss_cls: 4.2915 (4.0275) grad_norm: 2.3833 (2.3586) time: 0.7613 data: 0.0002 max mem: 8426 +[2024-12-10 12:38:05 root] (utils.py 283): INFO Epoch: [2] [ 930/2502] eta: 0:20:04 lr: 0.000020 loss_cls: 4.3230 (4.0293) grad_norm: 2.3276 (2.3584) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-10 12:38:13 root] (utils.py 283): INFO Epoch: [2] [ 940/2502] eta: 0:19:57 lr: 0.000020 loss_cls: 4.2917 (4.0310) grad_norm: 2.3377 (2.3585) time: 0.7679 data: 0.0003 max mem: 8426 +[2024-12-10 12:38:20 root] (utils.py 283): INFO Epoch: [2] [ 950/2502] eta: 0:19:49 lr: 0.000020 loss_cls: 4.2868 (4.0313) grad_norm: 2.3035 (2.3583) time: 0.7659 data: 0.0003 max mem: 8426 +[2024-12-10 12:38:28 root] (utils.py 283): INFO Epoch: [2] [ 960/2502] eta: 0:19:41 lr: 0.000020 loss_cls: 4.0219 (4.0292) grad_norm: 2.3473 (2.3584) time: 0.7631 data: 0.0003 max mem: 8426 +[2024-12-10 12:38:35 root] (utils.py 283): INFO Epoch: [2] [ 970/2502] eta: 0:19:33 lr: 0.000020 loss_cls: 4.1046 (4.0313) grad_norm: 2.3473 (2.3578) time: 0.7612 data: 0.0002 max mem: 8426 +[2024-12-10 12:38:43 root] (utils.py 283): INFO Epoch: [2] [ 980/2502] eta: 0:19:26 lr: 0.000020 loss_cls: 4.2766 (4.0331) grad_norm: 2.3039 (2.3584) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 12:38:51 root] (utils.py 283): INFO Epoch: [2] [ 990/2502] eta: 0:19:18 lr: 0.000020 loss_cls: 4.1416 (4.0332) grad_norm: 2.3518 (2.3583) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-10 12:38:58 root] (utils.py 283): INFO Epoch: [2] [1000/2502] eta: 0:19:10 lr: 0.000020 loss_cls: 4.0573 (4.0338) grad_norm: 2.3335 (2.3578) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 12:39:06 root] (utils.py 283): INFO Epoch: [2] [1010/2502] eta: 0:19:03 lr: 0.000020 loss_cls: 4.2685 (4.0358) grad_norm: 2.3704 (2.3584) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 12:39:14 root] (utils.py 283): INFO Epoch: [2] [1020/2502] eta: 0:18:55 lr: 0.000020 loss_cls: 4.2667 (4.0345) grad_norm: 2.3994 (2.3587) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 12:39:21 root] (utils.py 283): INFO Epoch: [2] [1030/2502] eta: 0:18:47 lr: 0.000020 loss_cls: 3.6587 (4.0313) grad_norm: 2.3690 (2.3586) time: 0.7625 data: 0.0002 max mem: 8426 +[2024-12-10 12:39:29 root] (utils.py 283): INFO Epoch: [2] [1040/2502] eta: 0:18:40 lr: 0.000020 loss_cls: 3.7744 (4.0311) grad_norm: 2.3690 (2.3588) time: 0.7606 data: 0.0002 max mem: 8426 +[2024-12-10 12:39:37 root] (utils.py 283): INFO Epoch: [2] [1050/2502] eta: 0:18:32 lr: 0.000020 loss_cls: 4.0834 (4.0308) grad_norm: 2.3430 (2.3588) time: 0.7672 data: 0.0002 max mem: 8426 +[2024-12-10 12:39:44 root] (utils.py 283): INFO Epoch: [2] [1060/2502] eta: 0:18:24 lr: 0.000020 loss_cls: 4.1070 (4.0315) grad_norm: 2.3493 (2.3593) time: 0.7684 data: 0.0002 max mem: 8426 +[2024-12-10 12:39:52 root] (utils.py 283): INFO Epoch: [2] [1070/2502] eta: 0:18:17 lr: 0.000020 loss_cls: 4.1822 (4.0329) grad_norm: 2.3058 (2.3589) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 12:40:00 root] (utils.py 283): INFO Epoch: [2] [1080/2502] eta: 0:18:09 lr: 0.000020 loss_cls: 4.1605 (4.0332) grad_norm: 2.2838 (2.3594) time: 0.7659 data: 0.0002 max mem: 8426 +[2024-12-10 12:40:07 root] (utils.py 283): INFO Epoch: [2] [1090/2502] eta: 0:18:01 lr: 0.000020 loss_cls: 4.2350 (4.0341) grad_norm: 2.2712 (2.3587) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 12:40:15 root] (utils.py 283): INFO Epoch: [2] [1100/2502] eta: 0:17:54 lr: 0.000020 loss_cls: 4.2350 (4.0336) grad_norm: 2.2858 (2.3584) time: 0.7673 data: 0.0002 max mem: 8426 +[2024-12-10 12:40:23 root] (utils.py 283): INFO Epoch: [2] [1110/2502] eta: 0:17:46 lr: 0.000020 loss_cls: 3.7578 (4.0303) grad_norm: 2.3370 (2.3580) time: 0.7717 data: 0.0002 max mem: 8426 +[2024-12-10 12:40:30 root] (utils.py 283): INFO Epoch: [2] [1120/2502] eta: 0:17:38 lr: 0.000020 loss_cls: 3.8398 (4.0302) grad_norm: 2.3768 (2.3589) time: 0.7683 data: 0.0002 max mem: 8426 +[2024-12-10 12:40:38 root] (utils.py 283): INFO Epoch: [2] [1130/2502] eta: 0:17:31 lr: 0.000020 loss_cls: 4.0471 (4.0298) grad_norm: 2.3465 (2.3586) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 12:40:46 root] (utils.py 283): INFO Epoch: [2] [1140/2502] eta: 0:17:23 lr: 0.000020 loss_cls: 3.8571 (4.0264) grad_norm: 2.3167 (2.3585) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 12:40:53 root] (utils.py 283): INFO Epoch: [2] [1150/2502] eta: 0:17:15 lr: 0.000020 loss_cls: 3.8571 (4.0274) grad_norm: 2.3167 (2.3582) time: 0.7611 data: 0.0003 max mem: 8426 +[2024-12-10 12:41:01 root] (utils.py 283): INFO Epoch: [2] [1160/2502] eta: 0:17:08 lr: 0.000020 loss_cls: 4.1871 (4.0274) grad_norm: 2.3270 (2.3584) time: 0.7598 data: 0.0003 max mem: 8426 +[2024-12-10 12:41:09 root] (utils.py 283): INFO Epoch: [2] [1170/2502] eta: 0:17:00 lr: 0.000020 loss_cls: 4.2814 (4.0292) grad_norm: 2.3270 (2.3585) time: 0.7678 data: 0.0003 max mem: 8426 +[2024-12-10 12:41:16 root] (utils.py 283): INFO Epoch: [2] [1180/2502] eta: 0:16:52 lr: 0.000020 loss_cls: 4.2814 (4.0289) grad_norm: 2.3146 (2.3582) time: 0.7764 data: 0.0002 max mem: 8426 +[2024-12-10 12:41:24 root] (utils.py 283): INFO Epoch: [2] [1190/2502] eta: 0:16:45 lr: 0.000020 loss_cls: 4.1623 (4.0290) grad_norm: 2.3317 (2.3584) time: 0.7718 data: 0.0002 max mem: 8426 +[2024-12-10 12:41:32 root] (utils.py 283): INFO Epoch: [2] [1200/2502] eta: 0:16:37 lr: 0.000020 loss_cls: 4.1623 (4.0304) grad_norm: 2.3379 (2.3583) time: 0.7655 data: 0.0003 max mem: 8426 +[2024-12-10 12:41:39 root] (utils.py 283): INFO Epoch: [2] [1210/2502] eta: 0:16:29 lr: 0.000020 loss_cls: 3.7850 (4.0262) grad_norm: 2.3379 (2.3586) time: 0.7634 data: 0.0003 max mem: 8426 +[2024-12-10 12:41:47 root] (utils.py 283): INFO Epoch: [2] [1220/2502] eta: 0:16:22 lr: 0.000020 loss_cls: 3.7778 (4.0265) grad_norm: 2.3241 (2.3581) time: 0.7672 data: 0.0002 max mem: 8426 +[2024-12-10 12:41:55 root] (utils.py 283): INFO Epoch: [2] [1230/2502] eta: 0:16:14 lr: 0.000020 loss_cls: 4.0840 (4.0251) grad_norm: 2.2734 (2.3576) time: 0.7670 data: 0.0002 max mem: 8426 +[2024-12-10 12:42:02 root] (utils.py 283): INFO Epoch: [2] [1240/2502] eta: 0:16:06 lr: 0.000020 loss_cls: 4.1693 (4.0272) grad_norm: 2.2894 (2.3582) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 12:42:10 root] (utils.py 283): INFO Epoch: [2] [1250/2502] eta: 0:15:59 lr: 0.000020 loss_cls: 4.3170 (4.0297) grad_norm: 2.3462 (2.3580) time: 0.7630 data: 0.0003 max mem: 8426 +[2024-12-10 12:42:18 root] (utils.py 283): INFO Epoch: [2] [1260/2502] eta: 0:15:51 lr: 0.000020 loss_cls: 4.3094 (4.0309) grad_norm: 2.3010 (2.3572) time: 0.7642 data: 0.0003 max mem: 8426 +[2024-12-10 12:42:25 root] (utils.py 283): INFO Epoch: [2] [1270/2502] eta: 0:15:43 lr: 0.000020 loss_cls: 4.1551 (4.0311) grad_norm: 2.3010 (2.3573) time: 0.7594 data: 0.0002 max mem: 8426 +[2024-12-10 12:42:33 root] (utils.py 283): INFO Epoch: [2] [1280/2502] eta: 0:15:36 lr: 0.000020 loss_cls: 4.1232 (4.0313) grad_norm: 2.3546 (2.3573) time: 0.7581 data: 0.0002 max mem: 8426 +[2024-12-10 12:42:40 root] (utils.py 283): INFO Epoch: [2] [1290/2502] eta: 0:15:28 lr: 0.000020 loss_cls: 4.2362 (4.0336) grad_norm: 2.3848 (2.3583) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-10 12:42:48 root] (utils.py 283): INFO Epoch: [2] [1300/2502] eta: 0:15:20 lr: 0.000020 loss_cls: 4.3107 (4.0358) grad_norm: 2.4410 (2.3588) time: 0.7659 data: 0.0002 max mem: 8426 +[2024-12-10 12:42:56 root] (utils.py 283): INFO Epoch: [2] [1310/2502] eta: 0:15:13 lr: 0.000020 loss_cls: 4.3364 (4.0375) grad_norm: 2.3757 (2.3594) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 12:43:03 root] (utils.py 283): INFO Epoch: [2] [1320/2502] eta: 0:15:05 lr: 0.000020 loss_cls: 4.4429 (4.0391) grad_norm: 2.3839 (2.3595) time: 0.7644 data: 0.0003 max mem: 8426 +[2024-12-10 12:43:11 root] (utils.py 283): INFO Epoch: [2] [1330/2502] eta: 0:14:57 lr: 0.000020 loss_cls: 4.1337 (4.0371) grad_norm: 2.3626 (2.3595) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 12:43:19 root] (utils.py 283): INFO Epoch: [2] [1340/2502] eta: 0:14:50 lr: 0.000020 loss_cls: 4.0278 (4.0378) grad_norm: 2.3172 (2.3598) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-10 12:43:26 root] (utils.py 283): INFO Epoch: [2] [1350/2502] eta: 0:14:42 lr: 0.000020 loss_cls: 4.2039 (4.0369) grad_norm: 2.3687 (2.3602) time: 0.7672 data: 0.0002 max mem: 8426 +[2024-12-10 12:43:34 root] (utils.py 283): INFO Epoch: [2] [1360/2502] eta: 0:14:34 lr: 0.000020 loss_cls: 4.2039 (4.0374) grad_norm: 2.3687 (2.3603) time: 0.7672 data: 0.0002 max mem: 8426 +[2024-12-10 12:43:42 root] (utils.py 283): INFO Epoch: [2] [1370/2502] eta: 0:14:27 lr: 0.000020 loss_cls: 4.3731 (4.0389) grad_norm: 2.3844 (2.3603) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 12:43:49 root] (utils.py 283): INFO Epoch: [2] [1380/2502] eta: 0:14:19 lr: 0.000020 loss_cls: 4.2270 (4.0388) grad_norm: 2.3831 (2.3603) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 12:43:57 root] (utils.py 283): INFO Epoch: [2] [1390/2502] eta: 0:14:11 lr: 0.000020 loss_cls: 4.1399 (4.0393) grad_norm: 2.3592 (2.3605) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 12:44:04 root] (utils.py 283): INFO Epoch: [2] [1400/2502] eta: 0:14:04 lr: 0.000020 loss_cls: 4.1422 (4.0392) grad_norm: 2.3549 (2.3604) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 12:44:12 root] (utils.py 283): INFO Epoch: [2] [1410/2502] eta: 0:13:56 lr: 0.000020 loss_cls: 4.0588 (4.0391) grad_norm: 2.3009 (2.3604) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 12:44:20 root] (utils.py 283): INFO Epoch: [2] [1420/2502] eta: 0:13:48 lr: 0.000020 loss_cls: 4.2984 (4.0391) grad_norm: 2.2526 (2.3601) time: 0.7695 data: 0.0002 max mem: 8426 +[2024-12-10 12:44:27 root] (utils.py 283): INFO Epoch: [2] [1430/2502] eta: 0:13:41 lr: 0.000020 loss_cls: 3.8221 (4.0365) grad_norm: 2.2937 (2.3595) time: 0.7697 data: 0.0002 max mem: 8426 +[2024-12-10 12:44:35 root] (utils.py 283): INFO Epoch: [2] [1440/2502] eta: 0:13:33 lr: 0.000020 loss_cls: 4.0083 (4.0367) grad_norm: 2.3166 (2.3595) time: 0.7625 data: 0.0003 max mem: 8426 +[2024-12-10 12:44:43 root] (utils.py 283): INFO Epoch: [2] [1450/2502] eta: 0:13:25 lr: 0.000020 loss_cls: 4.2445 (4.0361) grad_norm: 2.3887 (2.3598) time: 0.7633 data: 0.0002 max mem: 8426 +[2024-12-10 12:44:50 root] (utils.py 283): INFO Epoch: [2] [1460/2502] eta: 0:13:17 lr: 0.000020 loss_cls: 3.8770 (4.0352) grad_norm: 2.3810 (2.3597) time: 0.7602 data: 0.0002 max mem: 8426 +[2024-12-10 12:44:58 root] (utils.py 283): INFO Epoch: [2] [1470/2502] eta: 0:13:10 lr: 0.000020 loss_cls: 3.6945 (4.0325) grad_norm: 2.3490 (2.3592) time: 0.7581 data: 0.0002 max mem: 8426 +[2024-12-10 12:45:06 root] (utils.py 283): INFO Epoch: [2] [1480/2502] eta: 0:13:02 lr: 0.000020 loss_cls: 3.6945 (4.0311) grad_norm: 2.3020 (2.3587) time: 0.7646 data: 0.0003 max mem: 8426 +[2024-12-10 12:45:13 root] (utils.py 283): INFO Epoch: [2] [1490/2502] eta: 0:12:54 lr: 0.000020 loss_cls: 4.1165 (4.0320) grad_norm: 2.3020 (2.3588) time: 0.7665 data: 0.0003 max mem: 8426 +[2024-12-10 12:45:21 root] (utils.py 283): INFO Epoch: [2] [1500/2502] eta: 0:12:47 lr: 0.000020 loss_cls: 4.1560 (4.0309) grad_norm: 2.3010 (2.3584) time: 0.7699 data: 0.0003 max mem: 8426 +[2024-12-10 12:45:29 root] (utils.py 283): INFO Epoch: [2] [1510/2502] eta: 0:12:39 lr: 0.000020 loss_cls: 4.0702 (4.0315) grad_norm: 2.2865 (2.3579) time: 0.7802 data: 0.0003 max mem: 8426 +[2024-12-10 12:45:36 root] (utils.py 283): INFO Epoch: [2] [1520/2502] eta: 0:12:32 lr: 0.000020 loss_cls: 3.9431 (4.0300) grad_norm: 2.2865 (2.3577) time: 0.7713 data: 0.0003 max mem: 8426 +[2024-12-10 12:45:44 root] (utils.py 283): INFO Epoch: [2] [1530/2502] eta: 0:12:24 lr: 0.000020 loss_cls: 3.9431 (4.0304) grad_norm: 2.2802 (2.3581) time: 0.7597 data: 0.0002 max mem: 8426 +[2024-12-10 12:45:52 root] (utils.py 283): INFO Epoch: [2] [1540/2502] eta: 0:12:16 lr: 0.000020 loss_cls: 4.2919 (4.0314) grad_norm: 2.2861 (2.3577) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 12:45:59 root] (utils.py 283): INFO Epoch: [2] [1550/2502] eta: 0:12:09 lr: 0.000020 loss_cls: 4.1834 (4.0316) grad_norm: 2.3266 (2.3583) time: 0.7712 data: 0.0002 max mem: 8426 +[2024-12-10 12:46:07 root] (utils.py 283): INFO Epoch: [2] [1560/2502] eta: 0:12:01 lr: 0.000020 loss_cls: 3.9927 (4.0316) grad_norm: 2.3503 (2.3582) time: 0.7759 data: 0.0002 max mem: 8426 +[2024-12-10 12:46:15 root] (utils.py 283): INFO Epoch: [2] [1570/2502] eta: 0:11:54 lr: 0.000020 loss_cls: 4.1338 (4.0330) grad_norm: 2.3445 (2.3583) time: 0.7792 data: 0.0002 max mem: 8426 +[2024-12-10 12:46:23 root] (utils.py 283): INFO Epoch: [2] [1580/2502] eta: 0:11:46 lr: 0.000020 loss_cls: 4.2382 (4.0325) grad_norm: 2.3445 (2.3579) time: 0.7708 data: 0.0002 max mem: 8426 +[2024-12-10 12:46:30 root] (utils.py 283): INFO Epoch: [2] [1590/2502] eta: 0:11:38 lr: 0.000020 loss_cls: 4.1769 (4.0329) grad_norm: 2.2775 (2.3574) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 12:46:38 root] (utils.py 283): INFO Epoch: [2] [1600/2502] eta: 0:11:31 lr: 0.000020 loss_cls: 4.1995 (4.0322) grad_norm: 2.2966 (2.3575) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 12:46:46 root] (utils.py 283): INFO Epoch: [2] [1610/2502] eta: 0:11:23 lr: 0.000020 loss_cls: 4.1383 (4.0317) grad_norm: 2.3169 (2.3577) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 12:46:53 root] (utils.py 283): INFO Epoch: [2] [1620/2502] eta: 0:11:15 lr: 0.000020 loss_cls: 4.2869 (4.0328) grad_norm: 2.3112 (2.3575) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 12:47:01 root] (utils.py 283): INFO Epoch: [2] [1630/2502] eta: 0:11:08 lr: 0.000020 loss_cls: 4.2870 (4.0321) grad_norm: 2.3112 (2.3575) time: 0.7700 data: 0.0002 max mem: 8426 +[2024-12-10 12:47:09 root] (utils.py 283): INFO Epoch: [2] [1640/2502] eta: 0:11:00 lr: 0.000020 loss_cls: 4.2288 (4.0330) grad_norm: 2.4059 (2.3575) time: 0.7666 data: 0.0002 max mem: 8426 +[2024-12-10 12:47:16 root] (utils.py 283): INFO Epoch: [2] [1650/2502] eta: 0:10:52 lr: 0.000020 loss_cls: 4.0680 (4.0317) grad_norm: 2.3883 (2.3575) time: 0.7609 data: 0.0002 max mem: 8426 +[2024-12-10 12:47:24 root] (utils.py 283): INFO Epoch: [2] [1660/2502] eta: 0:10:45 lr: 0.000020 loss_cls: 3.9631 (4.0312) grad_norm: 2.3396 (2.3573) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-10 12:47:32 root] (utils.py 283): INFO Epoch: [2] [1670/2502] eta: 0:10:37 lr: 0.000020 loss_cls: 3.9828 (4.0301) grad_norm: 2.3073 (2.3571) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 12:47:39 root] (utils.py 283): INFO Epoch: [2] [1680/2502] eta: 0:10:29 lr: 0.000020 loss_cls: 3.7836 (4.0289) grad_norm: 2.2613 (2.3564) time: 0.7630 data: 0.0002 max mem: 8426 +[2024-12-10 12:47:47 root] (utils.py 283): INFO Epoch: [2] [1690/2502] eta: 0:10:21 lr: 0.000020 loss_cls: 3.7836 (4.0279) grad_norm: 2.2822 (2.3564) time: 0.7631 data: 0.0003 max mem: 8426 +[2024-12-10 12:47:54 root] (utils.py 283): INFO Epoch: [2] [1700/2502] eta: 0:10:14 lr: 0.000020 loss_cls: 4.2178 (4.0291) grad_norm: 2.3344 (2.3565) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 12:48:02 root] (utils.py 283): INFO Epoch: [2] [1710/2502] eta: 0:10:06 lr: 0.000020 loss_cls: 4.2205 (4.0292) grad_norm: 2.3383 (2.3566) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 12:48:10 root] (utils.py 283): INFO Epoch: [2] [1720/2502] eta: 0:09:59 lr: 0.000020 loss_cls: 4.2287 (4.0313) grad_norm: 2.3215 (2.3565) time: 0.7729 data: 0.0002 max mem: 8426 +[2024-12-10 12:48:18 root] (utils.py 283): INFO Epoch: [2] [1730/2502] eta: 0:09:51 lr: 0.000020 loss_cls: 4.2287 (4.0309) grad_norm: 2.3130 (2.3563) time: 0.7730 data: 0.0002 max mem: 8426 +[2024-12-10 12:48:25 root] (utils.py 283): INFO Epoch: [2] [1740/2502] eta: 0:09:43 lr: 0.000020 loss_cls: 3.7851 (4.0289) grad_norm: 2.3902 (2.3569) time: 0.7711 data: 0.0002 max mem: 8426 +[2024-12-10 12:48:33 root] (utils.py 283): INFO Epoch: [2] [1750/2502] eta: 0:09:36 lr: 0.000020 loss_cls: 3.6285 (4.0275) grad_norm: 2.4150 (2.3572) time: 0.7713 data: 0.0002 max mem: 8426 +[2024-12-10 12:48:41 root] (utils.py 283): INFO Epoch: [2] [1760/2502] eta: 0:09:28 lr: 0.000020 loss_cls: 3.6285 (4.0263) grad_norm: 2.4097 (2.3576) time: 0.7683 data: 0.0002 max mem: 8426 +[2024-12-10 12:48:48 root] (utils.py 283): INFO Epoch: [2] [1770/2502] eta: 0:09:20 lr: 0.000020 loss_cls: 3.9943 (4.0269) grad_norm: 2.3776 (2.3577) time: 0.7679 data: 0.0002 max mem: 8426 +[2024-12-10 12:48:56 root] (utils.py 283): INFO Epoch: [2] [1780/2502] eta: 0:09:13 lr: 0.000020 loss_cls: 4.1835 (4.0269) grad_norm: 2.3167 (2.3572) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-10 12:49:04 root] (utils.py 283): INFO Epoch: [2] [1790/2502] eta: 0:09:05 lr: 0.000020 loss_cls: 4.1867 (4.0281) grad_norm: 2.2908 (2.3567) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 12:49:11 root] (utils.py 283): INFO Epoch: [2] [1800/2502] eta: 0:08:57 lr: 0.000020 loss_cls: 4.2300 (4.0277) grad_norm: 2.3177 (2.3570) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 12:49:19 root] (utils.py 283): INFO Epoch: [2] [1810/2502] eta: 0:08:50 lr: 0.000020 loss_cls: 4.1350 (4.0274) grad_norm: 2.3904 (2.3570) time: 0.7675 data: 0.0002 max mem: 8426 +[2024-12-10 12:49:27 root] (utils.py 283): INFO Epoch: [2] [1820/2502] eta: 0:08:42 lr: 0.000020 loss_cls: 4.1201 (4.0277) grad_norm: 2.3512 (2.3570) time: 0.7673 data: 0.0002 max mem: 8426 +[2024-12-10 12:49:34 root] (utils.py 283): INFO Epoch: [2] [1830/2502] eta: 0:08:34 lr: 0.000020 loss_cls: 4.0324 (4.0270) grad_norm: 2.3395 (2.3567) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 12:49:42 root] (utils.py 283): INFO Epoch: [2] [1840/2502] eta: 0:08:27 lr: 0.000020 loss_cls: 4.0767 (4.0269) grad_norm: 2.3087 (2.3566) time: 0.7649 data: 0.0003 max mem: 8426 +[2024-12-10 12:49:50 root] (utils.py 283): INFO Epoch: [2] [1850/2502] eta: 0:08:19 lr: 0.000020 loss_cls: 3.6778 (4.0249) grad_norm: 2.2980 (2.3565) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 12:49:57 root] (utils.py 283): INFO Epoch: [2] [1860/2502] eta: 0:08:11 lr: 0.000020 loss_cls: 3.6778 (4.0244) grad_norm: 2.2899 (2.3565) time: 0.7667 data: 0.0003 max mem: 8426 +[2024-12-10 12:50:05 root] (utils.py 283): INFO Epoch: [2] [1870/2502] eta: 0:08:04 lr: 0.000020 loss_cls: 4.0734 (4.0248) grad_norm: 2.2860 (2.3565) time: 0.7657 data: 0.0003 max mem: 8426 +[2024-12-10 12:50:13 root] (utils.py 283): INFO Epoch: [2] [1880/2502] eta: 0:07:56 lr: 0.000020 loss_cls: 4.0734 (4.0234) grad_norm: 2.3065 (2.3565) time: 0.7654 data: 0.0003 max mem: 8426 +[2024-12-10 12:50:20 root] (utils.py 283): INFO Epoch: [2] [1890/2502] eta: 0:07:48 lr: 0.000020 loss_cls: 4.0579 (4.0234) grad_norm: 2.2841 (2.3560) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-10 12:50:28 root] (utils.py 283): INFO Epoch: [2] [1900/2502] eta: 0:07:41 lr: 0.000020 loss_cls: 4.0722 (4.0225) grad_norm: 2.2841 (2.3557) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-10 12:50:35 root] (utils.py 283): INFO Epoch: [2] [1910/2502] eta: 0:07:33 lr: 0.000020 loss_cls: 3.9619 (4.0218) grad_norm: 2.3096 (2.3562) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 12:50:43 root] (utils.py 283): INFO Epoch: [2] [1920/2502] eta: 0:07:25 lr: 0.000020 loss_cls: 3.7894 (4.0211) grad_norm: 2.3852 (2.3562) time: 0.7677 data: 0.0003 max mem: 8426 +[2024-12-10 12:50:51 root] (utils.py 283): INFO Epoch: [2] [1930/2502] eta: 0:07:18 lr: 0.000020 loss_cls: 3.9621 (4.0214) grad_norm: 2.3033 (2.3562) time: 0.7698 data: 0.0003 max mem: 8426 +[2024-12-10 12:50:59 root] (utils.py 283): INFO Epoch: [2] [1940/2502] eta: 0:07:10 lr: 0.000020 loss_cls: 3.9293 (4.0202) grad_norm: 2.3301 (2.3564) time: 0.7736 data: 0.0002 max mem: 8426 +[2024-12-10 12:51:06 root] (utils.py 283): INFO Epoch: [2] [1950/2502] eta: 0:07:02 lr: 0.000020 loss_cls: 3.8257 (4.0187) grad_norm: 2.3256 (2.3568) time: 0.7722 data: 0.0002 max mem: 8426 +[2024-12-10 12:51:14 root] (utils.py 283): INFO Epoch: [2] [1960/2502] eta: 0:06:55 lr: 0.000020 loss_cls: 4.1524 (4.0196) grad_norm: 2.3487 (2.3568) time: 0.7655 data: 0.0003 max mem: 8426 +[2024-12-10 12:51:22 root] (utils.py 283): INFO Epoch: [2] [1970/2502] eta: 0:06:47 lr: 0.000020 loss_cls: 4.1043 (4.0185) grad_norm: 2.3487 (2.3567) time: 0.7661 data: 0.0003 max mem: 8426 +[2024-12-10 12:51:29 root] (utils.py 283): INFO Epoch: [2] [1980/2502] eta: 0:06:39 lr: 0.000020 loss_cls: 4.0707 (4.0187) grad_norm: 2.2855 (2.3566) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 12:51:37 root] (utils.py 283): INFO Epoch: [2] [1990/2502] eta: 0:06:32 lr: 0.000020 loss_cls: 3.9783 (4.0178) grad_norm: 2.3089 (2.3567) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 12:51:45 root] (utils.py 283): INFO Epoch: [2] [2000/2502] eta: 0:06:24 lr: 0.000020 loss_cls: 3.7618 (4.0158) grad_norm: 2.3208 (2.3565) time: 0.7610 data: 0.0002 max mem: 8426 +[2024-12-10 12:51:52 root] (utils.py 283): INFO Epoch: [2] [2010/2502] eta: 0:06:16 lr: 0.000020 loss_cls: 3.7618 (4.0155) grad_norm: 2.3142 (2.3563) time: 0.7618 data: 0.0002 max mem: 8426 +[2024-12-10 12:52:00 root] (utils.py 283): INFO Epoch: [2] [2020/2502] eta: 0:06:09 lr: 0.000020 loss_cls: 4.0825 (4.0150) grad_norm: 2.3129 (2.3561) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 12:52:07 root] (utils.py 283): INFO Epoch: [2] [2030/2502] eta: 0:06:01 lr: 0.000020 loss_cls: 4.1196 (4.0152) grad_norm: 2.2633 (2.3558) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 12:52:15 root] (utils.py 283): INFO Epoch: [2] [2040/2502] eta: 0:05:53 lr: 0.000020 loss_cls: 4.0607 (4.0134) grad_norm: 2.3052 (2.3556) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 12:52:23 root] (utils.py 283): INFO Epoch: [2] [2050/2502] eta: 0:05:46 lr: 0.000020 loss_cls: 4.0607 (4.0129) grad_norm: 2.3220 (2.3555) time: 0.7647 data: 0.0003 max mem: 8426 +[2024-12-10 12:52:30 root] (utils.py 283): INFO Epoch: [2] [2060/2502] eta: 0:05:38 lr: 0.000020 loss_cls: 4.1917 (4.0139) grad_norm: 2.2944 (2.3553) time: 0.7656 data: 0.0003 max mem: 8426 +[2024-12-10 12:52:38 root] (utils.py 283): INFO Epoch: [2] [2070/2502] eta: 0:05:30 lr: 0.000020 loss_cls: 4.0887 (4.0130) grad_norm: 2.2581 (2.3552) time: 0.7723 data: 0.0003 max mem: 8426 +[2024-12-10 12:52:46 root] (utils.py 283): INFO Epoch: [2] [2080/2502] eta: 0:05:23 lr: 0.000020 loss_cls: 3.9308 (4.0115) grad_norm: 2.3052 (2.3550) time: 0.7806 data: 0.0002 max mem: 8426 +[2024-12-10 12:52:54 root] (utils.py 283): INFO Epoch: [2] [2090/2502] eta: 0:05:15 lr: 0.000020 loss_cls: 3.9887 (4.0116) grad_norm: 2.3254 (2.3549) time: 0.7793 data: 0.0002 max mem: 8426 +[2024-12-10 12:53:02 root] (utils.py 283): INFO Epoch: [2] [2100/2502] eta: 0:05:08 lr: 0.000020 loss_cls: 3.9887 (4.0115) grad_norm: 2.3156 (2.3545) time: 0.7772 data: 0.0002 max mem: 8426 +[2024-12-10 12:53:09 root] (utils.py 283): INFO Epoch: [2] [2110/2502] eta: 0:05:00 lr: 0.000020 loss_cls: 3.6824 (4.0103) grad_norm: 2.3052 (2.3545) time: 0.7780 data: 0.0002 max mem: 8426 +[2024-12-10 12:53:17 root] (utils.py 283): INFO Epoch: [2] [2120/2502] eta: 0:04:52 lr: 0.000020 loss_cls: 3.7558 (4.0097) grad_norm: 2.3357 (2.3545) time: 0.7791 data: 0.0002 max mem: 8426 +[2024-12-10 12:53:25 root] (utils.py 283): INFO Epoch: [2] [2130/2502] eta: 0:04:45 lr: 0.000020 loss_cls: 3.8224 (4.0102) grad_norm: 2.3289 (2.3544) time: 0.7845 data: 0.0002 max mem: 8426 +[2024-12-10 12:53:33 root] (utils.py 283): INFO Epoch: [2] [2140/2502] eta: 0:04:37 lr: 0.000020 loss_cls: 4.1010 (4.0099) grad_norm: 2.3178 (2.3545) time: 0.7862 data: 0.0003 max mem: 8426 +[2024-12-10 12:53:41 root] (utils.py 283): INFO Epoch: [2] [2150/2502] eta: 0:04:29 lr: 0.000020 loss_cls: 4.4126 (4.0123) grad_norm: 2.3489 (2.3546) time: 0.7806 data: 0.0002 max mem: 8426 +[2024-12-10 12:53:48 root] (utils.py 283): INFO Epoch: [2] [2160/2502] eta: 0:04:22 lr: 0.000020 loss_cls: 4.5103 (4.0137) grad_norm: 2.3489 (2.3548) time: 0.7789 data: 0.0002 max mem: 8426 +[2024-12-10 12:53:56 root] (utils.py 283): INFO Epoch: [2] [2170/2502] eta: 0:04:14 lr: 0.000020 loss_cls: 4.3605 (4.0142) grad_norm: 2.3698 (2.3551) time: 0.7803 data: 0.0002 max mem: 8426 +[2024-12-10 12:54:04 root] (utils.py 283): INFO Epoch: [2] [2180/2502] eta: 0:04:06 lr: 0.000020 loss_cls: 4.1762 (4.0131) grad_norm: 2.3532 (2.3550) time: 0.7807 data: 0.0002 max mem: 8426 +[2024-12-10 12:54:12 root] (utils.py 283): INFO Epoch: [2] [2190/2502] eta: 0:03:59 lr: 0.000020 loss_cls: 4.0277 (4.0127) grad_norm: 2.3330 (2.3551) time: 0.7811 data: 0.0002 max mem: 8426 +[2024-12-10 12:54:20 root] (utils.py 283): INFO Epoch: [2] [2200/2502] eta: 0:03:51 lr: 0.000020 loss_cls: 4.0747 (4.0132) grad_norm: 2.2934 (2.3549) time: 0.7817 data: 0.0002 max mem: 8426 +[2024-12-10 12:54:28 root] (utils.py 283): INFO Epoch: [2] [2210/2502] eta: 0:03:43 lr: 0.000020 loss_cls: 3.9781 (4.0123) grad_norm: 2.2794 (2.3546) time: 0.7811 data: 0.0002 max mem: 8426 +[2024-12-10 12:54:35 root] (utils.py 283): INFO Epoch: [2] [2220/2502] eta: 0:03:36 lr: 0.000020 loss_cls: 3.9856 (4.0128) grad_norm: 2.3065 (2.3546) time: 0.7805 data: 0.0002 max mem: 8426 +[2024-12-10 12:54:43 root] (utils.py 283): INFO Epoch: [2] [2230/2502] eta: 0:03:28 lr: 0.000020 loss_cls: 4.0480 (4.0130) grad_norm: 2.3863 (2.3548) time: 0.7800 data: 0.0003 max mem: 8426 +[2024-12-10 12:54:51 root] (utils.py 283): INFO Epoch: [2] [2240/2502] eta: 0:03:21 lr: 0.000020 loss_cls: 4.1077 (4.0136) grad_norm: 2.3863 (2.3549) time: 0.7790 data: 0.0003 max mem: 8426 +[2024-12-10 12:54:59 root] (utils.py 283): INFO Epoch: [2] [2250/2502] eta: 0:03:13 lr: 0.000020 loss_cls: 4.1865 (4.0129) grad_norm: 2.3169 (2.3551) time: 0.7798 data: 0.0002 max mem: 8426 +[2024-12-10 12:55:06 root] (utils.py 283): INFO Epoch: [2] [2260/2502] eta: 0:03:05 lr: 0.000020 loss_cls: 4.3175 (4.0143) grad_norm: 2.3337 (2.3554) time: 0.7736 data: 0.0002 max mem: 8426 +[2024-12-10 12:55:14 root] (utils.py 283): INFO Epoch: [2] [2270/2502] eta: 0:02:58 lr: 0.000020 loss_cls: 4.3175 (4.0150) grad_norm: 2.3408 (2.3552) time: 0.7632 data: 0.0003 max mem: 8426 +[2024-12-10 12:55:22 root] (utils.py 283): INFO Epoch: [2] [2280/2502] eta: 0:02:50 lr: 0.000020 loss_cls: 3.7041 (4.0132) grad_norm: 2.2581 (2.3551) time: 0.7610 data: 0.0003 max mem: 8426 +[2024-12-10 12:55:29 root] (utils.py 283): INFO Epoch: [2] [2290/2502] eta: 0:02:42 lr: 0.000020 loss_cls: 3.7545 (4.0128) grad_norm: 2.2617 (2.3548) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 12:55:37 root] (utils.py 283): INFO Epoch: [2] [2300/2502] eta: 0:02:34 lr: 0.000020 loss_cls: 4.0797 (4.0138) grad_norm: 2.2764 (2.3546) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 12:55:45 root] (utils.py 283): INFO Epoch: [2] [2310/2502] eta: 0:02:27 lr: 0.000020 loss_cls: 4.1347 (4.0141) grad_norm: 2.3192 (2.3548) time: 0.7628 data: 0.0003 max mem: 8426 +[2024-12-10 12:55:52 root] (utils.py 283): INFO Epoch: [2] [2320/2502] eta: 0:02:19 lr: 0.000020 loss_cls: 3.9945 (4.0139) grad_norm: 2.3589 (2.3546) time: 0.7648 data: 0.0003 max mem: 8426 +[2024-12-10 12:56:00 root] (utils.py 283): INFO Epoch: [2] [2330/2502] eta: 0:02:11 lr: 0.000020 loss_cls: 3.8607 (4.0128) grad_norm: 2.2750 (2.3543) time: 0.7701 data: 0.0002 max mem: 8426 +[2024-12-10 12:56:08 root] (utils.py 283): INFO Epoch: [2] [2340/2502] eta: 0:02:04 lr: 0.000020 loss_cls: 3.9079 (4.0137) grad_norm: 2.3076 (2.3542) time: 0.7684 data: 0.0002 max mem: 8426 +[2024-12-10 12:56:15 root] (utils.py 283): INFO Epoch: [2] [2350/2502] eta: 0:01:56 lr: 0.000020 loss_cls: 4.1635 (4.0138) grad_norm: 2.3439 (2.3542) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 12:56:23 root] (utils.py 283): INFO Epoch: [2] [2360/2502] eta: 0:01:48 lr: 0.000020 loss_cls: 4.0736 (4.0141) grad_norm: 2.3968 (2.3545) time: 0.7717 data: 0.0002 max mem: 8426 +[2024-12-10 12:56:31 root] (utils.py 283): INFO Epoch: [2] [2370/2502] eta: 0:01:41 lr: 0.000020 loss_cls: 3.9911 (4.0138) grad_norm: 2.3426 (2.3543) time: 0.7700 data: 0.0002 max mem: 8426 +[2024-12-10 12:56:38 root] (utils.py 283): INFO Epoch: [2] [2380/2502] eta: 0:01:33 lr: 0.000020 loss_cls: 3.9932 (4.0141) grad_norm: 2.3271 (2.3543) time: 0.7608 data: 0.0002 max mem: 8426 +[2024-12-10 12:56:46 root] (utils.py 283): INFO Epoch: [2] [2390/2502] eta: 0:01:25 lr: 0.000020 loss_cls: 4.3511 (4.0147) grad_norm: 2.3456 (2.3544) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 12:56:54 root] (utils.py 283): INFO Epoch: [2] [2400/2502] eta: 0:01:18 lr: 0.000020 loss_cls: 3.9316 (4.0133) grad_norm: 2.3237 (2.3541) time: 0.7650 data: 0.0003 max mem: 8426 +[2024-12-10 12:57:01 root] (utils.py 283): INFO Epoch: [2] [2410/2502] eta: 0:01:10 lr: 0.000020 loss_cls: 3.8238 (4.0127) grad_norm: 2.2599 (2.3538) time: 0.7641 data: 0.0003 max mem: 8426 +[2024-12-10 12:57:09 root] (utils.py 283): INFO Epoch: [2] [2420/2502] eta: 0:01:02 lr: 0.000020 loss_cls: 4.2146 (4.0135) grad_norm: 2.2704 (2.3538) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 12:57:16 root] (utils.py 283): INFO Epoch: [2] [2430/2502] eta: 0:00:55 lr: 0.000020 loss_cls: 4.2146 (4.0134) grad_norm: 2.3874 (2.3542) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 12:57:24 root] (utils.py 283): INFO Epoch: [2] [2440/2502] eta: 0:00:47 lr: 0.000020 loss_cls: 3.8082 (4.0118) grad_norm: 2.3184 (2.3541) time: 0.7649 data: 0.0003 max mem: 8426 +[2024-12-10 12:57:32 root] (utils.py 283): INFO Epoch: [2] [2450/2502] eta: 0:00:39 lr: 0.000020 loss_cls: 3.8479 (4.0117) grad_norm: 2.3032 (2.3541) time: 0.7752 data: 0.0002 max mem: 8426 +[2024-12-10 12:57:40 root] (utils.py 283): INFO Epoch: [2] [2460/2502] eta: 0:00:32 lr: 0.000020 loss_cls: 4.1619 (4.0123) grad_norm: 2.3201 (2.3542) time: 0.7740 data: 0.0002 max mem: 8426 +[2024-12-10 12:57:47 root] (utils.py 283): INFO Epoch: [2] [2470/2502] eta: 0:00:24 lr: 0.000020 loss_cls: 4.2174 (4.0129) grad_norm: 2.3166 (2.3540) time: 0.7744 data: 0.0002 max mem: 8426 +[2024-12-10 12:57:55 root] (utils.py 283): INFO Epoch: [2] [2480/2502] eta: 0:00:16 lr: 0.000020 loss_cls: 4.0490 (4.0130) grad_norm: 2.2502 (2.3535) time: 0.7853 data: 0.0002 max mem: 8426 +[2024-12-10 12:58:03 root] (utils.py 283): INFO Epoch: [2] [2490/2502] eta: 0:00:09 lr: 0.000020 loss_cls: 3.9185 (4.0122) grad_norm: 2.2065 (2.3535) time: 0.7970 data: 0.0236 max mem: 8426 +[2024-12-10 12:58:11 root] (utils.py 283): INFO Epoch: [2] [2500/2502] eta: 0:00:01 lr: 0.000020 loss_cls: 3.9484 (4.0117) grad_norm: 2.2898 (2.3536) time: 0.7869 data: 0.0236 max mem: 8426 +[2024-12-10 12:58:12 root] (utils.py 283): INFO Epoch: [2] [2501/2502] eta: 0:00:00 lr: 0.000020 loss_cls: 3.9484 (4.0117) grad_norm: 2.3180 (2.3536) time: 0.7878 data: 0.0236 max mem: 8426 +[2024-12-10 12:58:12 root] (utils.py 297): INFO Epoch: [2] Total time: 0:32:00 (0.7676 s / it) +[2024-12-10 12:58:12 root] (engine.py 179): INFO Averaged stats:lr: 0.000020 loss_cls: 3.9484 (4.0091) grad_norm: 2.3180 (2.3536) +[2024-12-10 12:58:12 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.6275 (0.6275) acc1: 87.5000 (87.5000) acc3: 96.8750 (96.8750) acc5: 100.0000 (100.0000) time: 0.1277 data: 0.0005 max mem: 8426 +[2024-12-10 12:58:14 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7458 (0.8115) acc1: 85.9375 (82.0312) acc3: 94.5312 (94.0341) acc5: 96.8750 (97.1591) time: 0.1278 data: 0.0004 max mem: 8426 +[2024-12-10 12:58:15 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8928 (0.8734) acc1: 78.9062 (80.3571) acc3: 92.9688 (93.1920) acc5: 95.3125 (95.9821) time: 0.1280 data: 0.0004 max mem: 8426 +[2024-12-10 12:58:16 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9102 (0.8902) acc1: 78.1250 (79.4859) acc3: 92.1875 (93.2460) acc5: 95.3125 (95.8165) time: 0.1285 data: 0.0005 max mem: 8426 +[2024-12-10 12:58:18 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8286 (0.8754) acc1: 80.4688 (80.1829) acc3: 94.5312 (93.3689) acc5: 95.3125 (95.7889) time: 0.1415 data: 0.0131 max mem: 8426 +[2024-12-10 12:58:19 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0643 (0.9616) acc1: 75.7812 (78.2169) acc3: 88.2812 (91.8964) acc5: 92.9688 (94.7304) time: 0.1412 data: 0.0131 max mem: 8426 +[2024-12-10 12:58:21 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2648 (1.0080) acc1: 70.3125 (77.4718) acc3: 85.9375 (90.9836) acc5: 89.8438 (94.0061) time: 0.1437 data: 0.0159 max mem: 8426 +[2024-12-10 12:58:22 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2244 (1.0482) acc1: 74.2188 (76.5955) acc3: 86.7188 (90.4159) acc5: 89.8438 (93.5519) time: 0.1515 data: 0.0238 max mem: 8426 +[2024-12-10 12:58:24 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2244 (1.0833) acc1: 72.6562 (75.8777) acc3: 86.7188 (89.7666) acc5: 90.6250 (93.0073) time: 0.1518 data: 0.0237 max mem: 8426 +[2024-12-10 12:58:25 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2819 (1.1119) acc1: 71.0938 (75.1202) acc3: 84.3750 (89.3201) acc5: 89.8438 (92.7627) time: 0.1440 data: 0.0158 max mem: 8426 +[2024-12-10 12:58:26 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1491 (1.1003) acc1: 72.6562 (75.4480) acc3: 89.0625 (89.5040) acc5: 91.4062 (92.9600) time: 0.1267 data: 0.0006 max mem: 8426 +[2024-12-10 12:58:26 root] (utils.py 297): INFO Test: Total time: 0:00:13 (0.1386 s / it) +[2024-12-10 12:58:26 root] (engine.py 264): INFO * Acc@1 75.392 Acc@3 89.424 Acc@5 92.760 loss 1.101 flops 1.285 layer_flops 1.251 +[2024-12-10 12:58:26 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 75.4% +[2024-12-10 12:58:26 root] (main.py 576): INFO Max accuracy: 75.41% +[2024-12-10 12:58:27 root] (utils.py 283): INFO Epoch: [3] [ 0/2502] eta: 0:32:09 lr: 0.000020 loss_cls: 3.1151 (3.1151) grad_norm: 2.4401 (2.4401) time: 0.7712 data: 0.0004 max mem: 8426 +[2024-12-10 12:58:35 root] (utils.py 283): INFO Epoch: [3] [ 10/2502] eta: 0:31:52 lr: 0.000020 loss_cls: 3.7040 (3.7877) grad_norm: 2.3101 (2.3132) time: 0.7676 data: 0.0003 max mem: 8426 +[2024-12-10 12:58:42 root] (utils.py 283): INFO Epoch: [3] [ 20/2502] eta: 0:31:39 lr: 0.000020 loss_cls: 3.8732 (3.8590) grad_norm: 2.2933 (2.3191) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 12:58:50 root] (utils.py 283): INFO Epoch: [3] [ 30/2502] eta: 0:31:29 lr: 0.000020 loss_cls: 4.0188 (3.8490) grad_norm: 2.2981 (2.3210) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 12:58:58 root] (utils.py 283): INFO Epoch: [3] [ 40/2502] eta: 0:31:20 lr: 0.000020 loss_cls: 4.0555 (3.9262) grad_norm: 2.3220 (2.3279) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 12:59:05 root] (utils.py 283): INFO Epoch: [3] [ 50/2502] eta: 0:31:12 lr: 0.000020 loss_cls: 4.0065 (3.8862) grad_norm: 2.3359 (2.3364) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 12:59:13 root] (utils.py 283): INFO Epoch: [3] [ 60/2502] eta: 0:31:03 lr: 0.000020 loss_cls: 3.7530 (3.8386) grad_norm: 2.3054 (2.3312) time: 0.7623 data: 0.0002 max mem: 8426 +[2024-12-10 12:59:21 root] (utils.py 283): INFO Epoch: [3] [ 70/2502] eta: 0:30:55 lr: 0.000020 loss_cls: 3.9448 (3.8674) grad_norm: 2.1826 (2.3123) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 12:59:28 root] (utils.py 283): INFO Epoch: [3] [ 80/2502] eta: 0:30:47 lr: 0.000020 loss_cls: 4.2432 (3.9019) grad_norm: 2.2226 (2.3153) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 12:59:36 root] (utils.py 283): INFO Epoch: [3] [ 90/2502] eta: 0:30:41 lr: 0.000020 loss_cls: 4.2432 (3.9003) grad_norm: 2.3143 (2.3208) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 12:59:44 root] (utils.py 283): INFO Epoch: [3] [ 100/2502] eta: 0:30:35 lr: 0.000020 loss_cls: 4.2491 (3.9204) grad_norm: 2.3685 (2.3272) time: 0.7686 data: 0.0002 max mem: 8426 +[2024-12-10 12:59:51 root] (utils.py 283): INFO Epoch: [3] [ 110/2502] eta: 0:30:28 lr: 0.000020 loss_cls: 4.2469 (3.9094) grad_norm: 2.3265 (2.3260) time: 0.7679 data: 0.0003 max mem: 8426 +[2024-12-10 12:59:59 root] (utils.py 283): INFO Epoch: [3] [ 120/2502] eta: 0:30:20 lr: 0.000020 loss_cls: 4.1270 (3.9227) grad_norm: 2.3002 (2.3246) time: 0.7640 data: 0.0003 max mem: 8426 +[2024-12-10 13:00:07 root] (utils.py 283): INFO Epoch: [3] [ 130/2502] eta: 0:30:16 lr: 0.000020 loss_cls: 4.2133 (3.9412) grad_norm: 2.2717 (2.3294) time: 0.7726 data: 0.0003 max mem: 8426 +[2024-12-10 13:00:14 root] (utils.py 283): INFO Epoch: [3] [ 140/2502] eta: 0:30:07 lr: 0.000020 loss_cls: 4.0733 (3.9411) grad_norm: 2.2875 (2.3278) time: 0.7723 data: 0.0003 max mem: 8426 +[2024-12-10 13:00:22 root] (utils.py 283): INFO Epoch: [3] [ 150/2502] eta: 0:29:59 lr: 0.000020 loss_cls: 4.0582 (3.9467) grad_norm: 2.2875 (2.3255) time: 0.7605 data: 0.0003 max mem: 8426 +[2024-12-10 13:00:30 root] (utils.py 283): INFO Epoch: [3] [ 160/2502] eta: 0:29:51 lr: 0.000020 loss_cls: 4.1875 (3.9632) grad_norm: 2.2987 (2.3261) time: 0.7645 data: 0.0003 max mem: 8426 +[2024-12-10 13:00:37 root] (utils.py 283): INFO Epoch: [3] [ 170/2502] eta: 0:29:44 lr: 0.000020 loss_cls: 4.1870 (3.9574) grad_norm: 2.3677 (2.3323) time: 0.7658 data: 0.0003 max mem: 8426 +[2024-12-10 13:00:45 root] (utils.py 283): INFO Epoch: [3] [ 180/2502] eta: 0:29:36 lr: 0.000020 loss_cls: 3.9859 (3.9601) grad_norm: 2.3767 (2.3341) time: 0.7633 data: 0.0002 max mem: 8426 +[2024-12-10 13:00:52 root] (utils.py 283): INFO Epoch: [3] [ 190/2502] eta: 0:29:28 lr: 0.000020 loss_cls: 4.0652 (3.9706) grad_norm: 2.2695 (2.3308) time: 0.7620 data: 0.0003 max mem: 8426 +[2024-12-10 13:01:00 root] (utils.py 283): INFO Epoch: [3] [ 200/2502] eta: 0:29:20 lr: 0.000020 loss_cls: 4.2709 (3.9810) grad_norm: 2.2319 (2.3319) time: 0.7625 data: 0.0003 max mem: 8426 +[2024-12-10 13:01:08 root] (utils.py 283): INFO Epoch: [3] [ 210/2502] eta: 0:29:12 lr: 0.000020 loss_cls: 4.0869 (3.9760) grad_norm: 2.2553 (2.3323) time: 0.7614 data: 0.0002 max mem: 8426 +[2024-12-10 13:01:15 root] (utils.py 283): INFO Epoch: [3] [ 220/2502] eta: 0:29:04 lr: 0.000020 loss_cls: 3.6374 (3.9514) grad_norm: 2.2651 (2.3318) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 13:01:23 root] (utils.py 283): INFO Epoch: [3] [ 230/2502] eta: 0:28:56 lr: 0.000020 loss_cls: 3.9535 (3.9560) grad_norm: 2.4022 (2.3372) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 13:01:31 root] (utils.py 283): INFO Epoch: [3] [ 240/2502] eta: 0:28:48 lr: 0.000020 loss_cls: 3.9535 (3.9583) grad_norm: 2.4359 (2.3395) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 13:01:38 root] (utils.py 283): INFO Epoch: [3] [ 250/2502] eta: 0:28:41 lr: 0.000020 loss_cls: 3.8554 (3.9553) grad_norm: 2.3770 (2.3398) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 13:01:46 root] (utils.py 283): INFO Epoch: [3] [ 260/2502] eta: 0:28:33 lr: 0.000020 loss_cls: 3.9841 (3.9568) grad_norm: 2.3155 (2.3406) time: 0.7638 data: 0.0003 max mem: 8426 +[2024-12-10 13:01:54 root] (utils.py 283): INFO Epoch: [3] [ 270/2502] eta: 0:28:25 lr: 0.000020 loss_cls: 4.0986 (3.9572) grad_norm: 2.3251 (2.3414) time: 0.7640 data: 0.0003 max mem: 8426 +[2024-12-10 13:02:01 root] (utils.py 283): INFO Epoch: [3] [ 280/2502] eta: 0:28:19 lr: 0.000020 loss_cls: 3.8071 (3.9561) grad_norm: 2.3645 (2.3410) time: 0.7740 data: 0.0002 max mem: 8426 +[2024-12-10 13:02:09 root] (utils.py 283): INFO Epoch: [3] [ 290/2502] eta: 0:28:13 lr: 0.000020 loss_cls: 3.8344 (3.9612) grad_norm: 2.3503 (2.3404) time: 0.7834 data: 0.0002 max mem: 8426 +[2024-12-10 13:02:17 root] (utils.py 283): INFO Epoch: [3] [ 300/2502] eta: 0:28:07 lr: 0.000020 loss_cls: 4.1359 (3.9625) grad_norm: 2.2717 (2.3388) time: 0.7822 data: 0.0002 max mem: 8426 +[2024-12-10 13:02:25 root] (utils.py 283): INFO Epoch: [3] [ 310/2502] eta: 0:28:00 lr: 0.000020 loss_cls: 4.1359 (3.9633) grad_norm: 2.2820 (2.3378) time: 0.7821 data: 0.0002 max mem: 8426 +[2024-12-10 13:02:33 root] (utils.py 283): INFO Epoch: [3] [ 320/2502] eta: 0:27:53 lr: 0.000020 loss_cls: 4.0085 (3.9584) grad_norm: 2.2996 (2.3365) time: 0.7809 data: 0.0002 max mem: 8426 +[2024-12-10 13:02:40 root] (utils.py 283): INFO Epoch: [3] [ 330/2502] eta: 0:27:46 lr: 0.000020 loss_cls: 4.0375 (3.9649) grad_norm: 2.2437 (2.3342) time: 0.7803 data: 0.0002 max mem: 8426 +[2024-12-10 13:02:48 root] (utils.py 283): INFO Epoch: [3] [ 340/2502] eta: 0:27:40 lr: 0.000020 loss_cls: 4.2415 (3.9639) grad_norm: 2.2534 (2.3334) time: 0.7840 data: 0.0002 max mem: 8426 +[2024-12-10 13:02:56 root] (utils.py 283): INFO Epoch: [3] [ 350/2502] eta: 0:27:33 lr: 0.000020 loss_cls: 4.0003 (3.9578) grad_norm: 2.2697 (2.3338) time: 0.7839 data: 0.0002 max mem: 8426 +[2024-12-10 13:03:04 root] (utils.py 283): INFO Epoch: [3] [ 360/2502] eta: 0:27:26 lr: 0.000020 loss_cls: 4.1185 (3.9661) grad_norm: 2.3236 (2.3350) time: 0.7830 data: 0.0002 max mem: 8426 +[2024-12-10 13:03:12 root] (utils.py 283): INFO Epoch: [3] [ 370/2502] eta: 0:27:20 lr: 0.000020 loss_cls: 4.2785 (3.9717) grad_norm: 2.2734 (2.3327) time: 0.7847 data: 0.0002 max mem: 8426 +[2024-12-10 13:03:20 root] (utils.py 283): INFO Epoch: [3] [ 380/2502] eta: 0:27:13 lr: 0.000020 loss_cls: 4.2128 (3.9706) grad_norm: 2.2734 (2.3329) time: 0.7849 data: 0.0002 max mem: 8426 +[2024-12-10 13:03:28 root] (utils.py 283): INFO Epoch: [3] [ 390/2502] eta: 0:27:06 lr: 0.000020 loss_cls: 4.2128 (3.9743) grad_norm: 2.2508 (2.3308) time: 0.7845 data: 0.0002 max mem: 8426 +[2024-12-10 13:03:35 root] (utils.py 283): INFO Epoch: [3] [ 400/2502] eta: 0:26:59 lr: 0.000020 loss_cls: 4.2879 (3.9816) grad_norm: 2.2378 (2.3322) time: 0.7840 data: 0.0002 max mem: 8426 +[2024-12-10 13:03:43 root] (utils.py 283): INFO Epoch: [3] [ 410/2502] eta: 0:26:52 lr: 0.000020 loss_cls: 3.8572 (3.9757) grad_norm: 2.3685 (2.3319) time: 0.7824 data: 0.0002 max mem: 8426 +[2024-12-10 13:03:51 root] (utils.py 283): INFO Epoch: [3] [ 420/2502] eta: 0:26:44 lr: 0.000020 loss_cls: 3.8213 (3.9768) grad_norm: 2.2990 (2.3324) time: 0.7780 data: 0.0002 max mem: 8426 +[2024-12-10 13:03:59 root] (utils.py 283): INFO Epoch: [3] [ 430/2502] eta: 0:26:36 lr: 0.000020 loss_cls: 3.9522 (3.9744) grad_norm: 2.2535 (2.3301) time: 0.7689 data: 0.0002 max mem: 8426 +[2024-12-10 13:04:06 root] (utils.py 283): INFO Epoch: [3] [ 440/2502] eta: 0:26:28 lr: 0.000020 loss_cls: 4.1531 (3.9791) grad_norm: 2.2597 (2.3300) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 13:04:14 root] (utils.py 283): INFO Epoch: [3] [ 450/2502] eta: 0:26:21 lr: 0.000020 loss_cls: 3.9021 (3.9746) grad_norm: 2.3468 (2.3308) time: 0.7714 data: 0.0002 max mem: 8426 +[2024-12-10 13:04:22 root] (utils.py 283): INFO Epoch: [3] [ 460/2502] eta: 0:26:14 lr: 0.000020 loss_cls: 3.9021 (3.9718) grad_norm: 2.3119 (2.3299) time: 0.7806 data: 0.0002 max mem: 8426 +[2024-12-10 13:04:30 root] (utils.py 283): INFO Epoch: [3] [ 470/2502] eta: 0:26:07 lr: 0.000020 loss_cls: 4.0182 (3.9713) grad_norm: 2.2702 (2.3302) time: 0.7842 data: 0.0002 max mem: 8426 +[2024-12-10 13:04:38 root] (utils.py 283): INFO Epoch: [3] [ 480/2502] eta: 0:26:00 lr: 0.000020 loss_cls: 3.9781 (3.9713) grad_norm: 2.3364 (2.3308) time: 0.7859 data: 0.0003 max mem: 8426 +[2024-12-10 13:04:45 root] (utils.py 283): INFO Epoch: [3] [ 490/2502] eta: 0:25:52 lr: 0.000020 loss_cls: 3.9781 (3.9703) grad_norm: 2.3364 (2.3313) time: 0.7839 data: 0.0002 max mem: 8426 +[2024-12-10 13:04:53 root] (utils.py 283): INFO Epoch: [3] [ 500/2502] eta: 0:25:45 lr: 0.000020 loss_cls: 4.0784 (3.9692) grad_norm: 2.3216 (2.3313) time: 0.7822 data: 0.0002 max mem: 8426 +[2024-12-10 13:05:01 root] (utils.py 283): INFO Epoch: [3] [ 510/2502] eta: 0:25:37 lr: 0.000020 loss_cls: 4.0456 (3.9682) grad_norm: 2.3161 (2.3318) time: 0.7787 data: 0.0002 max mem: 8426 +[2024-12-10 13:05:09 root] (utils.py 283): INFO Epoch: [3] [ 520/2502] eta: 0:25:30 lr: 0.000020 loss_cls: 3.7128 (3.9641) grad_norm: 2.2758 (2.3319) time: 0.7729 data: 0.0002 max mem: 8426 +[2024-12-10 13:05:16 root] (utils.py 283): INFO Epoch: [3] [ 530/2502] eta: 0:25:22 lr: 0.000020 loss_cls: 4.1775 (3.9686) grad_norm: 2.3616 (2.3319) time: 0.7734 data: 0.0002 max mem: 8426 +[2024-12-10 13:05:24 root] (utils.py 283): INFO Epoch: [3] [ 540/2502] eta: 0:25:14 lr: 0.000020 loss_cls: 4.2330 (3.9669) grad_norm: 2.2707 (2.3305) time: 0.7714 data: 0.0002 max mem: 8426 +[2024-12-10 13:05:32 root] (utils.py 283): INFO Epoch: [3] [ 550/2502] eta: 0:25:06 lr: 0.000020 loss_cls: 3.6177 (3.9600) grad_norm: 2.2966 (2.3322) time: 0.7679 data: 0.0002 max mem: 8426 +[2024-12-10 13:05:39 root] (utils.py 283): INFO Epoch: [3] [ 560/2502] eta: 0:24:58 lr: 0.000020 loss_cls: 4.1756 (3.9619) grad_norm: 2.2807 (2.3310) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 13:05:47 root] (utils.py 283): INFO Epoch: [3] [ 570/2502] eta: 0:24:50 lr: 0.000020 loss_cls: 4.3612 (3.9664) grad_norm: 2.2807 (2.3317) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-10 13:05:55 root] (utils.py 283): INFO Epoch: [3] [ 580/2502] eta: 0:24:42 lr: 0.000020 loss_cls: 4.2401 (3.9662) grad_norm: 2.3705 (2.3324) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 13:06:02 root] (utils.py 283): INFO Epoch: [3] [ 590/2502] eta: 0:24:34 lr: 0.000020 loss_cls: 4.1369 (3.9741) grad_norm: 2.3846 (2.3331) time: 0.7612 data: 0.0002 max mem: 8426 +[2024-12-10 13:06:10 root] (utils.py 283): INFO Epoch: [3] [ 600/2502] eta: 0:24:26 lr: 0.000020 loss_cls: 4.1034 (3.9700) grad_norm: 2.3443 (2.3335) time: 0.7609 data: 0.0003 max mem: 8426 +[2024-12-10 13:06:18 root] (utils.py 283): INFO Epoch: [3] [ 610/2502] eta: 0:24:18 lr: 0.000020 loss_cls: 3.6229 (3.9671) grad_norm: 2.2836 (2.3332) time: 0.7625 data: 0.0003 max mem: 8426 +[2024-12-10 13:06:25 root] (utils.py 283): INFO Epoch: [3] [ 620/2502] eta: 0:24:11 lr: 0.000020 loss_cls: 3.9887 (3.9705) grad_norm: 2.2836 (2.3327) time: 0.7685 data: 0.0003 max mem: 8426 +[2024-12-10 13:06:33 root] (utils.py 283): INFO Epoch: [3] [ 630/2502] eta: 0:24:03 lr: 0.000020 loss_cls: 3.9989 (3.9681) grad_norm: 2.3621 (2.3336) time: 0.7670 data: 0.0002 max mem: 8426 +[2024-12-10 13:06:41 root] (utils.py 283): INFO Epoch: [3] [ 640/2502] eta: 0:23:55 lr: 0.000020 loss_cls: 3.9989 (3.9708) grad_norm: 2.3942 (2.3333) time: 0.7670 data: 0.0002 max mem: 8426 +[2024-12-10 13:06:48 root] (utils.py 283): INFO Epoch: [3] [ 650/2502] eta: 0:23:47 lr: 0.000020 loss_cls: 4.2340 (3.9723) grad_norm: 2.3602 (2.3335) time: 0.7666 data: 0.0002 max mem: 8426 +[2024-12-10 13:06:56 root] (utils.py 283): INFO Epoch: [3] [ 660/2502] eta: 0:23:39 lr: 0.000020 loss_cls: 4.0219 (3.9702) grad_norm: 2.4067 (2.3348) time: 0.7696 data: 0.0002 max mem: 8426 +[2024-12-10 13:07:04 root] (utils.py 283): INFO Epoch: [3] [ 670/2502] eta: 0:23:32 lr: 0.000020 loss_cls: 4.0678 (3.9717) grad_norm: 2.3542 (2.3352) time: 0.7803 data: 0.0003 max mem: 8426 +[2024-12-10 13:07:12 root] (utils.py 283): INFO Epoch: [3] [ 680/2502] eta: 0:23:25 lr: 0.000020 loss_cls: 4.1876 (3.9736) grad_norm: 2.3119 (2.3345) time: 0.7818 data: 0.0003 max mem: 8426 +[2024-12-10 13:07:19 root] (utils.py 283): INFO Epoch: [3] [ 690/2502] eta: 0:23:17 lr: 0.000020 loss_cls: 4.0178 (3.9688) grad_norm: 2.2732 (2.3340) time: 0.7819 data: 0.0003 max mem: 8426 +[2024-12-10 13:07:27 root] (utils.py 283): INFO Epoch: [3] [ 700/2502] eta: 0:23:10 lr: 0.000020 loss_cls: 3.9204 (3.9712) grad_norm: 2.2732 (2.3335) time: 0.7903 data: 0.0003 max mem: 8426 +[2024-12-10 13:07:35 root] (utils.py 283): INFO Epoch: [3] [ 710/2502] eta: 0:23:03 lr: 0.000020 loss_cls: 4.1661 (3.9701) grad_norm: 2.2953 (2.3341) time: 0.7904 data: 0.0003 max mem: 8426 +[2024-12-10 13:07:43 root] (utils.py 283): INFO Epoch: [3] [ 720/2502] eta: 0:22:55 lr: 0.000020 loss_cls: 3.6255 (3.9658) grad_norm: 2.3477 (2.3338) time: 0.7806 data: 0.0002 max mem: 8426 +[2024-12-10 13:07:51 root] (utils.py 283): INFO Epoch: [3] [ 730/2502] eta: 0:22:47 lr: 0.000020 loss_cls: 3.4723 (3.9637) grad_norm: 2.3477 (2.3340) time: 0.7753 data: 0.0002 max mem: 8426 +[2024-12-10 13:07:58 root] (utils.py 283): INFO Epoch: [3] [ 740/2502] eta: 0:22:40 lr: 0.000020 loss_cls: 4.1216 (3.9647) grad_norm: 2.2686 (2.3336) time: 0.7698 data: 0.0002 max mem: 8426 +[2024-12-10 13:08:06 root] (utils.py 283): INFO Epoch: [3] [ 750/2502] eta: 0:22:32 lr: 0.000020 loss_cls: 4.2197 (3.9666) grad_norm: 2.2870 (2.3341) time: 0.7636 data: 0.0003 max mem: 8426 +[2024-12-10 13:08:14 root] (utils.py 283): INFO Epoch: [3] [ 760/2502] eta: 0:22:24 lr: 0.000020 loss_cls: 4.3149 (3.9705) grad_norm: 2.2870 (2.3338) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 13:08:21 root] (utils.py 283): INFO Epoch: [3] [ 770/2502] eta: 0:22:16 lr: 0.000020 loss_cls: 4.3293 (3.9729) grad_norm: 2.2933 (2.3330) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-10 13:08:29 root] (utils.py 283): INFO Epoch: [3] [ 780/2502] eta: 0:22:08 lr: 0.000020 loss_cls: 4.2014 (3.9731) grad_norm: 2.3060 (2.3340) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-10 13:08:37 root] (utils.py 283): INFO Epoch: [3] [ 790/2502] eta: 0:22:00 lr: 0.000020 loss_cls: 3.9353 (3.9732) grad_norm: 2.3422 (2.3334) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 13:08:44 root] (utils.py 283): INFO Epoch: [3] [ 800/2502] eta: 0:21:52 lr: 0.000020 loss_cls: 3.8377 (3.9715) grad_norm: 2.2460 (2.3330) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 13:08:52 root] (utils.py 283): INFO Epoch: [3] [ 810/2502] eta: 0:21:44 lr: 0.000020 loss_cls: 3.7845 (3.9704) grad_norm: 2.3237 (2.3331) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 13:09:00 root] (utils.py 283): INFO Epoch: [3] [ 820/2502] eta: 0:21:37 lr: 0.000020 loss_cls: 4.1897 (3.9720) grad_norm: 2.3247 (2.3334) time: 0.7671 data: 0.0002 max mem: 8426 +[2024-12-10 13:09:07 root] (utils.py 283): INFO Epoch: [3] [ 830/2502] eta: 0:21:29 lr: 0.000020 loss_cls: 4.2613 (3.9733) grad_norm: 2.3000 (2.3337) time: 0.7683 data: 0.0003 max mem: 8426 +[2024-12-10 13:09:15 root] (utils.py 283): INFO Epoch: [3] [ 840/2502] eta: 0:21:21 lr: 0.000020 loss_cls: 3.8671 (3.9704) grad_norm: 2.2551 (2.3325) time: 0.7663 data: 0.0003 max mem: 8426 +[2024-12-10 13:09:23 root] (utils.py 283): INFO Epoch: [3] [ 850/2502] eta: 0:21:13 lr: 0.000020 loss_cls: 3.9244 (3.9726) grad_norm: 2.2679 (2.3322) time: 0.7628 data: 0.0003 max mem: 8426 +[2024-12-10 13:09:30 root] (utils.py 283): INFO Epoch: [3] [ 860/2502] eta: 0:21:05 lr: 0.000020 loss_cls: 4.2109 (3.9728) grad_norm: 2.2872 (2.3317) time: 0.7617 data: 0.0002 max mem: 8426 +[2024-12-10 13:09:38 root] (utils.py 283): INFO Epoch: [3] [ 870/2502] eta: 0:20:57 lr: 0.000020 loss_cls: 4.3588 (3.9755) grad_norm: 2.3088 (2.3328) time: 0.7633 data: 0.0002 max mem: 8426 +[2024-12-10 13:09:45 root] (utils.py 283): INFO Epoch: [3] [ 880/2502] eta: 0:20:49 lr: 0.000020 loss_cls: 4.3522 (3.9781) grad_norm: 2.3838 (2.3331) time: 0.7615 data: 0.0002 max mem: 8426 +[2024-12-10 13:09:53 root] (utils.py 283): INFO Epoch: [3] [ 890/2502] eta: 0:20:42 lr: 0.000020 loss_cls: 4.0468 (3.9802) grad_norm: 2.3170 (2.3330) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 13:10:01 root] (utils.py 283): INFO Epoch: [3] [ 900/2502] eta: 0:20:34 lr: 0.000020 loss_cls: 3.9239 (3.9805) grad_norm: 2.3276 (2.3331) time: 0.7686 data: 0.0002 max mem: 8426 +[2024-12-10 13:10:08 root] (utils.py 283): INFO Epoch: [3] [ 910/2502] eta: 0:20:26 lr: 0.000020 loss_cls: 3.8918 (3.9772) grad_norm: 2.3172 (2.3329) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 13:10:16 root] (utils.py 283): INFO Epoch: [3] [ 920/2502] eta: 0:20:18 lr: 0.000020 loss_cls: 4.0347 (3.9778) grad_norm: 2.3142 (2.3327) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 13:10:24 root] (utils.py 283): INFO Epoch: [3] [ 930/2502] eta: 0:20:10 lr: 0.000020 loss_cls: 4.0637 (3.9782) grad_norm: 2.3450 (2.3338) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 13:10:31 root] (utils.py 283): INFO Epoch: [3] [ 940/2502] eta: 0:20:03 lr: 0.000020 loss_cls: 4.1770 (3.9810) grad_norm: 2.3937 (2.3341) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 13:10:39 root] (utils.py 283): INFO Epoch: [3] [ 950/2502] eta: 0:19:55 lr: 0.000020 loss_cls: 4.2992 (3.9833) grad_norm: 2.3560 (2.3342) time: 0.7681 data: 0.0002 max mem: 8426 +[2024-12-10 13:10:47 root] (utils.py 283): INFO Epoch: [3] [ 960/2502] eta: 0:19:47 lr: 0.000020 loss_cls: 4.1966 (3.9844) grad_norm: 2.2997 (2.3339) time: 0.7680 data: 0.0002 max mem: 8426 +[2024-12-10 13:10:54 root] (utils.py 283): INFO Epoch: [3] [ 970/2502] eta: 0:19:39 lr: 0.000020 loss_cls: 3.9699 (3.9824) grad_norm: 2.2715 (2.3338) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-10 13:11:02 root] (utils.py 283): INFO Epoch: [3] [ 980/2502] eta: 0:19:32 lr: 0.000020 loss_cls: 3.6232 (3.9802) grad_norm: 2.3060 (2.3334) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 13:11:10 root] (utils.py 283): INFO Epoch: [3] [ 990/2502] eta: 0:19:24 lr: 0.000020 loss_cls: 4.1119 (3.9856) grad_norm: 2.3094 (2.3337) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 13:11:17 root] (utils.py 283): INFO Epoch: [3] [1000/2502] eta: 0:19:16 lr: 0.000020 loss_cls: 4.4166 (3.9868) grad_norm: 2.3515 (2.3338) time: 0.7651 data: 0.0003 max mem: 8426 +[2024-12-10 13:11:25 root] (utils.py 283): INFO Epoch: [3] [1010/2502] eta: 0:19:08 lr: 0.000020 loss_cls: 4.1179 (3.9862) grad_norm: 2.2936 (2.3338) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 13:11:33 root] (utils.py 283): INFO Epoch: [3] [1020/2502] eta: 0:19:01 lr: 0.000020 loss_cls: 4.0676 (3.9874) grad_norm: 2.3828 (2.3344) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 13:11:40 root] (utils.py 283): INFO Epoch: [3] [1030/2502] eta: 0:18:53 lr: 0.000020 loss_cls: 4.2004 (3.9895) grad_norm: 2.3549 (2.3337) time: 0.7652 data: 0.0003 max mem: 8426 +[2024-12-10 13:11:48 root] (utils.py 283): INFO Epoch: [3] [1040/2502] eta: 0:18:45 lr: 0.000020 loss_cls: 4.2964 (3.9908) grad_norm: 2.3219 (2.3338) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 13:11:55 root] (utils.py 283): INFO Epoch: [3] [1050/2502] eta: 0:18:37 lr: 0.000020 loss_cls: 4.4117 (3.9935) grad_norm: 2.3508 (2.3340) time: 0.7610 data: 0.0002 max mem: 8426 +[2024-12-10 13:12:03 root] (utils.py 283): INFO Epoch: [3] [1060/2502] eta: 0:18:29 lr: 0.000020 loss_cls: 4.2277 (3.9941) grad_norm: 2.3352 (2.3339) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 13:12:11 root] (utils.py 283): INFO Epoch: [3] [1070/2502] eta: 0:18:22 lr: 0.000020 loss_cls: 3.9840 (3.9933) grad_norm: 2.2949 (2.3337) time: 0.7649 data: 0.0003 max mem: 8426 +[2024-12-10 13:12:18 root] (utils.py 283): INFO Epoch: [3] [1080/2502] eta: 0:18:14 lr: 0.000020 loss_cls: 3.9538 (3.9928) grad_norm: 2.2593 (2.3335) time: 0.7654 data: 0.0003 max mem: 8426 +[2024-12-10 13:12:26 root] (utils.py 283): INFO Epoch: [3] [1090/2502] eta: 0:18:06 lr: 0.000020 loss_cls: 4.3176 (3.9950) grad_norm: 2.2682 (2.3333) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 13:12:34 root] (utils.py 283): INFO Epoch: [3] [1100/2502] eta: 0:17:58 lr: 0.000020 loss_cls: 4.3308 (3.9970) grad_norm: 2.2877 (2.3331) time: 0.7628 data: 0.0003 max mem: 8426 +[2024-12-10 13:12:41 root] (utils.py 283): INFO Epoch: [3] [1110/2502] eta: 0:17:51 lr: 0.000020 loss_cls: 4.2063 (3.9976) grad_norm: 2.3175 (2.3330) time: 0.7645 data: 0.0003 max mem: 8426 +[2024-12-10 13:12:49 root] (utils.py 283): INFO Epoch: [3] [1120/2502] eta: 0:17:43 lr: 0.000020 loss_cls: 4.1579 (3.9987) grad_norm: 2.3750 (2.3335) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 13:12:57 root] (utils.py 283): INFO Epoch: [3] [1130/2502] eta: 0:17:35 lr: 0.000020 loss_cls: 4.0022 (3.9967) grad_norm: 2.3767 (2.3341) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 13:13:04 root] (utils.py 283): INFO Epoch: [3] [1140/2502] eta: 0:17:27 lr: 0.000020 loss_cls: 4.1502 (3.9988) grad_norm: 2.3779 (2.3347) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 13:13:12 root] (utils.py 283): INFO Epoch: [3] [1150/2502] eta: 0:17:19 lr: 0.000020 loss_cls: 4.1502 (3.9971) grad_norm: 2.3228 (2.3348) time: 0.7617 data: 0.0002 max mem: 8426 +[2024-12-10 13:13:19 root] (utils.py 283): INFO Epoch: [3] [1160/2502] eta: 0:17:12 lr: 0.000020 loss_cls: 4.0924 (3.9975) grad_norm: 2.3633 (2.3350) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 13:13:27 root] (utils.py 283): INFO Epoch: [3] [1170/2502] eta: 0:17:04 lr: 0.000020 loss_cls: 4.0811 (3.9962) grad_norm: 2.3247 (2.3348) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 13:13:35 root] (utils.py 283): INFO Epoch: [3] [1180/2502] eta: 0:16:56 lr: 0.000020 loss_cls: 4.0164 (3.9935) grad_norm: 2.2800 (2.3345) time: 0.7604 data: 0.0002 max mem: 8426 +[2024-12-10 13:13:42 root] (utils.py 283): INFO Epoch: [3] [1190/2502] eta: 0:16:48 lr: 0.000020 loss_cls: 3.6957 (3.9925) grad_norm: 2.2757 (2.3351) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 13:13:50 root] (utils.py 283): INFO Epoch: [3] [1200/2502] eta: 0:16:41 lr: 0.000020 loss_cls: 4.0250 (3.9933) grad_norm: 2.3120 (2.3350) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 13:13:58 root] (utils.py 283): INFO Epoch: [3] [1210/2502] eta: 0:16:33 lr: 0.000020 loss_cls: 3.9403 (3.9931) grad_norm: 2.2983 (2.3344) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-10 13:14:05 root] (utils.py 283): INFO Epoch: [3] [1220/2502] eta: 0:16:25 lr: 0.000020 loss_cls: 3.9403 (3.9927) grad_norm: 2.2873 (2.3348) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 13:14:13 root] (utils.py 283): INFO Epoch: [3] [1230/2502] eta: 0:16:17 lr: 0.000020 loss_cls: 4.2265 (3.9939) grad_norm: 2.3250 (2.3352) time: 0.7600 data: 0.0002 max mem: 8426 +[2024-12-10 13:14:20 root] (utils.py 283): INFO Epoch: [3] [1240/2502] eta: 0:16:10 lr: 0.000020 loss_cls: 4.2040 (3.9951) grad_norm: 2.3154 (2.3348) time: 0.7632 data: 0.0002 max mem: 8426 +[2024-12-10 13:14:28 root] (utils.py 283): INFO Epoch: [3] [1250/2502] eta: 0:16:02 lr: 0.000020 loss_cls: 4.2009 (3.9969) grad_norm: 2.2706 (2.3346) time: 0.7666 data: 0.0002 max mem: 8426 +[2024-12-10 13:14:36 root] (utils.py 283): INFO Epoch: [3] [1260/2502] eta: 0:15:54 lr: 0.000020 loss_cls: 4.2350 (3.9975) grad_norm: 2.3378 (2.3342) time: 0.7755 data: 0.0002 max mem: 8426 +[2024-12-10 13:14:44 root] (utils.py 283): INFO Epoch: [3] [1270/2502] eta: 0:15:47 lr: 0.000020 loss_cls: 4.1195 (3.9974) grad_norm: 2.3039 (2.3341) time: 0.7758 data: 0.0002 max mem: 8426 +[2024-12-10 13:14:51 root] (utils.py 283): INFO Epoch: [3] [1280/2502] eta: 0:15:39 lr: 0.000020 loss_cls: 4.1195 (3.9978) grad_norm: 2.3039 (2.3341) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 13:14:59 root] (utils.py 283): INFO Epoch: [3] [1290/2502] eta: 0:15:31 lr: 0.000020 loss_cls: 4.2639 (3.9984) grad_norm: 2.2675 (2.3337) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 13:15:07 root] (utils.py 283): INFO Epoch: [3] [1300/2502] eta: 0:15:24 lr: 0.000020 loss_cls: 4.0194 (3.9966) grad_norm: 2.2381 (2.3333) time: 0.7662 data: 0.0002 max mem: 8426 +[2024-12-10 13:15:14 root] (utils.py 283): INFO Epoch: [3] [1310/2502] eta: 0:15:16 lr: 0.000020 loss_cls: 3.8704 (3.9974) grad_norm: 2.3082 (2.3338) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 13:15:22 root] (utils.py 283): INFO Epoch: [3] [1320/2502] eta: 0:15:08 lr: 0.000020 loss_cls: 4.1329 (3.9982) grad_norm: 2.3205 (2.3341) time: 0.7670 data: 0.0002 max mem: 8426 +[2024-12-10 13:15:30 root] (utils.py 283): INFO Epoch: [3] [1330/2502] eta: 0:15:00 lr: 0.000020 loss_cls: 4.1405 (3.9984) grad_norm: 2.2624 (2.3334) time: 0.7675 data: 0.0002 max mem: 8426 +[2024-12-10 13:15:37 root] (utils.py 283): INFO Epoch: [3] [1340/2502] eta: 0:14:53 lr: 0.000020 loss_cls: 4.1405 (3.9997) grad_norm: 2.2405 (2.3332) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-10 13:15:45 root] (utils.py 283): INFO Epoch: [3] [1350/2502] eta: 0:14:45 lr: 0.000020 loss_cls: 4.1319 (3.9996) grad_norm: 2.3434 (2.3340) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 13:15:53 root] (utils.py 283): INFO Epoch: [3] [1360/2502] eta: 0:14:37 lr: 0.000020 loss_cls: 3.7555 (3.9991) grad_norm: 2.4211 (2.3350) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 13:16:00 root] (utils.py 283): INFO Epoch: [3] [1370/2502] eta: 0:14:29 lr: 0.000020 loss_cls: 4.1392 (3.9992) grad_norm: 2.4084 (2.3351) time: 0.7612 data: 0.0002 max mem: 8426 +[2024-12-10 13:16:08 root] (utils.py 283): INFO Epoch: [3] [1380/2502] eta: 0:14:22 lr: 0.000020 loss_cls: 4.1392 (3.9989) grad_norm: 2.2886 (2.3349) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 13:16:15 root] (utils.py 283): INFO Epoch: [3] [1390/2502] eta: 0:14:14 lr: 0.000020 loss_cls: 3.9781 (3.9980) grad_norm: 2.2686 (2.3346) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 13:16:23 root] (utils.py 283): INFO Epoch: [3] [1400/2502] eta: 0:14:06 lr: 0.000020 loss_cls: 3.8478 (3.9966) grad_norm: 2.2686 (2.3348) time: 0.7586 data: 0.0002 max mem: 8426 +[2024-12-10 13:16:31 root] (utils.py 283): INFO Epoch: [3] [1410/2502] eta: 0:13:58 lr: 0.000020 loss_cls: 4.1156 (3.9982) grad_norm: 2.3713 (2.3349) time: 0.7602 data: 0.0002 max mem: 8426 +[2024-12-10 13:16:38 root] (utils.py 283): INFO Epoch: [3] [1420/2502] eta: 0:13:51 lr: 0.000020 loss_cls: 4.2071 (3.9994) grad_norm: 2.3713 (2.3350) time: 0.7625 data: 0.0002 max mem: 8426 +[2024-12-10 13:16:46 root] (utils.py 283): INFO Epoch: [3] [1430/2502] eta: 0:13:43 lr: 0.000020 loss_cls: 4.2071 (4.0014) grad_norm: 2.3423 (2.3349) time: 0.7616 data: 0.0003 max mem: 8426 +[2024-12-10 13:16:53 root] (utils.py 283): INFO Epoch: [3] [1440/2502] eta: 0:13:35 lr: 0.000020 loss_cls: 4.0055 (4.0004) grad_norm: 2.2752 (2.3347) time: 0.7624 data: 0.0003 max mem: 8426 +[2024-12-10 13:17:01 root] (utils.py 283): INFO Epoch: [3] [1450/2502] eta: 0:13:28 lr: 0.000020 loss_cls: 3.7677 (3.9996) grad_norm: 2.2659 (2.3347) time: 0.7632 data: 0.0002 max mem: 8426 +[2024-12-10 13:17:09 root] (utils.py 283): INFO Epoch: [3] [1460/2502] eta: 0:13:20 lr: 0.000020 loss_cls: 4.4916 (4.0034) grad_norm: 2.2874 (2.3342) time: 0.7602 data: 0.0002 max mem: 8426 +[2024-12-10 13:17:16 root] (utils.py 283): INFO Epoch: [3] [1470/2502] eta: 0:13:12 lr: 0.000020 loss_cls: 4.3993 (4.0028) grad_norm: 2.3029 (2.3348) time: 0.7636 data: 0.0003 max mem: 8426 +[2024-12-10 13:17:24 root] (utils.py 283): INFO Epoch: [3] [1480/2502] eta: 0:13:04 lr: 0.000020 loss_cls: 4.0317 (4.0031) grad_norm: 2.4006 (2.3351) time: 0.7658 data: 0.0003 max mem: 8426 +[2024-12-10 13:17:32 root] (utils.py 283): INFO Epoch: [3] [1490/2502] eta: 0:12:57 lr: 0.000020 loss_cls: 3.8717 (4.0027) grad_norm: 2.3395 (2.3353) time: 0.7623 data: 0.0002 max mem: 8426 +[2024-12-10 13:17:39 root] (utils.py 283): INFO Epoch: [3] [1500/2502] eta: 0:12:49 lr: 0.000020 loss_cls: 3.6934 (3.9994) grad_norm: 2.3144 (2.3351) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 13:17:47 root] (utils.py 283): INFO Epoch: [3] [1510/2502] eta: 0:12:41 lr: 0.000020 loss_cls: 3.6657 (3.9988) grad_norm: 2.2549 (2.3349) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 13:17:54 root] (utils.py 283): INFO Epoch: [3] [1520/2502] eta: 0:12:34 lr: 0.000020 loss_cls: 4.1875 (3.9996) grad_norm: 2.2881 (2.3352) time: 0.7610 data: 0.0002 max mem: 8426 +[2024-12-10 13:18:02 root] (utils.py 283): INFO Epoch: [3] [1530/2502] eta: 0:12:26 lr: 0.000020 loss_cls: 4.2326 (3.9998) grad_norm: 2.3532 (2.3351) time: 0.7618 data: 0.0002 max mem: 8426 +[2024-12-10 13:18:10 root] (utils.py 283): INFO Epoch: [3] [1540/2502] eta: 0:12:18 lr: 0.000020 loss_cls: 3.8493 (3.9976) grad_norm: 2.3284 (2.3351) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 13:18:17 root] (utils.py 283): INFO Epoch: [3] [1550/2502] eta: 0:12:10 lr: 0.000020 loss_cls: 3.5625 (3.9968) grad_norm: 2.3284 (2.3350) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 13:18:25 root] (utils.py 283): INFO Epoch: [3] [1560/2502] eta: 0:12:03 lr: 0.000020 loss_cls: 4.1403 (3.9959) grad_norm: 2.3266 (2.3353) time: 0.7579 data: 0.0002 max mem: 8426 +[2024-12-10 13:18:32 root] (utils.py 283): INFO Epoch: [3] [1570/2502] eta: 0:11:55 lr: 0.000020 loss_cls: 4.1135 (3.9966) grad_norm: 2.3274 (2.3353) time: 0.7539 data: 0.0002 max mem: 8426 +[2024-12-10 13:18:40 root] (utils.py 283): INFO Epoch: [3] [1580/2502] eta: 0:11:47 lr: 0.000020 loss_cls: 4.0355 (3.9964) grad_norm: 2.2993 (2.3354) time: 0.7543 data: 0.0002 max mem: 8426 +[2024-12-10 13:18:48 root] (utils.py 283): INFO Epoch: [3] [1590/2502] eta: 0:11:39 lr: 0.000020 loss_cls: 3.9275 (3.9950) grad_norm: 2.2770 (2.3352) time: 0.7542 data: 0.0002 max mem: 8426 +[2024-12-10 13:18:55 root] (utils.py 283): INFO Epoch: [3] [1600/2502] eta: 0:11:32 lr: 0.000020 loss_cls: 3.9301 (3.9957) grad_norm: 2.2956 (2.3354) time: 0.7598 data: 0.0003 max mem: 8426 +[2024-12-10 13:19:03 root] (utils.py 283): INFO Epoch: [3] [1610/2502] eta: 0:11:24 lr: 0.000020 loss_cls: 4.0765 (3.9955) grad_norm: 2.2956 (2.3350) time: 0.7629 data: 0.0003 max mem: 8426 +[2024-12-10 13:19:10 root] (utils.py 283): INFO Epoch: [3] [1620/2502] eta: 0:11:16 lr: 0.000020 loss_cls: 4.0765 (3.9939) grad_norm: 2.3146 (2.3349) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 13:19:18 root] (utils.py 283): INFO Epoch: [3] [1630/2502] eta: 0:11:09 lr: 0.000020 loss_cls: 3.7959 (3.9936) grad_norm: 2.3038 (2.3346) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 13:19:26 root] (utils.py 283): INFO Epoch: [3] [1640/2502] eta: 0:11:01 lr: 0.000020 loss_cls: 4.1060 (3.9936) grad_norm: 2.3870 (2.3353) time: 0.7617 data: 0.0002 max mem: 8426 +[2024-12-10 13:19:33 root] (utils.py 283): INFO Epoch: [3] [1650/2502] eta: 0:10:53 lr: 0.000020 loss_cls: 3.8659 (3.9924) grad_norm: 2.3976 (2.3353) time: 0.7622 data: 0.0003 max mem: 8426 +[2024-12-10 13:19:41 root] (utils.py 283): INFO Epoch: [3] [1660/2502] eta: 0:10:46 lr: 0.000020 loss_cls: 3.8576 (3.9929) grad_norm: 2.3035 (2.3354) time: 0.7652 data: 0.0003 max mem: 8426 +[2024-12-10 13:19:49 root] (utils.py 283): INFO Epoch: [3] [1670/2502] eta: 0:10:38 lr: 0.000020 loss_cls: 3.9942 (3.9911) grad_norm: 2.3290 (2.3355) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 13:19:56 root] (utils.py 283): INFO Epoch: [3] [1680/2502] eta: 0:10:30 lr: 0.000020 loss_cls: 3.7101 (3.9900) grad_norm: 2.3180 (2.3355) time: 0.7673 data: 0.0002 max mem: 8426 +[2024-12-10 13:20:04 root] (utils.py 283): INFO Epoch: [3] [1690/2502] eta: 0:10:22 lr: 0.000020 loss_cls: 3.9715 (3.9905) grad_norm: 2.2943 (2.3352) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-10 13:20:12 root] (utils.py 283): INFO Epoch: [3] [1700/2502] eta: 0:10:15 lr: 0.000020 loss_cls: 4.0997 (3.9894) grad_norm: 2.2715 (2.3348) time: 0.7636 data: 0.0003 max mem: 8426 +[2024-12-10 13:20:19 root] (utils.py 283): INFO Epoch: [3] [1710/2502] eta: 0:10:07 lr: 0.000020 loss_cls: 4.2577 (3.9913) grad_norm: 2.2818 (2.3349) time: 0.7658 data: 0.0003 max mem: 8426 +[2024-12-10 13:20:27 root] (utils.py 283): INFO Epoch: [3] [1720/2502] eta: 0:09:59 lr: 0.000020 loss_cls: 4.2577 (3.9917) grad_norm: 2.2818 (2.3344) time: 0.7642 data: 0.0003 max mem: 8426 +[2024-12-10 13:20:35 root] (utils.py 283): INFO Epoch: [3] [1730/2502] eta: 0:09:52 lr: 0.000020 loss_cls: 3.7260 (3.9900) grad_norm: 2.2079 (2.3339) time: 0.7654 data: 0.0003 max mem: 8426 +[2024-12-10 13:20:42 root] (utils.py 283): INFO Epoch: [3] [1740/2502] eta: 0:09:44 lr: 0.000020 loss_cls: 3.7673 (3.9893) grad_norm: 2.2262 (2.3333) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 13:20:50 root] (utils.py 283): INFO Epoch: [3] [1750/2502] eta: 0:09:36 lr: 0.000020 loss_cls: 4.0140 (3.9895) grad_norm: 2.2433 (2.3331) time: 0.7662 data: 0.0002 max mem: 8426 +[2024-12-10 13:20:57 root] (utils.py 283): INFO Epoch: [3] [1760/2502] eta: 0:09:29 lr: 0.000020 loss_cls: 4.2738 (3.9908) grad_norm: 2.3092 (2.3330) time: 0.7641 data: 0.0003 max mem: 8426 +[2024-12-10 13:21:05 root] (utils.py 283): INFO Epoch: [3] [1770/2502] eta: 0:09:21 lr: 0.000020 loss_cls: 4.2505 (3.9911) grad_norm: 2.3105 (2.3330) time: 0.7611 data: 0.0003 max mem: 8426 +[2024-12-10 13:21:13 root] (utils.py 283): INFO Epoch: [3] [1780/2502] eta: 0:09:13 lr: 0.000020 loss_cls: 4.2978 (3.9922) grad_norm: 2.3207 (2.3329) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 13:21:20 root] (utils.py 283): INFO Epoch: [3] [1790/2502] eta: 0:09:06 lr: 0.000020 loss_cls: 4.1618 (3.9912) grad_norm: 2.3426 (2.3330) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 13:21:28 root] (utils.py 283): INFO Epoch: [3] [1800/2502] eta: 0:08:58 lr: 0.000020 loss_cls: 4.1158 (3.9918) grad_norm: 2.3746 (2.3338) time: 0.7682 data: 0.0002 max mem: 8426 +[2024-12-10 13:21:36 root] (utils.py 283): INFO Epoch: [3] [1810/2502] eta: 0:08:50 lr: 0.000020 loss_cls: 4.1158 (3.9903) grad_norm: 2.3968 (2.3338) time: 0.7735 data: 0.0002 max mem: 8426 +[2024-12-10 13:21:43 root] (utils.py 283): INFO Epoch: [3] [1820/2502] eta: 0:08:43 lr: 0.000020 loss_cls: 3.8161 (3.9897) grad_norm: 2.3531 (2.3335) time: 0.7692 data: 0.0003 max mem: 8426 +[2024-12-10 13:21:51 root] (utils.py 283): INFO Epoch: [3] [1830/2502] eta: 0:08:35 lr: 0.000020 loss_cls: 3.8161 (3.9883) grad_norm: 2.2786 (2.3331) time: 0.7636 data: 0.0003 max mem: 8426 +[2024-12-10 13:21:59 root] (utils.py 283): INFO Epoch: [3] [1840/2502] eta: 0:08:27 lr: 0.000020 loss_cls: 3.6496 (3.9872) grad_norm: 2.2723 (2.3328) time: 0.7660 data: 0.0003 max mem: 8426 +[2024-12-10 13:22:07 root] (utils.py 283): INFO Epoch: [3] [1850/2502] eta: 0:08:20 lr: 0.000020 loss_cls: 3.9538 (3.9873) grad_norm: 2.2347 (2.3323) time: 0.7744 data: 0.0002 max mem: 8426 +[2024-12-10 13:22:14 root] (utils.py 283): INFO Epoch: [3] [1860/2502] eta: 0:08:12 lr: 0.000020 loss_cls: 4.2754 (3.9893) grad_norm: 2.2481 (2.3323) time: 0.7720 data: 0.0002 max mem: 8426 +[2024-12-10 13:22:22 root] (utils.py 283): INFO Epoch: [3] [1870/2502] eta: 0:08:04 lr: 0.000020 loss_cls: 4.2265 (3.9877) grad_norm: 2.3452 (2.3323) time: 0.7623 data: 0.0003 max mem: 8426 +[2024-12-10 13:22:30 root] (utils.py 283): INFO Epoch: [3] [1880/2502] eta: 0:07:57 lr: 0.000020 loss_cls: 4.1114 (3.9885) grad_norm: 2.3201 (2.3324) time: 0.7646 data: 0.0003 max mem: 8426 +[2024-12-10 13:22:37 root] (utils.py 283): INFO Epoch: [3] [1890/2502] eta: 0:07:49 lr: 0.000020 loss_cls: 4.2210 (3.9894) grad_norm: 2.3063 (2.3321) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 13:22:45 root] (utils.py 283): INFO Epoch: [3] [1900/2502] eta: 0:07:41 lr: 0.000020 loss_cls: 4.1759 (3.9887) grad_norm: 2.2571 (2.3320) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 13:22:52 root] (utils.py 283): INFO Epoch: [3] [1910/2502] eta: 0:07:34 lr: 0.000020 loss_cls: 4.1492 (3.9898) grad_norm: 2.2994 (2.3318) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 13:23:00 root] (utils.py 283): INFO Epoch: [3] [1920/2502] eta: 0:07:26 lr: 0.000020 loss_cls: 4.4418 (3.9918) grad_norm: 2.2702 (2.3317) time: 0.7617 data: 0.0003 max mem: 8426 +[2024-12-10 13:23:08 root] (utils.py 283): INFO Epoch: [3] [1930/2502] eta: 0:07:18 lr: 0.000020 loss_cls: 4.3310 (3.9921) grad_norm: 2.2702 (2.3315) time: 0.7616 data: 0.0003 max mem: 8426 +[2024-12-10 13:23:15 root] (utils.py 283): INFO Epoch: [3] [1940/2502] eta: 0:07:11 lr: 0.000020 loss_cls: 4.1721 (3.9931) grad_norm: 2.2510 (2.3312) time: 0.7626 data: 0.0003 max mem: 8426 +[2024-12-10 13:23:23 root] (utils.py 283): INFO Epoch: [3] [1950/2502] eta: 0:07:03 lr: 0.000020 loss_cls: 4.0223 (3.9920) grad_norm: 2.2838 (2.3313) time: 0.7641 data: 0.0003 max mem: 8426 +[2024-12-10 13:23:31 root] (utils.py 283): INFO Epoch: [3] [1960/2502] eta: 0:06:55 lr: 0.000020 loss_cls: 4.0223 (3.9933) grad_norm: 2.3715 (2.3314) time: 0.7623 data: 0.0002 max mem: 8426 +[2024-12-10 13:23:38 root] (utils.py 283): INFO Epoch: [3] [1970/2502] eta: 0:06:48 lr: 0.000020 loss_cls: 4.1652 (3.9931) grad_norm: 2.3610 (2.3315) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 13:23:46 root] (utils.py 283): INFO Epoch: [3] [1980/2502] eta: 0:06:40 lr: 0.000020 loss_cls: 4.0290 (3.9942) grad_norm: 2.3344 (2.3313) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 13:23:53 root] (utils.py 283): INFO Epoch: [3] [1990/2502] eta: 0:06:32 lr: 0.000020 loss_cls: 4.2435 (3.9947) grad_norm: 2.2702 (2.3311) time: 0.7644 data: 0.0003 max mem: 8426 +[2024-12-10 13:24:01 root] (utils.py 283): INFO Epoch: [3] [2000/2502] eta: 0:06:24 lr: 0.000020 loss_cls: 3.4450 (3.9912) grad_norm: 2.3043 (2.3312) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-10 13:24:09 root] (utils.py 283): INFO Epoch: [3] [2010/2502] eta: 0:06:17 lr: 0.000020 loss_cls: 3.3498 (3.9894) grad_norm: 2.3401 (2.3310) time: 0.7700 data: 0.0002 max mem: 8426 +[2024-12-10 13:24:17 root] (utils.py 283): INFO Epoch: [3] [2020/2502] eta: 0:06:09 lr: 0.000020 loss_cls: 3.7637 (3.9896) grad_norm: 2.2467 (2.3307) time: 0.7775 data: 0.0003 max mem: 8426 +[2024-12-10 13:24:24 root] (utils.py 283): INFO Epoch: [3] [2030/2502] eta: 0:06:02 lr: 0.000020 loss_cls: 4.1226 (3.9907) grad_norm: 2.3111 (2.3312) time: 0.7739 data: 0.0003 max mem: 8426 +[2024-12-10 13:24:32 root] (utils.py 283): INFO Epoch: [3] [2040/2502] eta: 0:05:54 lr: 0.000020 loss_cls: 3.9678 (3.9902) grad_norm: 2.4292 (2.3313) time: 0.7717 data: 0.0003 max mem: 8426 +[2024-12-10 13:24:40 root] (utils.py 283): INFO Epoch: [3] [2050/2502] eta: 0:05:46 lr: 0.000020 loss_cls: 3.9183 (3.9901) grad_norm: 2.3332 (2.3314) time: 0.7774 data: 0.0003 max mem: 8426 +[2024-12-10 13:24:48 root] (utils.py 283): INFO Epoch: [3] [2060/2502] eta: 0:05:39 lr: 0.000020 loss_cls: 4.0844 (3.9904) grad_norm: 2.3375 (2.3315) time: 0.7780 data: 0.0003 max mem: 8426 +[2024-12-10 13:24:55 root] (utils.py 283): INFO Epoch: [3] [2070/2502] eta: 0:05:31 lr: 0.000020 loss_cls: 3.9889 (3.9904) grad_norm: 2.3960 (2.3322) time: 0.7791 data: 0.0003 max mem: 8426 +[2024-12-10 13:25:03 root] (utils.py 283): INFO Epoch: [3] [2080/2502] eta: 0:05:23 lr: 0.000020 loss_cls: 3.8649 (3.9898) grad_norm: 2.3516 (2.3321) time: 0.7736 data: 0.0003 max mem: 8426 +[2024-12-10 13:25:11 root] (utils.py 283): INFO Epoch: [3] [2090/2502] eta: 0:05:16 lr: 0.000020 loss_cls: 3.8649 (3.9890) grad_norm: 2.3117 (2.3322) time: 0.7653 data: 0.0003 max mem: 8426 +[2024-12-10 13:25:18 root] (utils.py 283): INFO Epoch: [3] [2100/2502] eta: 0:05:08 lr: 0.000020 loss_cls: 3.9449 (3.9890) grad_norm: 2.4261 (2.3330) time: 0.7647 data: 0.0003 max mem: 8426 +[2024-12-10 13:25:26 root] (utils.py 283): INFO Epoch: [3] [2110/2502] eta: 0:05:00 lr: 0.000020 loss_cls: 3.7963 (3.9876) grad_norm: 2.3824 (2.3330) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 13:25:34 root] (utils.py 283): INFO Epoch: [3] [2120/2502] eta: 0:04:53 lr: 0.000020 loss_cls: 4.0716 (3.9891) grad_norm: 2.3118 (2.3330) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 13:25:41 root] (utils.py 283): INFO Epoch: [3] [2130/2502] eta: 0:04:45 lr: 0.000020 loss_cls: 4.0757 (3.9881) grad_norm: 2.2503 (2.3326) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 13:25:49 root] (utils.py 283): INFO Epoch: [3] [2140/2502] eta: 0:04:37 lr: 0.000020 loss_cls: 3.8447 (3.9869) grad_norm: 2.2577 (2.3330) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 13:25:57 root] (utils.py 283): INFO Epoch: [3] [2150/2502] eta: 0:04:30 lr: 0.000020 loss_cls: 3.9646 (3.9879) grad_norm: 2.3125 (2.3327) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 13:26:04 root] (utils.py 283): INFO Epoch: [3] [2160/2502] eta: 0:04:22 lr: 0.000020 loss_cls: 4.1332 (3.9884) grad_norm: 2.2993 (2.3327) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 13:26:12 root] (utils.py 283): INFO Epoch: [3] [2170/2502] eta: 0:04:14 lr: 0.000020 loss_cls: 4.2690 (3.9900) grad_norm: 2.2973 (2.3327) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-10 13:26:20 root] (utils.py 283): INFO Epoch: [3] [2180/2502] eta: 0:04:06 lr: 0.000020 loss_cls: 4.2678 (3.9907) grad_norm: 2.2618 (2.3324) time: 0.7617 data: 0.0003 max mem: 8426 +[2024-12-10 13:26:27 root] (utils.py 283): INFO Epoch: [3] [2190/2502] eta: 0:03:59 lr: 0.000020 loss_cls: 4.2728 (3.9914) grad_norm: 2.2515 (2.3324) time: 0.7629 data: 0.0003 max mem: 8426 +[2024-12-10 13:26:35 root] (utils.py 283): INFO Epoch: [3] [2200/2502] eta: 0:03:51 lr: 0.000020 loss_cls: 4.2937 (3.9926) grad_norm: 2.2413 (2.3322) time: 0.7704 data: 0.0003 max mem: 8426 +[2024-12-10 13:26:43 root] (utils.py 283): INFO Epoch: [3] [2210/2502] eta: 0:03:43 lr: 0.000020 loss_cls: 4.2086 (3.9931) grad_norm: 2.2510 (2.3320) time: 0.7699 data: 0.0002 max mem: 8426 +[2024-12-10 13:26:50 root] (utils.py 283): INFO Epoch: [3] [2220/2502] eta: 0:03:36 lr: 0.000020 loss_cls: 4.1302 (3.9928) grad_norm: 2.2510 (2.3318) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-10 13:26:58 root] (utils.py 283): INFO Epoch: [3] [2230/2502] eta: 0:03:28 lr: 0.000020 loss_cls: 3.9208 (3.9922) grad_norm: 2.2370 (2.3317) time: 0.7645 data: 0.0003 max mem: 8426 +[2024-12-10 13:27:06 root] (utils.py 283): INFO Epoch: [3] [2240/2502] eta: 0:03:20 lr: 0.000020 loss_cls: 3.7575 (3.9925) grad_norm: 2.3430 (2.3318) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 13:27:13 root] (utils.py 283): INFO Epoch: [3] [2250/2502] eta: 0:03:13 lr: 0.000020 loss_cls: 3.7575 (3.9915) grad_norm: 2.3400 (2.3317) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 13:27:21 root] (utils.py 283): INFO Epoch: [3] [2260/2502] eta: 0:03:05 lr: 0.000020 loss_cls: 3.9444 (3.9911) grad_norm: 2.2949 (2.3317) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 13:27:28 root] (utils.py 283): INFO Epoch: [3] [2270/2502] eta: 0:02:57 lr: 0.000020 loss_cls: 3.9560 (3.9904) grad_norm: 2.2549 (2.3315) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 13:27:36 root] (utils.py 283): INFO Epoch: [3] [2280/2502] eta: 0:02:50 lr: 0.000020 loss_cls: 3.9724 (3.9905) grad_norm: 2.2766 (2.3315) time: 0.7604 data: 0.0002 max mem: 8426 +[2024-12-10 13:27:44 root] (utils.py 283): INFO Epoch: [3] [2290/2502] eta: 0:02:42 lr: 0.000020 loss_cls: 4.0352 (3.9909) grad_norm: 2.3382 (2.3316) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 13:27:51 root] (utils.py 283): INFO Epoch: [3] [2300/2502] eta: 0:02:34 lr: 0.000020 loss_cls: 4.0973 (3.9913) grad_norm: 2.2885 (2.3314) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 13:27:59 root] (utils.py 283): INFO Epoch: [3] [2310/2502] eta: 0:02:27 lr: 0.000020 loss_cls: 4.0973 (3.9904) grad_norm: 2.2625 (2.3315) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 13:28:07 root] (utils.py 283): INFO Epoch: [3] [2320/2502] eta: 0:02:19 lr: 0.000020 loss_cls: 3.9300 (3.9905) grad_norm: 2.3225 (2.3316) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 13:28:14 root] (utils.py 283): INFO Epoch: [3] [2330/2502] eta: 0:02:11 lr: 0.000020 loss_cls: 4.0096 (3.9902) grad_norm: 2.3575 (2.3319) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 13:28:22 root] (utils.py 283): INFO Epoch: [3] [2340/2502] eta: 0:02:04 lr: 0.000020 loss_cls: 3.8860 (3.9897) grad_norm: 2.3506 (2.3320) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 13:28:29 root] (utils.py 283): INFO Epoch: [3] [2350/2502] eta: 0:01:56 lr: 0.000020 loss_cls: 3.6824 (3.9883) grad_norm: 2.3225 (2.3321) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 13:28:37 root] (utils.py 283): INFO Epoch: [3] [2360/2502] eta: 0:01:48 lr: 0.000020 loss_cls: 3.9563 (3.9888) grad_norm: 2.3558 (2.3325) time: 0.7647 data: 0.0003 max mem: 8426 +[2024-12-10 13:28:45 root] (utils.py 283): INFO Epoch: [3] [2370/2502] eta: 0:01:41 lr: 0.000020 loss_cls: 4.0399 (3.9885) grad_norm: 2.4203 (2.3326) time: 0.7611 data: 0.0003 max mem: 8426 +[2024-12-10 13:28:52 root] (utils.py 283): INFO Epoch: [3] [2380/2502] eta: 0:01:33 lr: 0.000020 loss_cls: 4.0661 (3.9889) grad_norm: 2.3913 (2.3326) time: 0.7609 data: 0.0003 max mem: 8426 +[2024-12-10 13:29:00 root] (utils.py 283): INFO Epoch: [3] [2390/2502] eta: 0:01:25 lr: 0.000020 loss_cls: 4.0661 (3.9887) grad_norm: 2.2771 (2.3324) time: 0.7692 data: 0.0003 max mem: 8426 +[2024-12-10 13:29:08 root] (utils.py 283): INFO Epoch: [3] [2400/2502] eta: 0:01:18 lr: 0.000020 loss_cls: 4.0080 (3.9884) grad_norm: 2.3124 (2.3326) time: 0.7782 data: 0.0002 max mem: 8426 +[2024-12-10 13:29:16 root] (utils.py 283): INFO Epoch: [3] [2410/2502] eta: 0:01:10 lr: 0.000020 loss_cls: 4.1735 (3.9896) grad_norm: 2.3127 (2.3326) time: 0.7705 data: 0.0002 max mem: 8426 +[2024-12-10 13:29:23 root] (utils.py 283): INFO Epoch: [3] [2420/2502] eta: 0:01:02 lr: 0.000020 loss_cls: 4.2064 (3.9893) grad_norm: 2.3333 (2.3325) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 13:29:31 root] (utils.py 283): INFO Epoch: [3] [2430/2502] eta: 0:00:55 lr: 0.000020 loss_cls: 4.1785 (3.9893) grad_norm: 2.3169 (2.3325) time: 0.7726 data: 0.0002 max mem: 8426 +[2024-12-10 13:29:39 root] (utils.py 283): INFO Epoch: [3] [2440/2502] eta: 0:00:47 lr: 0.000020 loss_cls: 4.1785 (3.9899) grad_norm: 2.3326 (2.3327) time: 0.7769 data: 0.0002 max mem: 8426 +[2024-12-10 13:29:47 root] (utils.py 283): INFO Epoch: [3] [2450/2502] eta: 0:00:39 lr: 0.000020 loss_cls: 3.9295 (3.9891) grad_norm: 2.3659 (2.3327) time: 0.7788 data: 0.0002 max mem: 8426 +[2024-12-10 13:29:54 root] (utils.py 283): INFO Epoch: [3] [2460/2502] eta: 0:00:32 lr: 0.000020 loss_cls: 3.7290 (3.9886) grad_norm: 2.3328 (2.3329) time: 0.7796 data: 0.0002 max mem: 8426 +[2024-12-10 13:30:02 root] (utils.py 283): INFO Epoch: [3] [2470/2502] eta: 0:00:24 lr: 0.000020 loss_cls: 3.9952 (3.9890) grad_norm: 2.3328 (2.3331) time: 0.7834 data: 0.0002 max mem: 8426 +[2024-12-10 13:30:10 root] (utils.py 283): INFO Epoch: [3] [2480/2502] eta: 0:00:16 lr: 0.000020 loss_cls: 3.9952 (3.9881) grad_norm: 2.4012 (2.3334) time: 0.7851 data: 0.0002 max mem: 8426 +[2024-12-10 13:30:18 root] (utils.py 283): INFO Epoch: [3] [2490/2502] eta: 0:00:09 lr: 0.000020 loss_cls: 3.8664 (3.9876) grad_norm: 2.3145 (2.3335) time: 0.8138 data: 0.0230 max mem: 8426 +[2024-12-10 13:30:26 root] (utils.py 283): INFO Epoch: [3] [2500/2502] eta: 0:00:01 lr: 0.000020 loss_cls: 3.9656 (3.9884) grad_norm: 2.3145 (2.3336) time: 0.8075 data: 0.0230 max mem: 8426 +[2024-12-10 13:30:27 root] (utils.py 283): INFO Epoch: [3] [2501/2502] eta: 0:00:00 lr: 0.000020 loss_cls: 3.8664 (3.9882) grad_norm: 2.3145 (2.3336) time: 0.8064 data: 0.0230 max mem: 8426 +[2024-12-10 13:30:27 root] (utils.py 297): INFO Epoch: [3] Total time: 0:32:00 (0.7676 s / it) +[2024-12-10 13:30:27 root] (engine.py 179): INFO Averaged stats:lr: 0.000020 loss_cls: 3.8664 (3.9860) grad_norm: 2.3145 (2.3336) +[2024-12-10 13:30:27 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.6627 (0.6627) acc1: 85.1562 (85.1562) acc3: 95.3125 (95.3125) acc5: 100.0000 (100.0000) time: 0.1277 data: 0.0004 max mem: 8426 +[2024-12-10 13:30:29 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7377 (0.8179) acc1: 85.1562 (81.8892) acc3: 94.5312 (93.1108) acc5: 96.8750 (96.8040) time: 0.1278 data: 0.0003 max mem: 8426 +[2024-12-10 13:30:30 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8796 (0.8717) acc1: 82.0312 (81.0268) acc3: 92.1875 (92.5595) acc5: 95.3125 (95.8705) time: 0.1279 data: 0.0003 max mem: 8426 +[2024-12-10 13:30:31 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9481 (0.8810) acc1: 80.4688 (80.3931) acc3: 92.9688 (92.8427) acc5: 95.3125 (95.8921) time: 0.1279 data: 0.0004 max mem: 8426 +[2024-12-10 13:30:33 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8071 (0.8652) acc1: 80.4688 (80.9261) acc3: 94.5312 (93.0831) acc5: 96.0938 (96.0175) time: 0.1339 data: 0.0062 max mem: 8426 +[2024-12-10 13:30:35 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0677 (0.9549) acc1: 75.0000 (78.7684) acc3: 89.0625 (91.6820) acc5: 92.1875 (94.7917) time: 0.1619 data: 0.0338 max mem: 8426 +[2024-12-10 13:30:37 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2477 (1.0009) acc1: 70.3125 (77.9329) acc3: 85.1562 (90.8427) acc5: 89.8438 (94.0061) time: 0.1945 data: 0.0657 max mem: 8426 +[2024-12-10 13:30:38 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:04 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2142 (1.0430) acc1: 72.6562 (76.9146) acc3: 86.7188 (90.2619) acc5: 89.8438 (93.5519) time: 0.1802 data: 0.0494 max mem: 8426 +[2024-12-10 13:30:39 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2380 (1.0772) acc1: 72.6562 (76.1863) acc3: 86.7188 (89.6316) acc5: 89.8438 (92.9880) time: 0.1418 data: 0.0119 max mem: 8426 +[2024-12-10 13:30:41 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2989 (1.1057) acc1: 69.5312 (75.4207) acc3: 85.1562 (89.2514) acc5: 89.8438 (92.6854) time: 0.1282 data: 0.0006 max mem: 8426 +[2024-12-10 13:30:42 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1218 (1.0933) acc1: 73.4375 (75.6480) acc3: 88.2812 (89.4800) acc5: 92.1875 (92.9040) time: 0.1278 data: 0.0006 max mem: 8426 +[2024-12-10 13:30:42 root] (utils.py 297): INFO Test: Total time: 0:00:14 (0.1457 s / it) +[2024-12-10 13:30:42 root] (engine.py 264): INFO * Acc@1 75.496 Acc@3 89.480 Acc@5 92.726 loss 1.093 flops 1.285 layer_flops 1.251 +[2024-12-10 13:30:42 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 75.5% +[2024-12-10 13:30:42 root] (main.py 576): INFO Max accuracy: 75.50% +[2024-12-10 13:30:43 root] (utils.py 283): INFO Epoch: [4] [ 0/2502] eta: 0:33:36 lr: 0.000020 loss_cls: 4.0036 (4.0036) grad_norm: 2.3706 (2.3706) time: 0.8060 data: 0.0005 max mem: 8426 +[2024-12-10 13:30:50 root] (utils.py 283): INFO Epoch: [4] [ 10/2502] eta: 0:31:46 lr: 0.000020 loss_cls: 4.1060 (4.1380) grad_norm: 2.3706 (2.3919) time: 0.7649 data: 0.0003 max mem: 8426 +[2024-12-10 13:30:58 root] (utils.py 283): INFO Epoch: [4] [ 20/2502] eta: 0:31:35 lr: 0.000020 loss_cls: 4.3143 (4.2081) grad_norm: 2.3919 (2.3960) time: 0.7617 data: 0.0003 max mem: 8426 +[2024-12-10 13:31:06 root] (utils.py 283): INFO Epoch: [4] [ 30/2502] eta: 0:31:30 lr: 0.000020 loss_cls: 4.4138 (4.2332) grad_norm: 2.3880 (2.3874) time: 0.7647 data: 0.0003 max mem: 8426 +[2024-12-10 13:31:13 root] (utils.py 283): INFO Epoch: [4] [ 40/2502] eta: 0:31:18 lr: 0.000020 loss_cls: 4.0813 (4.0868) grad_norm: 2.3154 (2.3728) time: 0.7624 data: 0.0003 max mem: 8426 +[2024-12-10 13:31:21 root] (utils.py 283): INFO Epoch: [4] [ 50/2502] eta: 0:31:13 lr: 0.000020 loss_cls: 4.1367 (4.1339) grad_norm: 2.2858 (2.3672) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 13:31:29 root] (utils.py 283): INFO Epoch: [4] [ 60/2502] eta: 0:31:06 lr: 0.000020 loss_cls: 4.1907 (4.1147) grad_norm: 2.2858 (2.3484) time: 0.7672 data: 0.0002 max mem: 8426 +[2024-12-10 13:31:36 root] (utils.py 283): INFO Epoch: [4] [ 70/2502] eta: 0:31:01 lr: 0.000020 loss_cls: 4.1027 (4.1251) grad_norm: 2.3091 (2.3615) time: 0.7687 data: 0.0002 max mem: 8426 +[2024-12-10 13:31:44 root] (utils.py 283): INFO Epoch: [4] [ 80/2502] eta: 0:30:54 lr: 0.000020 loss_cls: 4.1027 (4.1040) grad_norm: 2.3091 (2.3528) time: 0.7689 data: 0.0002 max mem: 8426 +[2024-12-10 13:31:52 root] (utils.py 283): INFO Epoch: [4] [ 90/2502] eta: 0:30:47 lr: 0.000020 loss_cls: 4.2478 (4.1165) grad_norm: 2.3321 (2.3560) time: 0.7685 data: 0.0002 max mem: 8426 +[2024-12-10 13:31:59 root] (utils.py 283): INFO Epoch: [4] [ 100/2502] eta: 0:30:40 lr: 0.000020 loss_cls: 4.1129 (4.0831) grad_norm: 2.3433 (2.3510) time: 0.7681 data: 0.0002 max mem: 8426 +[2024-12-10 13:32:07 root] (utils.py 283): INFO Epoch: [4] [ 110/2502] eta: 0:30:31 lr: 0.000020 loss_cls: 3.9798 (4.0924) grad_norm: 2.2534 (2.3431) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 13:32:14 root] (utils.py 283): INFO Epoch: [4] [ 120/2502] eta: 0:30:22 lr: 0.000020 loss_cls: 4.0687 (4.0857) grad_norm: 2.2617 (2.3446) time: 0.7604 data: 0.0002 max mem: 8426 +[2024-12-10 13:32:22 root] (utils.py 283): INFO Epoch: [4] [ 130/2502] eta: 0:30:15 lr: 0.000020 loss_cls: 4.2529 (4.0837) grad_norm: 2.2599 (2.3364) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 13:32:30 root] (utils.py 283): INFO Epoch: [4] [ 140/2502] eta: 0:30:07 lr: 0.000020 loss_cls: 3.9907 (4.0611) grad_norm: 2.2505 (2.3412) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-10 13:32:37 root] (utils.py 283): INFO Epoch: [4] [ 150/2502] eta: 0:30:00 lr: 0.000020 loss_cls: 3.6869 (4.0497) grad_norm: 2.2700 (2.3372) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-10 13:32:45 root] (utils.py 283): INFO Epoch: [4] [ 160/2502] eta: 0:29:52 lr: 0.000020 loss_cls: 4.1250 (4.0533) grad_norm: 2.2700 (2.3359) time: 0.7666 data: 0.0003 max mem: 8426 +[2024-12-10 13:32:53 root] (utils.py 283): INFO Epoch: [4] [ 170/2502] eta: 0:29:44 lr: 0.000020 loss_cls: 4.2306 (4.0629) grad_norm: 2.2872 (2.3322) time: 0.7648 data: 0.0003 max mem: 8426 +[2024-12-10 13:33:00 root] (utils.py 283): INFO Epoch: [4] [ 180/2502] eta: 0:29:37 lr: 0.000020 loss_cls: 4.2013 (4.0537) grad_norm: 2.2576 (2.3292) time: 0.7644 data: 0.0003 max mem: 8426 +[2024-12-10 13:33:08 root] (utils.py 283): INFO Epoch: [4] [ 190/2502] eta: 0:29:30 lr: 0.000020 loss_cls: 4.2338 (4.0648) grad_norm: 2.2967 (2.3292) time: 0.7682 data: 0.0002 max mem: 8426 +[2024-12-10 13:33:16 root] (utils.py 283): INFO Epoch: [4] [ 200/2502] eta: 0:29:22 lr: 0.000020 loss_cls: 4.2338 (4.0691) grad_norm: 2.3518 (2.3310) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-10 13:33:23 root] (utils.py 283): INFO Epoch: [4] [ 210/2502] eta: 0:29:13 lr: 0.000020 loss_cls: 4.1040 (4.0624) grad_norm: 2.3586 (2.3338) time: 0.7598 data: 0.0002 max mem: 8426 +[2024-12-10 13:33:31 root] (utils.py 283): INFO Epoch: [4] [ 220/2502] eta: 0:29:05 lr: 0.000020 loss_cls: 3.8250 (4.0464) grad_norm: 2.3367 (2.3322) time: 0.7594 data: 0.0002 max mem: 8426 +[2024-12-10 13:33:39 root] (utils.py 283): INFO Epoch: [4] [ 230/2502] eta: 0:28:58 lr: 0.000020 loss_cls: 3.8250 (4.0471) grad_norm: 2.3231 (2.3331) time: 0.7655 data: 0.0003 max mem: 8426 +[2024-12-10 13:33:46 root] (utils.py 283): INFO Epoch: [4] [ 240/2502] eta: 0:28:50 lr: 0.000020 loss_cls: 4.3304 (4.0567) grad_norm: 2.3535 (2.3363) time: 0.7670 data: 0.0003 max mem: 8426 +[2024-12-10 13:33:54 root] (utils.py 283): INFO Epoch: [4] [ 250/2502] eta: 0:28:42 lr: 0.000020 loss_cls: 4.3707 (4.0675) grad_norm: 2.3721 (2.3393) time: 0.7630 data: 0.0003 max mem: 8426 +[2024-12-10 13:34:02 root] (utils.py 283): INFO Epoch: [4] [ 260/2502] eta: 0:28:36 lr: 0.000020 loss_cls: 4.2988 (4.0715) grad_norm: 2.3168 (2.3362) time: 0.7694 data: 0.0002 max mem: 8426 +[2024-12-10 13:34:09 root] (utils.py 283): INFO Epoch: [4] [ 270/2502] eta: 0:28:28 lr: 0.000020 loss_cls: 3.9434 (4.0596) grad_norm: 2.2716 (2.3322) time: 0.7706 data: 0.0002 max mem: 8426 +[2024-12-10 13:34:17 root] (utils.py 283): INFO Epoch: [4] [ 280/2502] eta: 0:28:20 lr: 0.000020 loss_cls: 3.9434 (4.0625) grad_norm: 2.2532 (2.3317) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 13:34:25 root] (utils.py 283): INFO Epoch: [4] [ 290/2502] eta: 0:28:13 lr: 0.000020 loss_cls: 4.0210 (4.0601) grad_norm: 2.2877 (2.3310) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 13:34:32 root] (utils.py 283): INFO Epoch: [4] [ 300/2502] eta: 0:28:05 lr: 0.000020 loss_cls: 4.1501 (4.0658) grad_norm: 2.3233 (2.3295) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 13:34:40 root] (utils.py 283): INFO Epoch: [4] [ 310/2502] eta: 0:27:58 lr: 0.000020 loss_cls: 3.9111 (4.0533) grad_norm: 2.3161 (2.3276) time: 0.7714 data: 0.0002 max mem: 8426 +[2024-12-10 13:34:48 root] (utils.py 283): INFO Epoch: [4] [ 320/2502] eta: 0:27:50 lr: 0.000020 loss_cls: 3.7032 (4.0521) grad_norm: 2.3204 (2.3286) time: 0.7708 data: 0.0002 max mem: 8426 +[2024-12-10 13:34:55 root] (utils.py 283): INFO Epoch: [4] [ 330/2502] eta: 0:27:43 lr: 0.000020 loss_cls: 4.1381 (4.0553) grad_norm: 2.3601 (2.3296) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-10 13:35:03 root] (utils.py 283): INFO Epoch: [4] [ 340/2502] eta: 0:27:35 lr: 0.000020 loss_cls: 4.1381 (4.0567) grad_norm: 2.3210 (2.3294) time: 0.7612 data: 0.0002 max mem: 8426 +[2024-12-10 13:35:11 root] (utils.py 283): INFO Epoch: [4] [ 350/2502] eta: 0:27:27 lr: 0.000020 loss_cls: 4.2522 (4.0586) grad_norm: 2.3059 (2.3296) time: 0.7579 data: 0.0002 max mem: 8426 +[2024-12-10 13:35:18 root] (utils.py 283): INFO Epoch: [4] [ 360/2502] eta: 0:27:20 lr: 0.000020 loss_cls: 4.1342 (4.0612) grad_norm: 2.3059 (2.3297) time: 0.7681 data: 0.0002 max mem: 8426 +[2024-12-10 13:35:26 root] (utils.py 283): INFO Epoch: [4] [ 370/2502] eta: 0:27:13 lr: 0.000020 loss_cls: 3.9758 (4.0474) grad_norm: 2.3090 (2.3303) time: 0.7802 data: 0.0002 max mem: 8426 +[2024-12-10 13:35:34 root] (utils.py 283): INFO Epoch: [4] [ 380/2502] eta: 0:27:05 lr: 0.000020 loss_cls: 3.5242 (4.0406) grad_norm: 2.3121 (2.3300) time: 0.7734 data: 0.0002 max mem: 8426 +[2024-12-10 13:35:41 root] (utils.py 283): INFO Epoch: [4] [ 390/2502] eta: 0:26:58 lr: 0.000020 loss_cls: 3.9960 (4.0387) grad_norm: 2.3121 (2.3288) time: 0.7668 data: 0.0002 max mem: 8426 +[2024-12-10 13:35:49 root] (utils.py 283): INFO Epoch: [4] [ 400/2502] eta: 0:26:50 lr: 0.000020 loss_cls: 3.8851 (4.0322) grad_norm: 2.2924 (2.3281) time: 0.7676 data: 0.0002 max mem: 8426 +[2024-12-10 13:35:57 root] (utils.py 283): INFO Epoch: [4] [ 410/2502] eta: 0:26:42 lr: 0.000020 loss_cls: 3.8851 (4.0325) grad_norm: 2.2609 (2.3268) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 13:36:04 root] (utils.py 283): INFO Epoch: [4] [ 420/2502] eta: 0:26:34 lr: 0.000020 loss_cls: 3.8391 (4.0249) grad_norm: 2.2671 (2.3259) time: 0.7611 data: 0.0002 max mem: 8426 +[2024-12-10 13:36:12 root] (utils.py 283): INFO Epoch: [4] [ 430/2502] eta: 0:26:27 lr: 0.000020 loss_cls: 3.7256 (4.0196) grad_norm: 2.3100 (2.3262) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 13:36:20 root] (utils.py 283): INFO Epoch: [4] [ 440/2502] eta: 0:26:20 lr: 0.000020 loss_cls: 3.9715 (4.0229) grad_norm: 2.3100 (2.3259) time: 0.7738 data: 0.0002 max mem: 8426 +[2024-12-10 13:36:27 root] (utils.py 283): INFO Epoch: [4] [ 450/2502] eta: 0:26:12 lr: 0.000020 loss_cls: 4.0519 (4.0215) grad_norm: 2.3152 (2.3258) time: 0.7690 data: 0.0002 max mem: 8426 +[2024-12-10 13:36:35 root] (utils.py 283): INFO Epoch: [4] [ 460/2502] eta: 0:26:04 lr: 0.000020 loss_cls: 4.0264 (4.0208) grad_norm: 2.2700 (2.3244) time: 0.7618 data: 0.0002 max mem: 8426 +[2024-12-10 13:36:43 root] (utils.py 283): INFO Epoch: [4] [ 470/2502] eta: 0:25:56 lr: 0.000020 loss_cls: 4.2550 (4.0245) grad_norm: 2.2700 (2.3262) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 13:36:50 root] (utils.py 283): INFO Epoch: [4] [ 480/2502] eta: 0:25:49 lr: 0.000020 loss_cls: 3.9819 (4.0196) grad_norm: 2.4142 (2.3267) time: 0.7691 data: 0.0002 max mem: 8426 +[2024-12-10 13:36:58 root] (utils.py 283): INFO Epoch: [4] [ 490/2502] eta: 0:25:41 lr: 0.000020 loss_cls: 3.9819 (4.0191) grad_norm: 2.2640 (2.3265) time: 0.7699 data: 0.0003 max mem: 8426 +[2024-12-10 13:37:06 root] (utils.py 283): INFO Epoch: [4] [ 500/2502] eta: 0:25:33 lr: 0.000020 loss_cls: 4.1835 (4.0222) grad_norm: 2.2784 (2.3264) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 13:37:13 root] (utils.py 283): INFO Epoch: [4] [ 510/2502] eta: 0:25:25 lr: 0.000020 loss_cls: 4.1620 (4.0211) grad_norm: 2.3019 (2.3255) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 13:37:21 root] (utils.py 283): INFO Epoch: [4] [ 520/2502] eta: 0:25:18 lr: 0.000020 loss_cls: 3.9934 (4.0202) grad_norm: 2.3108 (2.3271) time: 0.7622 data: 0.0003 max mem: 8426 +[2024-12-10 13:37:29 root] (utils.py 283): INFO Epoch: [4] [ 530/2502] eta: 0:25:10 lr: 0.000020 loss_cls: 3.6760 (4.0130) grad_norm: 2.3949 (2.3286) time: 0.7607 data: 0.0002 max mem: 8426 +[2024-12-10 13:37:36 root] (utils.py 283): INFO Epoch: [4] [ 540/2502] eta: 0:25:02 lr: 0.000020 loss_cls: 3.7211 (4.0153) grad_norm: 2.3281 (2.3283) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 13:37:44 root] (utils.py 283): INFO Epoch: [4] [ 550/2502] eta: 0:24:54 lr: 0.000020 loss_cls: 4.2836 (4.0154) grad_norm: 2.3281 (2.3295) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 13:37:51 root] (utils.py 283): INFO Epoch: [4] [ 560/2502] eta: 0:24:46 lr: 0.000020 loss_cls: 3.4984 (4.0062) grad_norm: 2.3419 (2.3293) time: 0.7598 data: 0.0002 max mem: 8426 +[2024-12-10 13:37:59 root] (utils.py 283): INFO Epoch: [4] [ 570/2502] eta: 0:24:38 lr: 0.000020 loss_cls: 3.5140 (4.0066) grad_norm: 2.2554 (2.3278) time: 0.7594 data: 0.0002 max mem: 8426 +[2024-12-10 13:38:07 root] (utils.py 283): INFO Epoch: [4] [ 580/2502] eta: 0:24:31 lr: 0.000020 loss_cls: 3.9411 (4.0056) grad_norm: 2.2212 (2.3270) time: 0.7614 data: 0.0002 max mem: 8426 +[2024-12-10 13:38:14 root] (utils.py 283): INFO Epoch: [4] [ 590/2502] eta: 0:24:23 lr: 0.000020 loss_cls: 3.9411 (4.0010) grad_norm: 2.2731 (2.3267) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 13:38:22 root] (utils.py 283): INFO Epoch: [4] [ 600/2502] eta: 0:24:15 lr: 0.000020 loss_cls: 3.9524 (3.9999) grad_norm: 2.2656 (2.3263) time: 0.7620 data: 0.0003 max mem: 8426 +[2024-12-10 13:38:29 root] (utils.py 283): INFO Epoch: [4] [ 610/2502] eta: 0:24:07 lr: 0.000020 loss_cls: 4.0191 (3.9975) grad_norm: 2.2842 (2.3261) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 13:38:37 root] (utils.py 283): INFO Epoch: [4] [ 620/2502] eta: 0:24:00 lr: 0.000020 loss_cls: 4.0013 (3.9960) grad_norm: 2.3125 (2.3274) time: 0.7615 data: 0.0002 max mem: 8426 +[2024-12-10 13:38:45 root] (utils.py 283): INFO Epoch: [4] [ 630/2502] eta: 0:23:52 lr: 0.000020 loss_cls: 4.2718 (4.0013) grad_norm: 2.3425 (2.3274) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 13:38:52 root] (utils.py 283): INFO Epoch: [4] [ 640/2502] eta: 0:23:44 lr: 0.000020 loss_cls: 4.2277 (3.9977) grad_norm: 2.3227 (2.3275) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-10 13:39:00 root] (utils.py 283): INFO Epoch: [4] [ 650/2502] eta: 0:23:37 lr: 0.000020 loss_cls: 3.6782 (3.9944) grad_norm: 2.3227 (2.3280) time: 0.7707 data: 0.0002 max mem: 8426 +[2024-12-10 13:39:08 root] (utils.py 283): INFO Epoch: [4] [ 660/2502] eta: 0:23:29 lr: 0.000020 loss_cls: 4.0998 (3.9947) grad_norm: 2.2714 (2.3274) time: 0.7758 data: 0.0002 max mem: 8426 +[2024-12-10 13:39:16 root] (utils.py 283): INFO Epoch: [4] [ 670/2502] eta: 0:23:22 lr: 0.000020 loss_cls: 4.1675 (3.9961) grad_norm: 2.2462 (2.3263) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-10 13:39:23 root] (utils.py 283): INFO Epoch: [4] [ 680/2502] eta: 0:23:14 lr: 0.000020 loss_cls: 4.2607 (4.0005) grad_norm: 2.2865 (2.3268) time: 0.7583 data: 0.0002 max mem: 8426 +[2024-12-10 13:39:31 root] (utils.py 283): INFO Epoch: [4] [ 690/2502] eta: 0:23:06 lr: 0.000020 loss_cls: 4.1538 (4.0011) grad_norm: 2.2950 (2.3256) time: 0.7623 data: 0.0002 max mem: 8426 +[2024-12-10 13:39:38 root] (utils.py 283): INFO Epoch: [4] [ 700/2502] eta: 0:22:58 lr: 0.000020 loss_cls: 4.0549 (4.0025) grad_norm: 2.2871 (2.3256) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 13:39:46 root] (utils.py 283): INFO Epoch: [4] [ 710/2502] eta: 0:22:51 lr: 0.000020 loss_cls: 4.3347 (4.0016) grad_norm: 2.3846 (2.3266) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 13:39:54 root] (utils.py 283): INFO Epoch: [4] [ 720/2502] eta: 0:22:43 lr: 0.000020 loss_cls: 4.1439 (4.0001) grad_norm: 2.2812 (2.3258) time: 0.7699 data: 0.0002 max mem: 8426 +[2024-12-10 13:40:02 root] (utils.py 283): INFO Epoch: [4] [ 730/2502] eta: 0:22:36 lr: 0.000020 loss_cls: 4.1646 (4.0024) grad_norm: 2.2730 (2.3266) time: 0.7770 data: 0.0002 max mem: 8426 +[2024-12-10 13:40:09 root] (utils.py 283): INFO Epoch: [4] [ 740/2502] eta: 0:22:29 lr: 0.000020 loss_cls: 4.4241 (4.0032) grad_norm: 2.3642 (2.3273) time: 0.7783 data: 0.0002 max mem: 8426 +[2024-12-10 13:40:17 root] (utils.py 283): INFO Epoch: [4] [ 750/2502] eta: 0:22:21 lr: 0.000020 loss_cls: 4.1578 (4.0039) grad_norm: 2.3642 (2.3270) time: 0.7783 data: 0.0002 max mem: 8426 +[2024-12-10 13:40:25 root] (utils.py 283): INFO Epoch: [4] [ 760/2502] eta: 0:22:14 lr: 0.000020 loss_cls: 3.9063 (4.0022) grad_norm: 2.2789 (2.3265) time: 0.7723 data: 0.0002 max mem: 8426 +[2024-12-10 13:40:32 root] (utils.py 283): INFO Epoch: [4] [ 770/2502] eta: 0:22:06 lr: 0.000020 loss_cls: 3.9063 (4.0004) grad_norm: 2.3521 (2.3272) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 13:40:40 root] (utils.py 283): INFO Epoch: [4] [ 780/2502] eta: 0:21:58 lr: 0.000020 loss_cls: 4.0808 (4.0019) grad_norm: 2.3830 (2.3275) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 13:40:48 root] (utils.py 283): INFO Epoch: [4] [ 790/2502] eta: 0:21:51 lr: 0.000020 loss_cls: 4.1432 (3.9978) grad_norm: 2.2674 (2.3274) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 13:40:55 root] (utils.py 283): INFO Epoch: [4] [ 800/2502] eta: 0:21:43 lr: 0.000020 loss_cls: 4.1260 (3.9990) grad_norm: 2.3176 (2.3283) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 13:41:03 root] (utils.py 283): INFO Epoch: [4] [ 810/2502] eta: 0:21:35 lr: 0.000020 loss_cls: 4.2258 (3.9992) grad_norm: 2.3502 (2.3281) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 13:41:11 root] (utils.py 283): INFO Epoch: [4] [ 820/2502] eta: 0:21:28 lr: 0.000020 loss_cls: 3.9257 (3.9962) grad_norm: 2.3317 (2.3292) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 13:41:18 root] (utils.py 283): INFO Epoch: [4] [ 830/2502] eta: 0:21:20 lr: 0.000020 loss_cls: 4.1775 (3.9985) grad_norm: 2.3019 (2.3291) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 13:41:26 root] (utils.py 283): INFO Epoch: [4] [ 840/2502] eta: 0:21:12 lr: 0.000020 loss_cls: 4.3468 (3.9988) grad_norm: 2.2843 (2.3288) time: 0.7623 data: 0.0002 max mem: 8426 +[2024-12-10 13:41:34 root] (utils.py 283): INFO Epoch: [4] [ 850/2502] eta: 0:21:05 lr: 0.000020 loss_cls: 4.2872 (4.0001) grad_norm: 2.3144 (2.3298) time: 0.7711 data: 0.0002 max mem: 8426 +[2024-12-10 13:41:42 root] (utils.py 283): INFO Epoch: [4] [ 860/2502] eta: 0:20:57 lr: 0.000020 loss_cls: 4.1054 (4.0012) grad_norm: 2.3493 (2.3301) time: 0.7810 data: 0.0002 max mem: 8426 +[2024-12-10 13:41:49 root] (utils.py 283): INFO Epoch: [4] [ 870/2502] eta: 0:20:50 lr: 0.000020 loss_cls: 4.1959 (4.0013) grad_norm: 2.3017 (2.3296) time: 0.7829 data: 0.0002 max mem: 8426 +[2024-12-10 13:41:57 root] (utils.py 283): INFO Epoch: [4] [ 880/2502] eta: 0:20:43 lr: 0.000020 loss_cls: 4.2234 (4.0008) grad_norm: 2.3020 (2.3300) time: 0.7828 data: 0.0002 max mem: 8426 +[2024-12-10 13:42:05 root] (utils.py 283): INFO Epoch: [4] [ 890/2502] eta: 0:20:35 lr: 0.000020 loss_cls: 4.0958 (3.9996) grad_norm: 2.3155 (2.3299) time: 0.7811 data: 0.0002 max mem: 8426 +[2024-12-10 13:42:13 root] (utils.py 283): INFO Epoch: [4] [ 900/2502] eta: 0:20:28 lr: 0.000020 loss_cls: 3.9047 (4.0006) grad_norm: 2.2986 (2.3301) time: 0.7802 data: 0.0002 max mem: 8426 +[2024-12-10 13:42:21 root] (utils.py 283): INFO Epoch: [4] [ 910/2502] eta: 0:20:20 lr: 0.000020 loss_cls: 4.1165 (3.9975) grad_norm: 2.2755 (2.3298) time: 0.7793 data: 0.0002 max mem: 8426 +[2024-12-10 13:42:28 root] (utils.py 283): INFO Epoch: [4] [ 920/2502] eta: 0:20:13 lr: 0.000020 loss_cls: 3.5489 (3.9952) grad_norm: 2.3057 (2.3295) time: 0.7800 data: 0.0002 max mem: 8426 +[2024-12-10 13:42:36 root] (utils.py 283): INFO Epoch: [4] [ 930/2502] eta: 0:20:05 lr: 0.000020 loss_cls: 3.6594 (3.9932) grad_norm: 2.3065 (2.3298) time: 0.7805 data: 0.0002 max mem: 8426 +[2024-12-10 13:42:44 root] (utils.py 283): INFO Epoch: [4] [ 940/2502] eta: 0:19:58 lr: 0.000020 loss_cls: 3.9830 (3.9946) grad_norm: 2.2786 (2.3296) time: 0.7801 data: 0.0002 max mem: 8426 +[2024-12-10 13:42:52 root] (utils.py 283): INFO Epoch: [4] [ 950/2502] eta: 0:19:51 lr: 0.000020 loss_cls: 4.0327 (3.9936) grad_norm: 2.2833 (2.3295) time: 0.7797 data: 0.0002 max mem: 8426 +[2024-12-10 13:43:00 root] (utils.py 283): INFO Epoch: [4] [ 960/2502] eta: 0:19:43 lr: 0.000020 loss_cls: 4.1201 (3.9968) grad_norm: 2.3048 (2.3295) time: 0.7795 data: 0.0002 max mem: 8426 +[2024-12-10 13:43:07 root] (utils.py 283): INFO Epoch: [4] [ 970/2502] eta: 0:19:36 lr: 0.000020 loss_cls: 4.2056 (3.9969) grad_norm: 2.3288 (2.3292) time: 0.7793 data: 0.0002 max mem: 8426 +[2024-12-10 13:43:15 root] (utils.py 283): INFO Epoch: [4] [ 980/2502] eta: 0:19:28 lr: 0.000020 loss_cls: 3.9999 (3.9959) grad_norm: 2.3024 (2.3287) time: 0.7799 data: 0.0002 max mem: 8426 +[2024-12-10 13:43:23 root] (utils.py 283): INFO Epoch: [4] [ 990/2502] eta: 0:19:21 lr: 0.000020 loss_cls: 4.0313 (3.9958) grad_norm: 2.3353 (2.3290) time: 0.7807 data: 0.0002 max mem: 8426 +[2024-12-10 13:43:31 root] (utils.py 283): INFO Epoch: [4] [1000/2502] eta: 0:19:13 lr: 0.000020 loss_cls: 4.2665 (3.9982) grad_norm: 2.3499 (2.3291) time: 0.7798 data: 0.0002 max mem: 8426 +[2024-12-10 13:43:39 root] (utils.py 283): INFO Epoch: [4] [1010/2502] eta: 0:19:06 lr: 0.000020 loss_cls: 4.2883 (4.0001) grad_norm: 2.3095 (2.3289) time: 0.7881 data: 0.0002 max mem: 8426 +[2024-12-10 13:43:47 root] (utils.py 283): INFO Epoch: [4] [1020/2502] eta: 0:18:58 lr: 0.000020 loss_cls: 4.0171 (4.0003) grad_norm: 2.3042 (2.3290) time: 0.7885 data: 0.0002 max mem: 8426 +[2024-12-10 13:43:54 root] (utils.py 283): INFO Epoch: [4] [1030/2502] eta: 0:18:51 lr: 0.000020 loss_cls: 3.9146 (4.0001) grad_norm: 2.3004 (2.3294) time: 0.7801 data: 0.0002 max mem: 8426 +[2024-12-10 13:44:02 root] (utils.py 283): INFO Epoch: [4] [1040/2502] eta: 0:18:43 lr: 0.000020 loss_cls: 4.1227 (4.0006) grad_norm: 2.2758 (2.3287) time: 0.7802 data: 0.0002 max mem: 8426 +[2024-12-10 13:44:10 root] (utils.py 283): INFO Epoch: [4] [1050/2502] eta: 0:18:36 lr: 0.000020 loss_cls: 3.9359 (3.9981) grad_norm: 2.2878 (2.3292) time: 0.7793 data: 0.0002 max mem: 8426 +[2024-12-10 13:44:18 root] (utils.py 283): INFO Epoch: [4] [1060/2502] eta: 0:18:28 lr: 0.000020 loss_cls: 4.1052 (3.9998) grad_norm: 2.3275 (2.3291) time: 0.7804 data: 0.0002 max mem: 8426 +[2024-12-10 13:44:26 root] (utils.py 283): INFO Epoch: [4] [1070/2502] eta: 0:18:21 lr: 0.000020 loss_cls: 4.1735 (3.9989) grad_norm: 2.2733 (2.3288) time: 0.7834 data: 0.0002 max mem: 8426 +[2024-12-10 13:44:33 root] (utils.py 283): INFO Epoch: [4] [1080/2502] eta: 0:18:13 lr: 0.000020 loss_cls: 3.9055 (3.9975) grad_norm: 2.2733 (2.3283) time: 0.7843 data: 0.0003 max mem: 8426 +[2024-12-10 13:44:41 root] (utils.py 283): INFO Epoch: [4] [1090/2502] eta: 0:18:06 lr: 0.000020 loss_cls: 4.1117 (3.9998) grad_norm: 2.3114 (2.3285) time: 0.7819 data: 0.0003 max mem: 8426 +[2024-12-10 13:44:49 root] (utils.py 283): INFO Epoch: [4] [1100/2502] eta: 0:17:58 lr: 0.000020 loss_cls: 3.9594 (3.9969) grad_norm: 2.3554 (2.3290) time: 0.7804 data: 0.0002 max mem: 8426 +[2024-12-10 13:44:57 root] (utils.py 283): INFO Epoch: [4] [1110/2502] eta: 0:17:51 lr: 0.000020 loss_cls: 3.8929 (3.9971) grad_norm: 2.3285 (2.3288) time: 0.7794 data: 0.0003 max mem: 8426 +[2024-12-10 13:45:05 root] (utils.py 283): INFO Epoch: [4] [1120/2502] eta: 0:17:43 lr: 0.000020 loss_cls: 4.0752 (3.9985) grad_norm: 2.3033 (2.3297) time: 0.7784 data: 0.0003 max mem: 8426 +[2024-12-10 13:45:12 root] (utils.py 283): INFO Epoch: [4] [1130/2502] eta: 0:17:35 lr: 0.000020 loss_cls: 4.2282 (3.9981) grad_norm: 2.4108 (2.3303) time: 0.7805 data: 0.0003 max mem: 8426 +[2024-12-10 13:45:20 root] (utils.py 283): INFO Epoch: [4] [1140/2502] eta: 0:17:28 lr: 0.000020 loss_cls: 4.2282 (3.9995) grad_norm: 2.3285 (2.3304) time: 0.7829 data: 0.0002 max mem: 8426 +[2024-12-10 13:45:28 root] (utils.py 283): INFO Epoch: [4] [1150/2502] eta: 0:17:20 lr: 0.000020 loss_cls: 4.0502 (3.9998) grad_norm: 2.3063 (2.3305) time: 0.7825 data: 0.0002 max mem: 8426 +[2024-12-10 13:45:36 root] (utils.py 283): INFO Epoch: [4] [1160/2502] eta: 0:17:13 lr: 0.000020 loss_cls: 4.0340 (3.9990) grad_norm: 2.3559 (2.3314) time: 0.7725 data: 0.0002 max mem: 8426 +[2024-12-10 13:45:43 root] (utils.py 283): INFO Epoch: [4] [1170/2502] eta: 0:17:05 lr: 0.000020 loss_cls: 3.9911 (3.9977) grad_norm: 2.3519 (2.3315) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 13:45:51 root] (utils.py 283): INFO Epoch: [4] [1180/2502] eta: 0:16:57 lr: 0.000020 loss_cls: 3.9911 (3.9956) grad_norm: 2.2868 (2.3314) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-10 13:45:59 root] (utils.py 283): INFO Epoch: [4] [1190/2502] eta: 0:16:49 lr: 0.000020 loss_cls: 4.0338 (3.9948) grad_norm: 2.3039 (2.3313) time: 0.7646 data: 0.0003 max mem: 8426 +[2024-12-10 13:46:06 root] (utils.py 283): INFO Epoch: [4] [1200/2502] eta: 0:16:42 lr: 0.000020 loss_cls: 4.2061 (3.9944) grad_norm: 2.3039 (2.3312) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 13:46:14 root] (utils.py 283): INFO Epoch: [4] [1210/2502] eta: 0:16:34 lr: 0.000020 loss_cls: 4.1665 (3.9959) grad_norm: 2.3652 (2.3319) time: 0.7618 data: 0.0003 max mem: 8426 +[2024-12-10 13:46:22 root] (utils.py 283): INFO Epoch: [4] [1220/2502] eta: 0:16:26 lr: 0.000020 loss_cls: 3.9432 (3.9939) grad_norm: 2.3649 (2.3319) time: 0.7675 data: 0.0003 max mem: 8426 +[2024-12-10 13:46:29 root] (utils.py 283): INFO Epoch: [4] [1230/2502] eta: 0:16:18 lr: 0.000020 loss_cls: 3.6719 (3.9941) grad_norm: 2.3351 (2.3316) time: 0.7664 data: 0.0003 max mem: 8426 +[2024-12-10 13:46:37 root] (utils.py 283): INFO Epoch: [4] [1240/2502] eta: 0:16:10 lr: 0.000020 loss_cls: 4.1043 (3.9924) grad_norm: 2.2485 (2.3310) time: 0.7589 data: 0.0003 max mem: 8426 +[2024-12-10 13:46:44 root] (utils.py 283): INFO Epoch: [4] [1250/2502] eta: 0:16:03 lr: 0.000020 loss_cls: 4.2805 (3.9936) grad_norm: 2.2372 (2.3307) time: 0.7617 data: 0.0002 max mem: 8426 +[2024-12-10 13:46:52 root] (utils.py 283): INFO Epoch: [4] [1260/2502] eta: 0:15:55 lr: 0.000020 loss_cls: 4.3133 (3.9933) grad_norm: 2.2348 (2.3307) time: 0.7678 data: 0.0002 max mem: 8426 +[2024-12-10 13:47:00 root] (utils.py 283): INFO Epoch: [4] [1270/2502] eta: 0:15:47 lr: 0.000020 loss_cls: 3.8301 (3.9920) grad_norm: 2.2943 (2.3306) time: 0.7687 data: 0.0002 max mem: 8426 +[2024-12-10 13:47:08 root] (utils.py 283): INFO Epoch: [4] [1280/2502] eta: 0:15:40 lr: 0.000020 loss_cls: 4.0542 (3.9926) grad_norm: 2.2780 (2.3306) time: 0.7679 data: 0.0002 max mem: 8426 +[2024-12-10 13:47:15 root] (utils.py 283): INFO Epoch: [4] [1290/2502] eta: 0:15:32 lr: 0.000020 loss_cls: 4.2099 (3.9941) grad_norm: 2.3032 (2.3307) time: 0.7691 data: 0.0002 max mem: 8426 +[2024-12-10 13:47:23 root] (utils.py 283): INFO Epoch: [4] [1300/2502] eta: 0:15:24 lr: 0.000020 loss_cls: 4.2214 (3.9950) grad_norm: 2.3109 (2.3307) time: 0.7672 data: 0.0002 max mem: 8426 +[2024-12-10 13:47:31 root] (utils.py 283): INFO Epoch: [4] [1310/2502] eta: 0:15:16 lr: 0.000020 loss_cls: 4.0458 (3.9937) grad_norm: 2.2966 (2.3305) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 13:47:38 root] (utils.py 283): INFO Epoch: [4] [1320/2502] eta: 0:15:09 lr: 0.000020 loss_cls: 3.8866 (3.9944) grad_norm: 2.3167 (2.3306) time: 0.7704 data: 0.0002 max mem: 8426 +[2024-12-10 13:47:46 root] (utils.py 283): INFO Epoch: [4] [1330/2502] eta: 0:15:01 lr: 0.000020 loss_cls: 4.0946 (3.9942) grad_norm: 2.2819 (2.3305) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-10 13:47:53 root] (utils.py 283): INFO Epoch: [4] [1340/2502] eta: 0:14:53 lr: 0.000020 loss_cls: 4.1783 (3.9965) grad_norm: 2.3090 (2.3304) time: 0.7608 data: 0.0002 max mem: 8426 +[2024-12-10 13:48:01 root] (utils.py 283): INFO Epoch: [4] [1350/2502] eta: 0:14:46 lr: 0.000020 loss_cls: 4.2500 (3.9977) grad_norm: 2.2789 (2.3302) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-10 13:48:09 root] (utils.py 283): INFO Epoch: [4] [1360/2502] eta: 0:14:38 lr: 0.000020 loss_cls: 4.1726 (3.9981) grad_norm: 2.2789 (2.3299) time: 0.7596 data: 0.0002 max mem: 8426 +[2024-12-10 13:48:16 root] (utils.py 283): INFO Epoch: [4] [1370/2502] eta: 0:14:30 lr: 0.000020 loss_cls: 4.2956 (3.9987) grad_norm: 2.3190 (2.3303) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 13:48:24 root] (utils.py 283): INFO Epoch: [4] [1380/2502] eta: 0:14:22 lr: 0.000020 loss_cls: 4.0280 (3.9970) grad_norm: 2.3390 (2.3301) time: 0.7684 data: 0.0002 max mem: 8426 +[2024-12-10 13:48:32 root] (utils.py 283): INFO Epoch: [4] [1390/2502] eta: 0:14:15 lr: 0.000020 loss_cls: 3.9642 (3.9967) grad_norm: 2.2988 (2.3301) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 13:48:39 root] (utils.py 283): INFO Epoch: [4] [1400/2502] eta: 0:14:07 lr: 0.000020 loss_cls: 3.9841 (3.9948) grad_norm: 2.1910 (2.3298) time: 0.7574 data: 0.0002 max mem: 8426 +[2024-12-10 13:48:47 root] (utils.py 283): INFO Epoch: [4] [1410/2502] eta: 0:13:59 lr: 0.000020 loss_cls: 4.1043 (3.9976) grad_norm: 2.3134 (2.3303) time: 0.7553 data: 0.0002 max mem: 8426 +[2024-12-10 13:48:55 root] (utils.py 283): INFO Epoch: [4] [1420/2502] eta: 0:13:51 lr: 0.000020 loss_cls: 4.3498 (3.9969) grad_norm: 2.4124 (2.3306) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-10 13:49:02 root] (utils.py 283): INFO Epoch: [4] [1430/2502] eta: 0:13:44 lr: 0.000020 loss_cls: 3.9223 (3.9975) grad_norm: 2.3559 (2.3311) time: 0.7791 data: 0.0002 max mem: 8426 +[2024-12-10 13:49:10 root] (utils.py 283): INFO Epoch: [4] [1440/2502] eta: 0:13:36 lr: 0.000020 loss_cls: 4.0222 (3.9969) grad_norm: 2.3428 (2.3309) time: 0.7814 data: 0.0002 max mem: 8426 +[2024-12-10 13:49:18 root] (utils.py 283): INFO Epoch: [4] [1450/2502] eta: 0:13:28 lr: 0.000020 loss_cls: 4.0222 (3.9972) grad_norm: 2.3357 (2.3318) time: 0.7681 data: 0.0002 max mem: 8426 +[2024-12-10 13:49:25 root] (utils.py 283): INFO Epoch: [4] [1460/2502] eta: 0:13:21 lr: 0.000020 loss_cls: 4.2394 (3.9984) grad_norm: 2.3551 (2.3315) time: 0.7555 data: 0.0002 max mem: 8426 +[2024-12-10 13:49:33 root] (utils.py 283): INFO Epoch: [4] [1470/2502] eta: 0:13:13 lr: 0.000020 loss_cls: 4.2200 (3.9976) grad_norm: 2.3170 (2.3319) time: 0.7562 data: 0.0002 max mem: 8426 +[2024-12-10 13:49:40 root] (utils.py 283): INFO Epoch: [4] [1480/2502] eta: 0:13:05 lr: 0.000020 loss_cls: 4.3533 (4.0003) grad_norm: 2.3085 (2.3317) time: 0.7585 data: 0.0002 max mem: 8426 +[2024-12-10 13:49:48 root] (utils.py 283): INFO Epoch: [4] [1490/2502] eta: 0:12:57 lr: 0.000020 loss_cls: 4.4010 (4.0031) grad_norm: 2.2841 (2.3314) time: 0.7607 data: 0.0002 max mem: 8426 +[2024-12-10 13:49:56 root] (utils.py 283): INFO Epoch: [4] [1500/2502] eta: 0:12:50 lr: 0.000020 loss_cls: 4.4010 (4.0052) grad_norm: 2.3136 (2.3317) time: 0.7595 data: 0.0002 max mem: 8426 +[2024-12-10 13:50:03 root] (utils.py 283): INFO Epoch: [4] [1510/2502] eta: 0:12:42 lr: 0.000020 loss_cls: 4.2733 (4.0059) grad_norm: 2.3421 (2.3316) time: 0.7573 data: 0.0002 max mem: 8426 +[2024-12-10 13:50:11 root] (utils.py 283): INFO Epoch: [4] [1520/2502] eta: 0:12:34 lr: 0.000020 loss_cls: 4.0129 (4.0042) grad_norm: 2.2904 (2.3314) time: 0.7573 data: 0.0002 max mem: 8426 +[2024-12-10 13:50:18 root] (utils.py 283): INFO Epoch: [4] [1530/2502] eta: 0:12:26 lr: 0.000020 loss_cls: 3.9738 (4.0042) grad_norm: 2.2329 (2.3311) time: 0.7576 data: 0.0003 max mem: 8426 +[2024-12-10 13:50:26 root] (utils.py 283): INFO Epoch: [4] [1540/2502] eta: 0:12:19 lr: 0.000020 loss_cls: 4.2217 (4.0053) grad_norm: 2.3083 (2.3311) time: 0.7559 data: 0.0002 max mem: 8426 +[2024-12-10 13:50:34 root] (utils.py 283): INFO Epoch: [4] [1550/2502] eta: 0:12:11 lr: 0.000020 loss_cls: 3.9979 (4.0048) grad_norm: 2.3229 (2.3309) time: 0.7577 data: 0.0002 max mem: 8426 +[2024-12-10 13:50:41 root] (utils.py 283): INFO Epoch: [4] [1560/2502] eta: 0:12:03 lr: 0.000020 loss_cls: 3.8402 (4.0040) grad_norm: 2.3595 (2.3309) time: 0.7600 data: 0.0002 max mem: 8426 +[2024-12-10 13:50:49 root] (utils.py 283): INFO Epoch: [4] [1570/2502] eta: 0:11:55 lr: 0.000020 loss_cls: 3.8913 (4.0024) grad_norm: 2.3398 (2.3310) time: 0.7613 data: 0.0002 max mem: 8426 +[2024-12-10 13:50:56 root] (utils.py 283): INFO Epoch: [4] [1580/2502] eta: 0:11:48 lr: 0.000020 loss_cls: 3.9344 (4.0020) grad_norm: 2.2772 (2.3307) time: 0.7594 data: 0.0002 max mem: 8426 +[2024-12-10 13:51:04 root] (utils.py 283): INFO Epoch: [4] [1590/2502] eta: 0:11:40 lr: 0.000020 loss_cls: 3.9855 (4.0012) grad_norm: 2.2725 (2.3305) time: 0.7618 data: 0.0002 max mem: 8426 +[2024-12-10 13:51:12 root] (utils.py 283): INFO Epoch: [4] [1600/2502] eta: 0:11:32 lr: 0.000020 loss_cls: 3.8181 (4.0006) grad_norm: 2.2658 (2.3304) time: 0.7608 data: 0.0002 max mem: 8426 +[2024-12-10 13:51:19 root] (utils.py 283): INFO Epoch: [4] [1610/2502] eta: 0:11:24 lr: 0.000020 loss_cls: 3.8141 (4.0003) grad_norm: 2.3415 (2.3306) time: 0.7551 data: 0.0002 max mem: 8426 +[2024-12-10 13:51:27 root] (utils.py 283): INFO Epoch: [4] [1620/2502] eta: 0:11:17 lr: 0.000020 loss_cls: 3.7528 (3.9981) grad_norm: 2.3506 (2.3306) time: 0.7600 data: 0.0002 max mem: 8426 +[2024-12-10 13:51:34 root] (utils.py 283): INFO Epoch: [4] [1630/2502] eta: 0:11:09 lr: 0.000020 loss_cls: 3.8074 (3.9982) grad_norm: 2.3067 (2.3308) time: 0.7668 data: 0.0002 max mem: 8426 +[2024-12-10 13:51:42 root] (utils.py 283): INFO Epoch: [4] [1640/2502] eta: 0:11:01 lr: 0.000020 loss_cls: 4.3080 (3.9981) grad_norm: 2.2696 (2.3304) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 13:51:50 root] (utils.py 283): INFO Epoch: [4] [1650/2502] eta: 0:10:54 lr: 0.000020 loss_cls: 4.3206 (3.9985) grad_norm: 2.2561 (2.3299) time: 0.7593 data: 0.0002 max mem: 8426 +[2024-12-10 13:51:57 root] (utils.py 283): INFO Epoch: [4] [1660/2502] eta: 0:10:46 lr: 0.000020 loss_cls: 4.1652 (3.9978) grad_norm: 2.2908 (2.3299) time: 0.7612 data: 0.0003 max mem: 8426 +[2024-12-10 13:52:05 root] (utils.py 283): INFO Epoch: [4] [1670/2502] eta: 0:10:38 lr: 0.000020 loss_cls: 3.9820 (3.9991) grad_norm: 2.3780 (2.3308) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 13:52:13 root] (utils.py 283): INFO Epoch: [4] [1680/2502] eta: 0:10:31 lr: 0.000020 loss_cls: 4.0525 (3.9981) grad_norm: 2.3101 (2.3303) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 13:52:20 root] (utils.py 283): INFO Epoch: [4] [1690/2502] eta: 0:10:23 lr: 0.000020 loss_cls: 4.0087 (3.9985) grad_norm: 2.2545 (2.3302) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 13:52:28 root] (utils.py 283): INFO Epoch: [4] [1700/2502] eta: 0:10:15 lr: 0.000020 loss_cls: 4.0613 (3.9989) grad_norm: 2.2700 (2.3301) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-10 13:52:35 root] (utils.py 283): INFO Epoch: [4] [1710/2502] eta: 0:10:07 lr: 0.000020 loss_cls: 3.8417 (3.9970) grad_norm: 2.2518 (2.3299) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 13:52:43 root] (utils.py 283): INFO Epoch: [4] [1720/2502] eta: 0:10:00 lr: 0.000020 loss_cls: 3.3836 (3.9951) grad_norm: 2.2518 (2.3296) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 13:52:51 root] (utils.py 283): INFO Epoch: [4] [1730/2502] eta: 0:09:52 lr: 0.000020 loss_cls: 3.8235 (3.9944) grad_norm: 2.2582 (2.3292) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 13:52:58 root] (utils.py 283): INFO Epoch: [4] [1740/2502] eta: 0:09:44 lr: 0.000020 loss_cls: 3.8802 (3.9941) grad_norm: 2.2944 (2.3293) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 13:53:06 root] (utils.py 283): INFO Epoch: [4] [1750/2502] eta: 0:09:37 lr: 0.000020 loss_cls: 3.8790 (3.9927) grad_norm: 2.2851 (2.3289) time: 0.7674 data: 0.0002 max mem: 8426 +[2024-12-10 13:53:14 root] (utils.py 283): INFO Epoch: [4] [1760/2502] eta: 0:09:29 lr: 0.000020 loss_cls: 4.1971 (3.9935) grad_norm: 2.2599 (2.3289) time: 0.7725 data: 0.0002 max mem: 8426 +[2024-12-10 13:53:22 root] (utils.py 283): INFO Epoch: [4] [1770/2502] eta: 0:09:21 lr: 0.000020 loss_cls: 4.2197 (3.9943) grad_norm: 2.2885 (2.3290) time: 0.7718 data: 0.0002 max mem: 8426 +[2024-12-10 13:53:29 root] (utils.py 283): INFO Epoch: [4] [1780/2502] eta: 0:09:14 lr: 0.000020 loss_cls: 4.1437 (3.9941) grad_norm: 2.2714 (2.3286) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 13:53:37 root] (utils.py 283): INFO Epoch: [4] [1790/2502] eta: 0:09:06 lr: 0.000020 loss_cls: 3.8942 (3.9928) grad_norm: 2.2584 (2.3286) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 13:53:44 root] (utils.py 283): INFO Epoch: [4] [1800/2502] eta: 0:08:58 lr: 0.000020 loss_cls: 3.9689 (3.9937) grad_norm: 2.3328 (2.3288) time: 0.7623 data: 0.0002 max mem: 8426 +[2024-12-10 13:53:52 root] (utils.py 283): INFO Epoch: [4] [1810/2502] eta: 0:08:51 lr: 0.000020 loss_cls: 4.2693 (3.9951) grad_norm: 2.3301 (2.3289) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 13:54:00 root] (utils.py 283): INFO Epoch: [4] [1820/2502] eta: 0:08:43 lr: 0.000020 loss_cls: 3.9242 (3.9944) grad_norm: 2.2810 (2.3289) time: 0.7630 data: 0.0002 max mem: 8426 +[2024-12-10 13:54:07 root] (utils.py 283): INFO Epoch: [4] [1830/2502] eta: 0:08:35 lr: 0.000020 loss_cls: 3.9074 (3.9940) grad_norm: 2.2794 (2.3288) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 13:54:15 root] (utils.py 283): INFO Epoch: [4] [1840/2502] eta: 0:08:28 lr: 0.000020 loss_cls: 4.0698 (3.9942) grad_norm: 2.3158 (2.3287) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 13:54:23 root] (utils.py 283): INFO Epoch: [4] [1850/2502] eta: 0:08:20 lr: 0.000020 loss_cls: 4.0698 (3.9936) grad_norm: 2.2922 (2.3288) time: 0.7618 data: 0.0002 max mem: 8426 +[2024-12-10 13:54:30 root] (utils.py 283): INFO Epoch: [4] [1860/2502] eta: 0:08:12 lr: 0.000020 loss_cls: 4.0048 (3.9928) grad_norm: 2.2922 (2.3286) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 13:54:38 root] (utils.py 283): INFO Epoch: [4] [1870/2502] eta: 0:08:04 lr: 0.000020 loss_cls: 4.0673 (3.9934) grad_norm: 2.2987 (2.3288) time: 0.7617 data: 0.0002 max mem: 8426 +[2024-12-10 13:54:46 root] (utils.py 283): INFO Epoch: [4] [1880/2502] eta: 0:07:57 lr: 0.000020 loss_cls: 3.9358 (3.9923) grad_norm: 2.3260 (2.3294) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-10 13:54:53 root] (utils.py 283): INFO Epoch: [4] [1890/2502] eta: 0:07:49 lr: 0.000020 loss_cls: 4.2690 (3.9941) grad_norm: 2.3260 (2.3292) time: 0.7661 data: 0.0002 max mem: 8426 +[2024-12-10 13:55:01 root] (utils.py 283): INFO Epoch: [4] [1900/2502] eta: 0:07:41 lr: 0.000020 loss_cls: 4.2690 (3.9948) grad_norm: 2.3357 (2.3295) time: 0.7609 data: 0.0002 max mem: 8426 +[2024-12-10 13:55:08 root] (utils.py 283): INFO Epoch: [4] [1910/2502] eta: 0:07:34 lr: 0.000020 loss_cls: 4.0936 (3.9948) grad_norm: 2.3226 (2.3292) time: 0.7615 data: 0.0002 max mem: 8426 +[2024-12-10 13:55:16 root] (utils.py 283): INFO Epoch: [4] [1920/2502] eta: 0:07:26 lr: 0.000020 loss_cls: 3.9399 (3.9952) grad_norm: 2.2706 (2.3291) time: 0.7615 data: 0.0002 max mem: 8426 +[2024-12-10 13:55:24 root] (utils.py 283): INFO Epoch: [4] [1930/2502] eta: 0:07:18 lr: 0.000020 loss_cls: 3.9032 (3.9943) grad_norm: 2.2706 (2.3288) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 13:55:31 root] (utils.py 283): INFO Epoch: [4] [1940/2502] eta: 0:07:11 lr: 0.000020 loss_cls: 3.9674 (3.9949) grad_norm: 2.2920 (2.3288) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 13:55:39 root] (utils.py 283): INFO Epoch: [4] [1950/2502] eta: 0:07:03 lr: 0.000020 loss_cls: 3.9744 (3.9936) grad_norm: 2.3069 (2.3288) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 13:55:47 root] (utils.py 283): INFO Epoch: [4] [1960/2502] eta: 0:06:55 lr: 0.000020 loss_cls: 3.9506 (3.9939) grad_norm: 2.3869 (2.3292) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-10 13:55:54 root] (utils.py 283): INFO Epoch: [4] [1970/2502] eta: 0:06:48 lr: 0.000020 loss_cls: 3.8190 (3.9935) grad_norm: 2.3823 (2.3292) time: 0.7668 data: 0.0002 max mem: 8426 +[2024-12-10 13:56:02 root] (utils.py 283): INFO Epoch: [4] [1980/2502] eta: 0:06:40 lr: 0.000020 loss_cls: 3.8045 (3.9927) grad_norm: 2.2764 (2.3288) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 13:56:10 root] (utils.py 283): INFO Epoch: [4] [1990/2502] eta: 0:06:32 lr: 0.000020 loss_cls: 3.8045 (3.9917) grad_norm: 2.1958 (2.3285) time: 0.7610 data: 0.0002 max mem: 8426 +[2024-12-10 13:56:17 root] (utils.py 283): INFO Epoch: [4] [2000/2502] eta: 0:06:25 lr: 0.000020 loss_cls: 4.2719 (3.9935) grad_norm: 2.2827 (2.3285) time: 0.7599 data: 0.0002 max mem: 8426 +[2024-12-10 13:56:25 root] (utils.py 283): INFO Epoch: [4] [2010/2502] eta: 0:06:17 lr: 0.000020 loss_cls: 4.2283 (3.9927) grad_norm: 2.3399 (2.3285) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 13:56:32 root] (utils.py 283): INFO Epoch: [4] [2020/2502] eta: 0:06:09 lr: 0.000020 loss_cls: 3.9035 (3.9924) grad_norm: 2.3397 (2.3286) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 13:56:40 root] (utils.py 283): INFO Epoch: [4] [2030/2502] eta: 0:06:02 lr: 0.000020 loss_cls: 4.1472 (3.9928) grad_norm: 2.2722 (2.3282) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 13:56:48 root] (utils.py 283): INFO Epoch: [4] [2040/2502] eta: 0:05:54 lr: 0.000020 loss_cls: 4.0538 (3.9921) grad_norm: 2.2722 (2.3282) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 13:56:55 root] (utils.py 283): INFO Epoch: [4] [2050/2502] eta: 0:05:46 lr: 0.000020 loss_cls: 4.0907 (3.9928) grad_norm: 2.2742 (2.3278) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 13:57:03 root] (utils.py 283): INFO Epoch: [4] [2060/2502] eta: 0:05:39 lr: 0.000020 loss_cls: 4.1136 (3.9928) grad_norm: 2.2826 (2.3279) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 13:57:11 root] (utils.py 283): INFO Epoch: [4] [2070/2502] eta: 0:05:31 lr: 0.000020 loss_cls: 4.1217 (3.9934) grad_norm: 2.3319 (2.3283) time: 0.7668 data: 0.0002 max mem: 8426 +[2024-12-10 13:57:18 root] (utils.py 283): INFO Epoch: [4] [2080/2502] eta: 0:05:23 lr: 0.000020 loss_cls: 3.9807 (3.9935) grad_norm: 2.3158 (2.3280) time: 0.7662 data: 0.0002 max mem: 8426 +[2024-12-10 13:57:26 root] (utils.py 283): INFO Epoch: [4] [2090/2502] eta: 0:05:16 lr: 0.000020 loss_cls: 3.8593 (3.9913) grad_norm: 2.2379 (2.3275) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 13:57:34 root] (utils.py 283): INFO Epoch: [4] [2100/2502] eta: 0:05:08 lr: 0.000020 loss_cls: 3.6044 (3.9902) grad_norm: 2.2379 (2.3272) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 13:57:41 root] (utils.py 283): INFO Epoch: [4] [2110/2502] eta: 0:05:00 lr: 0.000020 loss_cls: 3.7312 (3.9896) grad_norm: 2.2568 (2.3270) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 13:57:49 root] (utils.py 283): INFO Epoch: [4] [2120/2502] eta: 0:04:53 lr: 0.000020 loss_cls: 3.9188 (3.9890) grad_norm: 2.2844 (2.3269) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 13:57:57 root] (utils.py 283): INFO Epoch: [4] [2130/2502] eta: 0:04:45 lr: 0.000020 loss_cls: 3.8177 (3.9878) grad_norm: 2.2844 (2.3268) time: 0.7654 data: 0.0002 max mem: 8426 +[2024-12-10 13:58:04 root] (utils.py 283): INFO Epoch: [4] [2140/2502] eta: 0:04:37 lr: 0.000020 loss_cls: 3.8250 (3.9875) grad_norm: 2.2774 (2.3265) time: 0.7631 data: 0.0003 max mem: 8426 +[2024-12-10 13:58:12 root] (utils.py 283): INFO Epoch: [4] [2150/2502] eta: 0:04:29 lr: 0.000020 loss_cls: 3.9322 (3.9870) grad_norm: 2.2572 (2.3262) time: 0.7617 data: 0.0002 max mem: 8426 +[2024-12-10 13:58:20 root] (utils.py 283): INFO Epoch: [4] [2160/2502] eta: 0:04:22 lr: 0.000020 loss_cls: 3.9322 (3.9879) grad_norm: 2.2423 (2.3261) time: 0.7705 data: 0.0002 max mem: 8426 +[2024-12-10 13:58:27 root] (utils.py 283): INFO Epoch: [4] [2170/2502] eta: 0:04:14 lr: 0.000020 loss_cls: 3.9016 (3.9868) grad_norm: 2.3168 (2.3265) time: 0.7692 data: 0.0002 max mem: 8426 +[2024-12-10 13:58:35 root] (utils.py 283): INFO Epoch: [4] [2180/2502] eta: 0:04:06 lr: 0.000020 loss_cls: 3.6171 (3.9853) grad_norm: 2.3629 (2.3264) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 13:58:43 root] (utils.py 283): INFO Epoch: [4] [2190/2502] eta: 0:03:59 lr: 0.000020 loss_cls: 3.6171 (3.9842) grad_norm: 2.3765 (2.3269) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 13:58:50 root] (utils.py 283): INFO Epoch: [4] [2200/2502] eta: 0:03:51 lr: 0.000020 loss_cls: 3.6962 (3.9830) grad_norm: 2.3414 (2.3265) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 13:58:58 root] (utils.py 283): INFO Epoch: [4] [2210/2502] eta: 0:03:43 lr: 0.000020 loss_cls: 3.8632 (3.9829) grad_norm: 2.2778 (2.3265) time: 0.7619 data: 0.0003 max mem: 8426 +[2024-12-10 13:59:05 root] (utils.py 283): INFO Epoch: [4] [2220/2502] eta: 0:03:36 lr: 0.000020 loss_cls: 4.1235 (3.9832) grad_norm: 2.3829 (2.3270) time: 0.7631 data: 0.0003 max mem: 8426 +[2024-12-10 13:59:13 root] (utils.py 283): INFO Epoch: [4] [2230/2502] eta: 0:03:28 lr: 0.000020 loss_cls: 4.1362 (3.9832) grad_norm: 2.4237 (2.3272) time: 0.7625 data: 0.0003 max mem: 8426 +[2024-12-10 13:59:21 root] (utils.py 283): INFO Epoch: [4] [2240/2502] eta: 0:03:20 lr: 0.000020 loss_cls: 4.1362 (3.9831) grad_norm: 2.2970 (2.3269) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-10 13:59:29 root] (utils.py 283): INFO Epoch: [4] [2250/2502] eta: 0:03:13 lr: 0.000020 loss_cls: 4.0657 (3.9839) grad_norm: 2.2991 (2.3270) time: 0.7800 data: 0.0002 max mem: 8426 +[2024-12-10 13:59:36 root] (utils.py 283): INFO Epoch: [4] [2260/2502] eta: 0:03:05 lr: 0.000020 loss_cls: 4.2683 (3.9844) grad_norm: 2.3005 (2.3269) time: 0.7750 data: 0.0002 max mem: 8426 +[2024-12-10 13:59:44 root] (utils.py 283): INFO Epoch: [4] [2270/2502] eta: 0:02:57 lr: 0.000020 loss_cls: 3.9457 (3.9834) grad_norm: 2.2906 (2.3267) time: 0.7681 data: 0.0002 max mem: 8426 +[2024-12-10 13:59:52 root] (utils.py 283): INFO Epoch: [4] [2280/2502] eta: 0:02:50 lr: 0.000020 loss_cls: 3.9884 (3.9845) grad_norm: 2.2906 (2.3266) time: 0.7699 data: 0.0002 max mem: 8426 +[2024-12-10 13:59:59 root] (utils.py 283): INFO Epoch: [4] [2290/2502] eta: 0:02:42 lr: 0.000020 loss_cls: 4.2218 (3.9851) grad_norm: 2.2777 (2.3266) time: 0.7673 data: 0.0002 max mem: 8426 +[2024-12-10 14:00:07 root] (utils.py 283): INFO Epoch: [4] [2300/2502] eta: 0:02:34 lr: 0.000020 loss_cls: 4.0912 (3.9849) grad_norm: 2.2777 (2.3264) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-10 14:00:15 root] (utils.py 283): INFO Epoch: [4] [2310/2502] eta: 0:02:27 lr: 0.000020 loss_cls: 4.0495 (3.9849) grad_norm: 2.2781 (2.3265) time: 0.7630 data: 0.0002 max mem: 8426 +[2024-12-10 14:00:22 root] (utils.py 283): INFO Epoch: [4] [2320/2502] eta: 0:02:19 lr: 0.000020 loss_cls: 4.1613 (3.9855) grad_norm: 2.2798 (2.3263) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 14:00:30 root] (utils.py 283): INFO Epoch: [4] [2330/2502] eta: 0:02:11 lr: 0.000020 loss_cls: 4.1450 (3.9845) grad_norm: 2.2798 (2.3264) time: 0.7676 data: 0.0002 max mem: 8426 +[2024-12-10 14:00:38 root] (utils.py 283): INFO Epoch: [4] [2340/2502] eta: 0:02:04 lr: 0.000020 loss_cls: 4.1347 (3.9851) grad_norm: 2.3733 (2.3267) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-10 14:00:45 root] (utils.py 283): INFO Epoch: [4] [2350/2502] eta: 0:01:56 lr: 0.000020 loss_cls: 4.1346 (3.9842) grad_norm: 2.3071 (2.3264) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 14:00:53 root] (utils.py 283): INFO Epoch: [4] [2360/2502] eta: 0:01:48 lr: 0.000020 loss_cls: 3.7619 (3.9823) grad_norm: 2.2787 (2.3265) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 14:01:01 root] (utils.py 283): INFO Epoch: [4] [2370/2502] eta: 0:01:41 lr: 0.000020 loss_cls: 3.7619 (3.9817) grad_norm: 2.2816 (2.3264) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 14:01:08 root] (utils.py 283): INFO Epoch: [4] [2380/2502] eta: 0:01:33 lr: 0.000020 loss_cls: 4.2202 (3.9818) grad_norm: 2.2729 (2.3264) time: 0.7605 data: 0.0002 max mem: 8426 +[2024-12-10 14:01:16 root] (utils.py 283): INFO Epoch: [4] [2390/2502] eta: 0:01:25 lr: 0.000020 loss_cls: 3.9856 (3.9810) grad_norm: 2.3028 (2.3263) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 14:01:23 root] (utils.py 283): INFO Epoch: [4] [2400/2502] eta: 0:01:18 lr: 0.000020 loss_cls: 3.9166 (3.9814) grad_norm: 2.3700 (2.3264) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 14:01:31 root] (utils.py 283): INFO Epoch: [4] [2410/2502] eta: 0:01:10 lr: 0.000020 loss_cls: 3.7919 (3.9803) grad_norm: 2.3700 (2.3265) time: 0.7747 data: 0.0002 max mem: 8426 +[2024-12-10 14:01:39 root] (utils.py 283): INFO Epoch: [4] [2420/2502] eta: 0:01:02 lr: 0.000020 loss_cls: 3.8296 (3.9804) grad_norm: 2.2987 (2.3262) time: 0.7895 data: 0.0002 max mem: 8426 +[2024-12-10 14:01:47 root] (utils.py 283): INFO Epoch: [4] [2430/2502] eta: 0:00:55 lr: 0.000020 loss_cls: 3.8296 (3.9790) grad_norm: 2.3146 (2.3262) time: 0.7890 data: 0.0002 max mem: 8426 +[2024-12-10 14:01:55 root] (utils.py 283): INFO Epoch: [4] [2440/2502] eta: 0:00:47 lr: 0.000020 loss_cls: 3.7796 (3.9792) grad_norm: 2.3418 (2.3263) time: 0.7861 data: 0.0002 max mem: 8426 +[2024-12-10 14:02:03 root] (utils.py 283): INFO Epoch: [4] [2450/2502] eta: 0:00:39 lr: 0.000020 loss_cls: 4.0358 (3.9787) grad_norm: 2.3903 (2.3265) time: 0.7839 data: 0.0002 max mem: 8426 +[2024-12-10 14:02:10 root] (utils.py 283): INFO Epoch: [4] [2460/2502] eta: 0:00:32 lr: 0.000020 loss_cls: 4.0899 (3.9796) grad_norm: 2.3654 (2.3264) time: 0.7769 data: 0.0002 max mem: 8426 +[2024-12-10 14:02:18 root] (utils.py 283): INFO Epoch: [4] [2470/2502] eta: 0:00:24 lr: 0.000020 loss_cls: 4.0899 (3.9794) grad_norm: 2.2987 (2.3265) time: 0.7680 data: 0.0002 max mem: 8426 +[2024-12-10 14:02:26 root] (utils.py 283): INFO Epoch: [4] [2480/2502] eta: 0:00:16 lr: 0.000020 loss_cls: 4.0510 (3.9792) grad_norm: 2.2938 (2.3265) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-10 14:02:34 root] (utils.py 283): INFO Epoch: [4] [2490/2502] eta: 0:00:09 lr: 0.000020 loss_cls: 4.0539 (3.9791) grad_norm: 2.4101 (2.3272) time: 0.7889 data: 0.0226 max mem: 8426 +[2024-12-10 14:02:42 root] (utils.py 283): INFO Epoch: [4] [2500/2502] eta: 0:00:01 lr: 0.000020 loss_cls: 4.0539 (3.9785) grad_norm: 2.4133 (2.3275) time: 0.7886 data: 0.0226 max mem: 8426 +[2024-12-10 14:02:42 root] (utils.py 283): INFO Epoch: [4] [2501/2502] eta: 0:00:00 lr: 0.000020 loss_cls: 4.0539 (3.9787) grad_norm: 2.4133 (2.3275) time: 0.7880 data: 0.0226 max mem: 8426 +[2024-12-10 14:02:42 root] (utils.py 297): INFO Epoch: [4] Total time: 0:32:00 (0.7676 s / it) +[2024-12-10 14:02:42 root] (engine.py 179): INFO Averaged stats:lr: 0.000020 loss_cls: 4.0539 (3.9856) grad_norm: 2.4133 (2.3275) +[2024-12-10 14:02:43 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.6433 (0.6433) acc1: 85.9375 (85.9375) acc3: 96.8750 (96.8750) acc5: 97.6562 (97.6562) time: 0.1275 data: 0.0002 max mem: 8426 +[2024-12-10 14:02:44 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7151 (0.8129) acc1: 82.8125 (81.9602) acc3: 94.5312 (93.6080) acc5: 96.8750 (96.5909) time: 0.1278 data: 0.0003 max mem: 8426 +[2024-12-10 14:02:46 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8714 (0.8718) acc1: 80.4688 (80.8036) acc3: 92.9688 (92.8943) acc5: 95.3125 (95.6845) time: 0.1294 data: 0.0003 max mem: 8426 +[2024-12-10 14:02:47 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9351 (0.8806) acc1: 78.9062 (79.9899) acc3: 92.1875 (93.0444) acc5: 95.3125 (95.7157) time: 0.1350 data: 0.0004 max mem: 8426 +[2024-12-10 14:02:48 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8045 (0.8676) acc1: 79.6875 (80.5640) acc3: 93.7500 (93.1212) acc5: 96.8750 (95.8460) time: 0.1352 data: 0.0012 max mem: 8426 +[2024-12-10 14:02:50 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0602 (0.9537) acc1: 73.4375 (78.4467) acc3: 88.2812 (91.6513) acc5: 92.1875 (94.8223) time: 0.1535 data: 0.0250 max mem: 8426 +[2024-12-10 14:02:51 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2612 (0.9991) acc1: 70.3125 (77.7792) acc3: 85.9375 (90.8683) acc5: 89.8438 (93.9421) time: 0.1607 data: 0.0330 max mem: 8426 +[2024-12-10 14:02:53 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2197 (1.0397) acc1: 74.2188 (76.9366) acc3: 88.2812 (90.3719) acc5: 89.8438 (93.4419) time: 0.1420 data: 0.0143 max mem: 8426 +[2024-12-10 14:02:54 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2390 (1.0737) acc1: 72.6562 (76.1671) acc3: 86.7188 (89.7762) acc5: 89.8438 (92.8627) time: 0.1336 data: 0.0058 max mem: 8426 +[2024-12-10 14:02:56 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2640 (1.1032) acc1: 70.3125 (75.3863) acc3: 85.9375 (89.3887) acc5: 89.0625 (92.5652) time: 0.1552 data: 0.0270 max mem: 8426 +[2024-12-10 14:02:57 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1690 (1.0917) acc1: 72.6562 (75.6480) acc3: 88.2812 (89.6320) acc5: 91.4062 (92.7520) time: 0.1530 data: 0.0269 max mem: 8426 +[2024-12-10 14:02:57 root] (utils.py 297): INFO Test: Total time: 0:00:13 (0.1428 s / it) +[2024-12-10 14:02:57 root] (engine.py 264): INFO * Acc@1 75.462 Acc@3 89.556 Acc@5 92.696 loss 1.095 flops 1.285 layer_flops 1.251 +[2024-12-10 14:02:57 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 75.5% +[2024-12-10 14:02:57 root] (main.py 576): INFO Max accuracy: 75.50% +[2024-12-10 14:02:58 root] (utils.py 283): INFO Epoch: [5] [ 0/2502] eta: 0:31:57 lr: 0.000019 loss_cls: 4.4680 (4.4680) grad_norm: 2.1876 (2.1876) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-10 14:03:06 root] (utils.py 283): INFO Epoch: [5] [ 10/2502] eta: 0:31:46 lr: 0.000019 loss_cls: 4.2857 (4.1961) grad_norm: 2.3036 (2.3201) time: 0.7648 data: 0.0003 max mem: 8426 +[2024-12-10 14:03:13 root] (utils.py 283): INFO Epoch: [5] [ 20/2502] eta: 0:31:40 lr: 0.000019 loss_cls: 4.1193 (4.1288) grad_norm: 2.2692 (2.2967) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 14:03:21 root] (utils.py 283): INFO Epoch: [5] [ 30/2502] eta: 0:31:34 lr: 0.000019 loss_cls: 4.0473 (4.0506) grad_norm: 2.2722 (2.3298) time: 0.7672 data: 0.0002 max mem: 8426 +[2024-12-10 14:03:29 root] (utils.py 283): INFO Epoch: [5] [ 40/2502] eta: 0:31:24 lr: 0.000019 loss_cls: 3.9356 (4.0581) grad_norm: 2.3778 (2.3463) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 14:03:36 root] (utils.py 283): INFO Epoch: [5] [ 50/2502] eta: 0:31:16 lr: 0.000019 loss_cls: 4.0739 (4.0136) grad_norm: 2.3544 (2.3401) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 14:03:44 root] (utils.py 283): INFO Epoch: [5] [ 60/2502] eta: 0:31:14 lr: 0.000019 loss_cls: 3.9587 (4.0054) grad_norm: 2.2848 (2.3425) time: 0.7712 data: 0.0002 max mem: 8426 +[2024-12-10 14:03:52 root] (utils.py 283): INFO Epoch: [5] [ 70/2502] eta: 0:31:02 lr: 0.000019 loss_cls: 4.3139 (4.0334) grad_norm: 2.3110 (2.3364) time: 0.7678 data: 0.0003 max mem: 8426 +[2024-12-10 14:03:59 root] (utils.py 283): INFO Epoch: [5] [ 80/2502] eta: 0:30:53 lr: 0.000019 loss_cls: 4.3649 (4.0640) grad_norm: 2.3110 (2.3351) time: 0.7588 data: 0.0002 max mem: 8426 +[2024-12-10 14:04:07 root] (utils.py 283): INFO Epoch: [5] [ 90/2502] eta: 0:30:44 lr: 0.000019 loss_cls: 4.1986 (4.0474) grad_norm: 2.2705 (2.3334) time: 0.7601 data: 0.0002 max mem: 8426 +[2024-12-10 14:04:15 root] (utils.py 283): INFO Epoch: [5] [ 100/2502] eta: 0:30:38 lr: 0.000019 loss_cls: 3.7652 (4.0292) grad_norm: 2.2713 (2.3306) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-10 14:04:22 root] (utils.py 283): INFO Epoch: [5] [ 110/2502] eta: 0:30:34 lr: 0.000019 loss_cls: 4.2568 (4.0349) grad_norm: 2.3044 (2.3342) time: 0.7779 data: 0.0002 max mem: 8426 +[2024-12-10 14:04:30 root] (utils.py 283): INFO Epoch: [5] [ 120/2502] eta: 0:30:28 lr: 0.000019 loss_cls: 4.1526 (4.0307) grad_norm: 2.2721 (2.3305) time: 0.7809 data: 0.0002 max mem: 8426 +[2024-12-10 14:04:38 root] (utils.py 283): INFO Epoch: [5] [ 130/2502] eta: 0:30:19 lr: 0.000019 loss_cls: 4.0369 (4.0211) grad_norm: 2.2578 (2.3307) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 14:04:45 root] (utils.py 283): INFO Epoch: [5] [ 140/2502] eta: 0:30:11 lr: 0.000019 loss_cls: 4.1478 (4.0261) grad_norm: 2.2761 (2.3274) time: 0.7609 data: 0.0003 max mem: 8426 +[2024-12-10 14:04:53 root] (utils.py 283): INFO Epoch: [5] [ 150/2502] eta: 0:30:03 lr: 0.000019 loss_cls: 4.1478 (4.0288) grad_norm: 2.2129 (2.3204) time: 0.7641 data: 0.0003 max mem: 8426 +[2024-12-10 14:05:01 root] (utils.py 283): INFO Epoch: [5] [ 160/2502] eta: 0:29:55 lr: 0.000019 loss_cls: 4.2671 (4.0415) grad_norm: 2.2938 (2.3198) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 14:05:08 root] (utils.py 283): INFO Epoch: [5] [ 170/2502] eta: 0:29:48 lr: 0.000019 loss_cls: 4.3165 (4.0502) grad_norm: 2.3037 (2.3186) time: 0.7697 data: 0.0002 max mem: 8426 +[2024-12-10 14:05:16 root] (utils.py 283): INFO Epoch: [5] [ 180/2502] eta: 0:29:43 lr: 0.000019 loss_cls: 4.2114 (4.0464) grad_norm: 2.3037 (2.3204) time: 0.7786 data: 0.0002 max mem: 8426 +[2024-12-10 14:05:24 root] (utils.py 283): INFO Epoch: [5] [ 190/2502] eta: 0:29:38 lr: 0.000019 loss_cls: 4.1559 (4.0472) grad_norm: 2.3046 (2.3199) time: 0.7864 data: 0.0002 max mem: 8426 +[2024-12-10 14:05:32 root] (utils.py 283): INFO Epoch: [5] [ 200/2502] eta: 0:29:31 lr: 0.000019 loss_cls: 4.2019 (4.0415) grad_norm: 2.2408 (2.3182) time: 0.7850 data: 0.0002 max mem: 8426 +[2024-12-10 14:05:40 root] (utils.py 283): INFO Epoch: [5] [ 210/2502] eta: 0:29:24 lr: 0.000019 loss_cls: 4.1815 (4.0498) grad_norm: 2.2191 (2.3195) time: 0.7767 data: 0.0002 max mem: 8426 +[2024-12-10 14:05:47 root] (utils.py 283): INFO Epoch: [5] [ 220/2502] eta: 0:29:15 lr: 0.000019 loss_cls: 4.1007 (4.0473) grad_norm: 2.2542 (2.3217) time: 0.7674 data: 0.0002 max mem: 8426 +[2024-12-10 14:05:55 root] (utils.py 283): INFO Epoch: [5] [ 230/2502] eta: 0:29:08 lr: 0.000019 loss_cls: 4.0428 (4.0445) grad_norm: 2.2637 (2.3214) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-10 14:06:03 root] (utils.py 283): INFO Epoch: [5] [ 240/2502] eta: 0:28:59 lr: 0.000019 loss_cls: 3.7095 (4.0288) grad_norm: 2.3632 (2.3261) time: 0.7661 data: 0.0002 max mem: 8426 +[2024-12-10 14:06:10 root] (utils.py 283): INFO Epoch: [5] [ 250/2502] eta: 0:28:51 lr: 0.000019 loss_cls: 3.6540 (4.0297) grad_norm: 2.3303 (2.3250) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-10 14:06:18 root] (utils.py 283): INFO Epoch: [5] [ 260/2502] eta: 0:28:44 lr: 0.000019 loss_cls: 4.2258 (4.0393) grad_norm: 2.3181 (2.3261) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-10 14:06:26 root] (utils.py 283): INFO Epoch: [5] [ 270/2502] eta: 0:28:35 lr: 0.000019 loss_cls: 3.9517 (4.0245) grad_norm: 2.3181 (2.3253) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-10 14:06:33 root] (utils.py 283): INFO Epoch: [5] [ 280/2502] eta: 0:28:27 lr: 0.000019 loss_cls: 3.6013 (4.0168) grad_norm: 2.2838 (2.3249) time: 0.7617 data: 0.0002 max mem: 8426 +[2024-12-10 14:06:41 root] (utils.py 283): INFO Epoch: [5] [ 290/2502] eta: 0:28:19 lr: 0.000019 loss_cls: 3.7648 (4.0107) grad_norm: 2.3333 (2.3268) time: 0.7633 data: 0.0002 max mem: 8426 +[2024-12-10 14:06:49 root] (utils.py 283): INFO Epoch: [5] [ 300/2502] eta: 0:28:11 lr: 0.000019 loss_cls: 3.8743 (4.0072) grad_norm: 2.3333 (2.3275) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-10 14:06:56 root] (utils.py 283): INFO Epoch: [5] [ 310/2502] eta: 0:28:03 lr: 0.000019 loss_cls: 4.2143 (4.0119) grad_norm: 2.2762 (2.3263) time: 0.7662 data: 0.0002 max mem: 8426 +[2024-12-10 14:07:04 root] (utils.py 283): INFO Epoch: [5] [ 320/2502] eta: 0:27:56 lr: 0.000019 loss_cls: 4.2200 (4.0123) grad_norm: 2.2885 (2.3271) time: 0.7691 data: 0.0002 max mem: 8426 +[2024-12-10 14:07:12 root] (utils.py 283): INFO Epoch: [5] [ 330/2502] eta: 0:27:48 lr: 0.000019 loss_cls: 3.9542 (4.0074) grad_norm: 2.2885 (2.3260) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 14:07:19 root] (utils.py 283): INFO Epoch: [5] [ 340/2502] eta: 0:27:40 lr: 0.000019 loss_cls: 4.0813 (4.0141) grad_norm: 2.2343 (2.3240) time: 0.7594 data: 0.0002 max mem: 8426 +[2024-12-10 14:07:27 root] (utils.py 283): INFO Epoch: [5] [ 350/2502] eta: 0:27:32 lr: 0.000019 loss_cls: 4.0813 (4.0057) grad_norm: 2.2644 (2.3243) time: 0.7654 data: 0.0002 max mem: 8426 +[2024-12-10 14:07:34 root] (utils.py 283): INFO Epoch: [5] [ 360/2502] eta: 0:27:24 lr: 0.000019 loss_cls: 4.0006 (4.0102) grad_norm: 2.3136 (2.3242) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-10 14:07:42 root] (utils.py 283): INFO Epoch: [5] [ 370/2502] eta: 0:27:16 lr: 0.000019 loss_cls: 4.2616 (4.0116) grad_norm: 2.3252 (2.3247) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-10 14:07:50 root] (utils.py 283): INFO Epoch: [5] [ 380/2502] eta: 0:27:08 lr: 0.000019 loss_cls: 4.2616 (4.0085) grad_norm: 2.3127 (2.3236) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 14:07:57 root] (utils.py 283): INFO Epoch: [5] [ 390/2502] eta: 0:27:00 lr: 0.000019 loss_cls: 4.2757 (4.0103) grad_norm: 2.3171 (2.3250) time: 0.7557 data: 0.0002 max mem: 8426 +[2024-12-10 14:08:05 root] (utils.py 283): INFO Epoch: [5] [ 400/2502] eta: 0:26:52 lr: 0.000019 loss_cls: 4.4141 (4.0169) grad_norm: 2.3678 (2.3271) time: 0.7566 data: 0.0002 max mem: 8426 +[2024-12-10 14:08:12 root] (utils.py 283): INFO Epoch: [5] [ 410/2502] eta: 0:26:43 lr: 0.000019 loss_cls: 4.3489 (4.0233) grad_norm: 2.3946 (2.3274) time: 0.7576 data: 0.0002 max mem: 8426 +[2024-12-10 14:08:20 root] (utils.py 283): INFO Epoch: [5] [ 420/2502] eta: 0:26:36 lr: 0.000019 loss_cls: 4.0645 (4.0194) grad_norm: 2.3309 (2.3265) time: 0.7599 data: 0.0002 max mem: 8426 +[2024-12-10 14:08:28 root] (utils.py 283): INFO Epoch: [5] [ 430/2502] eta: 0:26:28 lr: 0.000019 loss_cls: 3.9447 (4.0185) grad_norm: 2.2888 (2.3257) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 14:08:35 root] (utils.py 283): INFO Epoch: [5] [ 440/2502] eta: 0:26:20 lr: 0.000019 loss_cls: 4.1240 (4.0162) grad_norm: 2.2888 (2.3253) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 14:08:43 root] (utils.py 283): INFO Epoch: [5] [ 450/2502] eta: 0:26:12 lr: 0.000019 loss_cls: 4.1014 (4.0186) grad_norm: 2.2590 (2.3244) time: 0.7665 data: 0.0002 max mem: 8426 +[2024-12-10 14:08:51 root] (utils.py 283): INFO Epoch: [5] [ 460/2502] eta: 0:26:04 lr: 0.000019 loss_cls: 4.1364 (4.0192) grad_norm: 2.2526 (2.3240) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 14:08:58 root] (utils.py 283): INFO Epoch: [5] [ 470/2502] eta: 0:25:57 lr: 0.000019 loss_cls: 4.1050 (4.0143) grad_norm: 2.2517 (2.3235) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 14:09:06 root] (utils.py 283): INFO Epoch: [5] [ 480/2502] eta: 0:25:49 lr: 0.000019 loss_cls: 3.7654 (4.0090) grad_norm: 2.3545 (2.3261) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 14:09:14 root] (utils.py 283): INFO Epoch: [5] [ 490/2502] eta: 0:25:41 lr: 0.000019 loss_cls: 3.7303 (4.0051) grad_norm: 2.3703 (2.3267) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-10 14:09:21 root] (utils.py 283): INFO Epoch: [5] [ 500/2502] eta: 0:25:34 lr: 0.000019 loss_cls: 4.1746 (4.0085) grad_norm: 2.3220 (2.3260) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-10 14:09:29 root] (utils.py 283): INFO Epoch: [5] [ 510/2502] eta: 0:25:26 lr: 0.000019 loss_cls: 4.3326 (4.0139) grad_norm: 2.2786 (2.3249) time: 0.7661 data: 0.0002 max mem: 8426 +[2024-12-10 14:09:37 root] (utils.py 283): INFO Epoch: [5] [ 520/2502] eta: 0:25:18 lr: 0.000019 loss_cls: 4.2113 (4.0143) grad_norm: 2.2835 (2.3242) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 14:09:44 root] (utils.py 283): INFO Epoch: [5] [ 530/2502] eta: 0:25:10 lr: 0.000019 loss_cls: 4.1664 (4.0185) grad_norm: 2.3005 (2.3250) time: 0.7630 data: 0.0002 max mem: 8426 +[2024-12-10 14:09:52 root] (utils.py 283): INFO Epoch: [5] [ 540/2502] eta: 0:25:02 lr: 0.000019 loss_cls: 4.1906 (4.0206) grad_norm: 2.3547 (2.3252) time: 0.7580 data: 0.0002 max mem: 8426 +[2024-12-10 14:09:59 root] (utils.py 283): INFO Epoch: [5] [ 550/2502] eta: 0:24:55 lr: 0.000019 loss_cls: 4.0847 (4.0188) grad_norm: 2.2735 (2.3242) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 14:10:07 root] (utils.py 283): INFO Epoch: [5] [ 560/2502] eta: 0:24:47 lr: 0.000019 loss_cls: 3.9178 (4.0173) grad_norm: 2.2735 (2.3245) time: 0.7715 data: 0.0002 max mem: 8426 +[2024-12-10 14:10:15 root] (utils.py 283): INFO Epoch: [5] [ 570/2502] eta: 0:24:40 lr: 0.000019 loss_cls: 3.9178 (4.0114) grad_norm: 2.3078 (2.3244) time: 0.7661 data: 0.0002 max mem: 8426 +[2024-12-10 14:10:22 root] (utils.py 283): INFO Epoch: [5] [ 580/2502] eta: 0:24:32 lr: 0.000019 loss_cls: 4.1762 (4.0147) grad_norm: 2.3289 (2.3254) time: 0.7633 data: 0.0002 max mem: 8426 +[2024-12-10 14:10:30 root] (utils.py 283): INFO Epoch: [5] [ 590/2502] eta: 0:24:24 lr: 0.000019 loss_cls: 4.3233 (4.0168) grad_norm: 2.3167 (2.3250) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 14:10:38 root] (utils.py 283): INFO Epoch: [5] [ 600/2502] eta: 0:24:17 lr: 0.000019 loss_cls: 4.0935 (4.0168) grad_norm: 2.3167 (2.3265) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-10 14:10:46 root] (utils.py 283): INFO Epoch: [5] [ 610/2502] eta: 0:24:09 lr: 0.000019 loss_cls: 4.0117 (4.0140) grad_norm: 2.4077 (2.3267) time: 0.7728 data: 0.0002 max mem: 8426 +[2024-12-10 14:10:53 root] (utils.py 283): INFO Epoch: [5] [ 620/2502] eta: 0:24:02 lr: 0.000019 loss_cls: 3.9334 (4.0129) grad_norm: 2.2689 (2.3270) time: 0.7705 data: 0.0002 max mem: 8426 +[2024-12-10 14:11:01 root] (utils.py 283): INFO Epoch: [5] [ 630/2502] eta: 0:23:54 lr: 0.000019 loss_cls: 4.0768 (4.0111) grad_norm: 2.3228 (2.3269) time: 0.7630 data: 0.0002 max mem: 8426 +[2024-12-10 14:11:08 root] (utils.py 283): INFO Epoch: [5] [ 640/2502] eta: 0:23:46 lr: 0.000019 loss_cls: 4.2969 (4.0159) grad_norm: 2.3189 (2.3271) time: 0.7633 data: 0.0003 max mem: 8426 +[2024-12-10 14:11:16 root] (utils.py 283): INFO Epoch: [5] [ 650/2502] eta: 0:23:38 lr: 0.000019 loss_cls: 4.1974 (4.0138) grad_norm: 2.2775 (2.3277) time: 0.7647 data: 0.0003 max mem: 8426 +[2024-12-10 14:11:24 root] (utils.py 283): INFO Epoch: [5] [ 660/2502] eta: 0:23:31 lr: 0.000019 loss_cls: 3.8821 (4.0083) grad_norm: 2.2904 (2.3279) time: 0.7686 data: 0.0003 max mem: 8426 +[2024-12-10 14:11:31 root] (utils.py 283): INFO Epoch: [5] [ 670/2502] eta: 0:23:23 lr: 0.000019 loss_cls: 4.0365 (4.0114) grad_norm: 2.2800 (2.3272) time: 0.7665 data: 0.0002 max mem: 8426 +[2024-12-10 14:11:39 root] (utils.py 283): INFO Epoch: [5] [ 680/2502] eta: 0:23:15 lr: 0.000019 loss_cls: 4.0365 (4.0090) grad_norm: 2.2539 (2.3265) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 14:11:47 root] (utils.py 283): INFO Epoch: [5] [ 690/2502] eta: 0:23:08 lr: 0.000019 loss_cls: 3.9662 (4.0084) grad_norm: 2.2989 (2.3267) time: 0.7695 data: 0.0002 max mem: 8426 +[2024-12-10 14:11:54 root] (utils.py 283): INFO Epoch: [5] [ 700/2502] eta: 0:23:00 lr: 0.000019 loss_cls: 4.0953 (4.0101) grad_norm: 2.3092 (2.3272) time: 0.7703 data: 0.0002 max mem: 8426 +[2024-12-10 14:12:02 root] (utils.py 283): INFO Epoch: [5] [ 710/2502] eta: 0:22:53 lr: 0.000019 loss_cls: 4.1879 (4.0127) grad_norm: 2.2990 (2.3267) time: 0.7734 data: 0.0002 max mem: 8426 +[2024-12-10 14:12:10 root] (utils.py 283): INFO Epoch: [5] [ 720/2502] eta: 0:22:45 lr: 0.000019 loss_cls: 4.1351 (4.0135) grad_norm: 2.2895 (2.3273) time: 0.7698 data: 0.0003 max mem: 8426 +[2024-12-10 14:12:17 root] (utils.py 283): INFO Epoch: [5] [ 730/2502] eta: 0:22:37 lr: 0.000019 loss_cls: 3.9297 (4.0094) grad_norm: 2.2735 (2.3260) time: 0.7592 data: 0.0002 max mem: 8426 +[2024-12-10 14:12:25 root] (utils.py 283): INFO Epoch: [5] [ 740/2502] eta: 0:22:29 lr: 0.000019 loss_cls: 3.8014 (4.0064) grad_norm: 2.2750 (2.3257) time: 0.7609 data: 0.0002 max mem: 8426 +[2024-12-10 14:12:33 root] (utils.py 283): INFO Epoch: [5] [ 750/2502] eta: 0:22:22 lr: 0.000019 loss_cls: 3.9325 (4.0047) grad_norm: 2.2907 (2.3261) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 14:12:40 root] (utils.py 283): INFO Epoch: [5] [ 760/2502] eta: 0:22:14 lr: 0.000019 loss_cls: 3.8902 (4.0027) grad_norm: 2.2824 (2.3253) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 14:12:48 root] (utils.py 283): INFO Epoch: [5] [ 770/2502] eta: 0:22:06 lr: 0.000019 loss_cls: 3.7766 (4.0004) grad_norm: 2.2058 (2.3241) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 14:12:56 root] (utils.py 283): INFO Epoch: [5] [ 780/2502] eta: 0:21:59 lr: 0.000019 loss_cls: 4.0453 (4.0026) grad_norm: 2.2571 (2.3243) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 14:13:03 root] (utils.py 283): INFO Epoch: [5] [ 790/2502] eta: 0:21:51 lr: 0.000019 loss_cls: 4.1540 (4.0007) grad_norm: 2.3226 (2.3244) time: 0.7673 data: 0.0002 max mem: 8426 +[2024-12-10 14:13:11 root] (utils.py 283): INFO Epoch: [5] [ 800/2502] eta: 0:21:43 lr: 0.000019 loss_cls: 3.6312 (3.9947) grad_norm: 2.3266 (2.3249) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 14:13:19 root] (utils.py 283): INFO Epoch: [5] [ 810/2502] eta: 0:21:36 lr: 0.000019 loss_cls: 3.4954 (3.9919) grad_norm: 2.2857 (2.3242) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 14:13:26 root] (utils.py 283): INFO Epoch: [5] [ 820/2502] eta: 0:21:28 lr: 0.000019 loss_cls: 3.9914 (3.9925) grad_norm: 2.2949 (2.3247) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-10 14:13:34 root] (utils.py 283): INFO Epoch: [5] [ 830/2502] eta: 0:21:20 lr: 0.000019 loss_cls: 4.0594 (3.9905) grad_norm: 2.3509 (2.3254) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 14:13:42 root] (utils.py 283): INFO Epoch: [5] [ 840/2502] eta: 0:21:13 lr: 0.000019 loss_cls: 4.2103 (3.9911) grad_norm: 2.3507 (2.3261) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 14:13:49 root] (utils.py 283): INFO Epoch: [5] [ 850/2502] eta: 0:21:05 lr: 0.000019 loss_cls: 4.2103 (3.9916) grad_norm: 2.3507 (2.3265) time: 0.7633 data: 0.0002 max mem: 8426 +[2024-12-10 14:13:57 root] (utils.py 283): INFO Epoch: [5] [ 860/2502] eta: 0:20:57 lr: 0.000019 loss_cls: 4.3072 (3.9944) grad_norm: 2.3189 (2.3264) time: 0.7632 data: 0.0002 max mem: 8426 +[2024-12-10 14:14:05 root] (utils.py 283): INFO Epoch: [5] [ 870/2502] eta: 0:20:50 lr: 0.000019 loss_cls: 4.3072 (3.9974) grad_norm: 2.3308 (2.3261) time: 0.7675 data: 0.0002 max mem: 8426 +[2024-12-10 14:14:12 root] (utils.py 283): INFO Epoch: [5] [ 880/2502] eta: 0:20:42 lr: 0.000019 loss_cls: 3.9754 (3.9947) grad_norm: 2.2826 (2.3255) time: 0.7768 data: 0.0002 max mem: 8426 +[2024-12-10 14:14:20 root] (utils.py 283): INFO Epoch: [5] [ 890/2502] eta: 0:20:35 lr: 0.000019 loss_cls: 3.4594 (3.9891) grad_norm: 2.2826 (2.3251) time: 0.7841 data: 0.0002 max mem: 8426 +[2024-12-10 14:14:28 root] (utils.py 283): INFO Epoch: [5] [ 900/2502] eta: 0:20:27 lr: 0.000019 loss_cls: 3.3478 (3.9825) grad_norm: 2.2842 (2.3245) time: 0.7812 data: 0.0002 max mem: 8426 +[2024-12-10 14:14:36 root] (utils.py 283): INFO Epoch: [5] [ 910/2502] eta: 0:20:20 lr: 0.000019 loss_cls: 3.8214 (3.9822) grad_norm: 2.3473 (2.3249) time: 0.7762 data: 0.0002 max mem: 8426 +[2024-12-10 14:14:43 root] (utils.py 283): INFO Epoch: [5] [ 920/2502] eta: 0:20:12 lr: 0.000019 loss_cls: 4.1045 (3.9838) grad_norm: 2.3221 (2.3241) time: 0.7698 data: 0.0002 max mem: 8426 +[2024-12-10 14:14:51 root] (utils.py 283): INFO Epoch: [5] [ 930/2502] eta: 0:20:05 lr: 0.000019 loss_cls: 4.2958 (3.9874) grad_norm: 2.2636 (2.3246) time: 0.7675 data: 0.0002 max mem: 8426 +[2024-12-10 14:14:59 root] (utils.py 283): INFO Epoch: [5] [ 940/2502] eta: 0:19:57 lr: 0.000019 loss_cls: 4.2416 (3.9883) grad_norm: 2.3188 (2.3239) time: 0.7723 data: 0.0003 max mem: 8426 +[2024-12-10 14:15:06 root] (utils.py 283): INFO Epoch: [5] [ 950/2502] eta: 0:19:49 lr: 0.000019 loss_cls: 4.0584 (3.9873) grad_norm: 2.3375 (2.3246) time: 0.7654 data: 0.0003 max mem: 8426 +[2024-12-10 14:15:14 root] (utils.py 283): INFO Epoch: [5] [ 960/2502] eta: 0:19:42 lr: 0.000019 loss_cls: 4.1187 (3.9892) grad_norm: 2.3947 (2.3243) time: 0.7608 data: 0.0002 max mem: 8426 +[2024-12-10 14:15:22 root] (utils.py 283): INFO Epoch: [5] [ 970/2502] eta: 0:19:34 lr: 0.000019 loss_cls: 4.1187 (3.9884) grad_norm: 2.2946 (2.3241) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 14:15:29 root] (utils.py 283): INFO Epoch: [5] [ 980/2502] eta: 0:19:26 lr: 0.000019 loss_cls: 3.8959 (3.9880) grad_norm: 2.2535 (2.3237) time: 0.7615 data: 0.0002 max mem: 8426 +[2024-12-10 14:15:37 root] (utils.py 283): INFO Epoch: [5] [ 990/2502] eta: 0:19:18 lr: 0.000019 loss_cls: 3.9619 (3.9877) grad_norm: 2.2262 (2.3230) time: 0.7606 data: 0.0002 max mem: 8426 +[2024-12-10 14:15:45 root] (utils.py 283): INFO Epoch: [5] [1000/2502] eta: 0:19:11 lr: 0.000019 loss_cls: 3.9758 (3.9892) grad_norm: 2.2541 (2.3232) time: 0.7608 data: 0.0002 max mem: 8426 +[2024-12-10 14:15:52 root] (utils.py 283): INFO Epoch: [5] [1010/2502] eta: 0:19:03 lr: 0.000019 loss_cls: 3.7859 (3.9847) grad_norm: 2.3008 (2.3228) time: 0.7693 data: 0.0002 max mem: 8426 +[2024-12-10 14:16:00 root] (utils.py 283): INFO Epoch: [5] [1020/2502] eta: 0:18:56 lr: 0.000019 loss_cls: 3.7600 (3.9826) grad_norm: 2.3264 (2.3234) time: 0.7801 data: 0.0002 max mem: 8426 +[2024-12-10 14:16:08 root] (utils.py 283): INFO Epoch: [5] [1030/2502] eta: 0:18:48 lr: 0.000019 loss_cls: 3.7820 (3.9822) grad_norm: 2.3624 (2.3238) time: 0.7722 data: 0.0002 max mem: 8426 +[2024-12-10 14:16:15 root] (utils.py 283): INFO Epoch: [5] [1040/2502] eta: 0:18:40 lr: 0.000019 loss_cls: 4.0191 (3.9807) grad_norm: 2.3211 (2.3240) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 14:16:23 root] (utils.py 283): INFO Epoch: [5] [1050/2502] eta: 0:18:33 lr: 0.000019 loss_cls: 4.0191 (3.9804) grad_norm: 2.3170 (2.3236) time: 0.7627 data: 0.0003 max mem: 8426 +[2024-12-10 14:16:31 root] (utils.py 283): INFO Epoch: [5] [1060/2502] eta: 0:18:25 lr: 0.000019 loss_cls: 3.8683 (3.9779) grad_norm: 2.2742 (2.3238) time: 0.7623 data: 0.0003 max mem: 8426 +[2024-12-10 14:16:38 root] (utils.py 283): INFO Epoch: [5] [1070/2502] eta: 0:18:17 lr: 0.000019 loss_cls: 3.8413 (3.9773) grad_norm: 2.3613 (2.3240) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 14:16:46 root] (utils.py 283): INFO Epoch: [5] [1080/2502] eta: 0:18:09 lr: 0.000019 loss_cls: 4.1100 (3.9776) grad_norm: 2.3489 (2.3242) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 14:16:54 root] (utils.py 283): INFO Epoch: [5] [1090/2502] eta: 0:18:02 lr: 0.000019 loss_cls: 3.7162 (3.9745) grad_norm: 2.3459 (2.3243) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 14:17:01 root] (utils.py 283): INFO Epoch: [5] [1100/2502] eta: 0:17:54 lr: 0.000019 loss_cls: 3.7637 (3.9745) grad_norm: 2.3205 (2.3246) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 14:17:09 root] (utils.py 283): INFO Epoch: [5] [1110/2502] eta: 0:17:46 lr: 0.000019 loss_cls: 4.3184 (3.9745) grad_norm: 2.3043 (2.3252) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 14:17:16 root] (utils.py 283): INFO Epoch: [5] [1120/2502] eta: 0:17:39 lr: 0.000019 loss_cls: 4.3070 (3.9776) grad_norm: 2.3043 (2.3251) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 14:17:24 root] (utils.py 283): INFO Epoch: [5] [1130/2502] eta: 0:17:31 lr: 0.000019 loss_cls: 4.2238 (3.9738) grad_norm: 2.2531 (2.3243) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 14:17:32 root] (utils.py 283): INFO Epoch: [5] [1140/2502] eta: 0:17:23 lr: 0.000019 loss_cls: 3.5384 (3.9703) grad_norm: 2.2656 (2.3245) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 14:17:39 root] (utils.py 283): INFO Epoch: [5] [1150/2502] eta: 0:17:15 lr: 0.000019 loss_cls: 3.5431 (3.9684) grad_norm: 2.3566 (2.3245) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 14:17:47 root] (utils.py 283): INFO Epoch: [5] [1160/2502] eta: 0:17:08 lr: 0.000019 loss_cls: 4.1304 (3.9694) grad_norm: 2.2854 (2.3243) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-10 14:17:55 root] (utils.py 283): INFO Epoch: [5] [1170/2502] eta: 0:17:00 lr: 0.000019 loss_cls: 4.2092 (3.9707) grad_norm: 2.2652 (2.3247) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 14:18:02 root] (utils.py 283): INFO Epoch: [5] [1180/2502] eta: 0:16:53 lr: 0.000019 loss_cls: 4.1520 (3.9692) grad_norm: 2.3283 (2.3246) time: 0.7719 data: 0.0002 max mem: 8426 +[2024-12-10 14:18:10 root] (utils.py 283): INFO Epoch: [5] [1190/2502] eta: 0:16:45 lr: 0.000019 loss_cls: 3.8824 (3.9694) grad_norm: 2.3367 (2.3250) time: 0.7714 data: 0.0002 max mem: 8426 +[2024-12-10 14:18:18 root] (utils.py 283): INFO Epoch: [5] [1200/2502] eta: 0:16:37 lr: 0.000019 loss_cls: 4.2849 (3.9704) grad_norm: 2.3306 (2.3251) time: 0.7612 data: 0.0002 max mem: 8426 +[2024-12-10 14:18:25 root] (utils.py 283): INFO Epoch: [5] [1210/2502] eta: 0:16:30 lr: 0.000019 loss_cls: 3.9214 (3.9698) grad_norm: 2.3015 (2.3253) time: 0.7654 data: 0.0002 max mem: 8426 +[2024-12-10 14:18:33 root] (utils.py 283): INFO Epoch: [5] [1220/2502] eta: 0:16:22 lr: 0.000019 loss_cls: 3.9041 (3.9688) grad_norm: 2.3015 (2.3253) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 14:18:41 root] (utils.py 283): INFO Epoch: [5] [1230/2502] eta: 0:16:14 lr: 0.000019 loss_cls: 3.8728 (3.9681) grad_norm: 2.3472 (2.3256) time: 0.7632 data: 0.0002 max mem: 8426 +[2024-12-10 14:18:48 root] (utils.py 283): INFO Epoch: [5] [1240/2502] eta: 0:16:06 lr: 0.000019 loss_cls: 4.0904 (3.9702) grad_norm: 2.3423 (2.3254) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 14:18:56 root] (utils.py 283): INFO Epoch: [5] [1250/2502] eta: 0:15:59 lr: 0.000019 loss_cls: 4.1362 (3.9706) grad_norm: 2.3199 (2.3251) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-10 14:19:04 root] (utils.py 283): INFO Epoch: [5] [1260/2502] eta: 0:15:51 lr: 0.000019 loss_cls: 4.0693 (3.9710) grad_norm: 2.3324 (2.3255) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 14:19:11 root] (utils.py 283): INFO Epoch: [5] [1270/2502] eta: 0:15:43 lr: 0.000019 loss_cls: 4.2282 (3.9735) grad_norm: 2.3876 (2.3262) time: 0.7633 data: 0.0002 max mem: 8426 +[2024-12-10 14:19:19 root] (utils.py 283): INFO Epoch: [5] [1280/2502] eta: 0:15:36 lr: 0.000019 loss_cls: 4.2282 (3.9727) grad_norm: 2.3957 (2.3265) time: 0.7679 data: 0.0002 max mem: 8426 +[2024-12-10 14:19:27 root] (utils.py 283): INFO Epoch: [5] [1290/2502] eta: 0:15:28 lr: 0.000019 loss_cls: 3.9260 (3.9729) grad_norm: 2.3847 (2.3270) time: 0.7687 data: 0.0002 max mem: 8426 +[2024-12-10 14:19:34 root] (utils.py 283): INFO Epoch: [5] [1300/2502] eta: 0:15:20 lr: 0.000019 loss_cls: 3.8326 (3.9705) grad_norm: 2.3046 (2.3268) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 14:19:42 root] (utils.py 283): INFO Epoch: [5] [1310/2502] eta: 0:15:13 lr: 0.000019 loss_cls: 3.7839 (3.9688) grad_norm: 2.3032 (2.3268) time: 0.7605 data: 0.0002 max mem: 8426 +[2024-12-10 14:19:49 root] (utils.py 283): INFO Epoch: [5] [1320/2502] eta: 0:15:05 lr: 0.000019 loss_cls: 3.8743 (3.9681) grad_norm: 2.2944 (2.3264) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 14:19:57 root] (utils.py 283): INFO Epoch: [5] [1330/2502] eta: 0:14:57 lr: 0.000019 loss_cls: 3.9628 (3.9695) grad_norm: 2.2944 (2.3269) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 14:20:05 root] (utils.py 283): INFO Epoch: [5] [1340/2502] eta: 0:14:50 lr: 0.000019 loss_cls: 4.2538 (3.9691) grad_norm: 2.2966 (2.3267) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 14:20:12 root] (utils.py 283): INFO Epoch: [5] [1350/2502] eta: 0:14:42 lr: 0.000019 loss_cls: 3.9320 (3.9696) grad_norm: 2.2940 (2.3266) time: 0.7661 data: 0.0002 max mem: 8426 +[2024-12-10 14:20:20 root] (utils.py 283): INFO Epoch: [5] [1360/2502] eta: 0:14:34 lr: 0.000019 loss_cls: 3.9449 (3.9700) grad_norm: 2.3093 (2.3266) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 14:20:28 root] (utils.py 283): INFO Epoch: [5] [1370/2502] eta: 0:14:27 lr: 0.000019 loss_cls: 3.9181 (3.9683) grad_norm: 2.3186 (2.3269) time: 0.7648 data: 0.0003 max mem: 8426 +[2024-12-10 14:20:35 root] (utils.py 283): INFO Epoch: [5] [1380/2502] eta: 0:14:19 lr: 0.000019 loss_cls: 3.9916 (3.9696) grad_norm: 2.3376 (2.3275) time: 0.7703 data: 0.0003 max mem: 8426 +[2024-12-10 14:20:43 root] (utils.py 283): INFO Epoch: [5] [1390/2502] eta: 0:14:11 lr: 0.000019 loss_cls: 4.1865 (3.9700) grad_norm: 2.3484 (2.3276) time: 0.7714 data: 0.0002 max mem: 8426 +[2024-12-10 14:20:51 root] (utils.py 283): INFO Epoch: [5] [1400/2502] eta: 0:14:04 lr: 0.000019 loss_cls: 4.0549 (3.9704) grad_norm: 2.3552 (2.3282) time: 0.7697 data: 0.0002 max mem: 8426 +[2024-12-10 14:20:58 root] (utils.py 283): INFO Epoch: [5] [1410/2502] eta: 0:13:56 lr: 0.000019 loss_cls: 3.9860 (3.9687) grad_norm: 2.2598 (2.3276) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-10 14:21:06 root] (utils.py 283): INFO Epoch: [5] [1420/2502] eta: 0:13:48 lr: 0.000019 loss_cls: 3.9077 (3.9682) grad_norm: 2.2409 (2.3271) time: 0.7630 data: 0.0002 max mem: 8426 +[2024-12-10 14:21:14 root] (utils.py 283): INFO Epoch: [5] [1430/2502] eta: 0:13:41 lr: 0.000019 loss_cls: 3.9658 (3.9680) grad_norm: 2.2647 (2.3267) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 14:21:21 root] (utils.py 283): INFO Epoch: [5] [1440/2502] eta: 0:13:33 lr: 0.000019 loss_cls: 3.9658 (3.9666) grad_norm: 2.2639 (2.3262) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 14:21:29 root] (utils.py 283): INFO Epoch: [5] [1450/2502] eta: 0:13:25 lr: 0.000019 loss_cls: 4.0067 (3.9662) grad_norm: 2.2521 (2.3260) time: 0.7617 data: 0.0002 max mem: 8426 +[2024-12-10 14:21:37 root] (utils.py 283): INFO Epoch: [5] [1460/2502] eta: 0:13:18 lr: 0.000019 loss_cls: 4.0971 (3.9660) grad_norm: 2.3150 (2.3262) time: 0.7602 data: 0.0002 max mem: 8426 +[2024-12-10 14:21:44 root] (utils.py 283): INFO Epoch: [5] [1470/2502] eta: 0:13:10 lr: 0.000019 loss_cls: 4.0930 (3.9649) grad_norm: 2.2867 (2.3258) time: 0.7718 data: 0.0002 max mem: 8426 +[2024-12-10 14:21:52 root] (utils.py 283): INFO Epoch: [5] [1480/2502] eta: 0:13:02 lr: 0.000019 loss_cls: 4.1116 (3.9666) grad_norm: 2.2596 (2.3258) time: 0.7731 data: 0.0002 max mem: 8426 +[2024-12-10 14:22:00 root] (utils.py 283): INFO Epoch: [5] [1490/2502] eta: 0:12:55 lr: 0.000019 loss_cls: 4.2824 (3.9682) grad_norm: 2.3407 (2.3259) time: 0.7654 data: 0.0002 max mem: 8426 +[2024-12-10 14:22:07 root] (utils.py 283): INFO Epoch: [5] [1500/2502] eta: 0:12:47 lr: 0.000019 loss_cls: 4.3532 (3.9701) grad_norm: 2.3486 (2.3264) time: 0.7680 data: 0.0002 max mem: 8426 +[2024-12-10 14:22:15 root] (utils.py 283): INFO Epoch: [5] [1510/2502] eta: 0:12:40 lr: 0.000019 loss_cls: 4.1416 (3.9692) grad_norm: 2.3367 (2.3265) time: 0.7715 data: 0.0002 max mem: 8426 +[2024-12-10 14:22:23 root] (utils.py 283): INFO Epoch: [5] [1520/2502] eta: 0:12:32 lr: 0.000019 loss_cls: 4.0089 (3.9704) grad_norm: 2.2957 (2.3267) time: 0.7691 data: 0.0003 max mem: 8426 +[2024-12-10 14:22:30 root] (utils.py 283): INFO Epoch: [5] [1530/2502] eta: 0:12:24 lr: 0.000019 loss_cls: 4.1272 (3.9692) grad_norm: 2.2957 (2.3267) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 14:22:38 root] (utils.py 283): INFO Epoch: [5] [1540/2502] eta: 0:12:17 lr: 0.000019 loss_cls: 3.9639 (3.9704) grad_norm: 2.3221 (2.3267) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-10 14:22:46 root] (utils.py 283): INFO Epoch: [5] [1550/2502] eta: 0:12:09 lr: 0.000019 loss_cls: 3.8761 (3.9690) grad_norm: 2.3137 (2.3266) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 14:22:53 root] (utils.py 283): INFO Epoch: [5] [1560/2502] eta: 0:12:01 lr: 0.000019 loss_cls: 4.0383 (3.9710) grad_norm: 2.2888 (2.3264) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 14:23:01 root] (utils.py 283): INFO Epoch: [5] [1570/2502] eta: 0:11:54 lr: 0.000019 loss_cls: 4.2270 (3.9724) grad_norm: 2.3080 (2.3266) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 14:23:09 root] (utils.py 283): INFO Epoch: [5] [1580/2502] eta: 0:11:46 lr: 0.000019 loss_cls: 4.1345 (3.9743) grad_norm: 2.3019 (2.3265) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 14:23:16 root] (utils.py 283): INFO Epoch: [5] [1590/2502] eta: 0:11:38 lr: 0.000019 loss_cls: 4.1086 (3.9743) grad_norm: 2.3488 (2.3268) time: 0.7692 data: 0.0002 max mem: 8426 +[2024-12-10 14:23:24 root] (utils.py 283): INFO Epoch: [5] [1600/2502] eta: 0:11:31 lr: 0.000019 loss_cls: 3.7511 (3.9733) grad_norm: 2.3525 (2.3271) time: 0.7680 data: 0.0002 max mem: 8426 +[2024-12-10 14:23:32 root] (utils.py 283): INFO Epoch: [5] [1610/2502] eta: 0:11:23 lr: 0.000019 loss_cls: 3.6897 (3.9722) grad_norm: 2.3275 (2.3272) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 14:23:39 root] (utils.py 283): INFO Epoch: [5] [1620/2502] eta: 0:11:15 lr: 0.000019 loss_cls: 4.0203 (3.9731) grad_norm: 2.2743 (2.3273) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 14:23:47 root] (utils.py 283): INFO Epoch: [5] [1630/2502] eta: 0:11:08 lr: 0.000019 loss_cls: 4.2040 (3.9739) grad_norm: 2.2712 (2.3273) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-10 14:23:55 root] (utils.py 283): INFO Epoch: [5] [1640/2502] eta: 0:11:00 lr: 0.000019 loss_cls: 4.2945 (3.9755) grad_norm: 2.2712 (2.3269) time: 0.7690 data: 0.0002 max mem: 8426 +[2024-12-10 14:24:02 root] (utils.py 283): INFO Epoch: [5] [1650/2502] eta: 0:10:52 lr: 0.000019 loss_cls: 4.2416 (3.9758) grad_norm: 2.2859 (2.3268) time: 0.7675 data: 0.0002 max mem: 8426 +[2024-12-10 14:24:10 root] (utils.py 283): INFO Epoch: [5] [1660/2502] eta: 0:10:45 lr: 0.000019 loss_cls: 4.0375 (3.9744) grad_norm: 2.2955 (2.3270) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 14:24:18 root] (utils.py 283): INFO Epoch: [5] [1670/2502] eta: 0:10:37 lr: 0.000019 loss_cls: 3.7143 (3.9736) grad_norm: 2.2952 (2.3271) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 14:24:25 root] (utils.py 283): INFO Epoch: [5] [1680/2502] eta: 0:10:29 lr: 0.000019 loss_cls: 4.1293 (3.9755) grad_norm: 2.2728 (2.3268) time: 0.7624 data: 0.0003 max mem: 8426 +[2024-12-10 14:24:33 root] (utils.py 283): INFO Epoch: [5] [1690/2502] eta: 0:10:22 lr: 0.000019 loss_cls: 4.2900 (3.9773) grad_norm: 2.2827 (2.3267) time: 0.7620 data: 0.0003 max mem: 8426 +[2024-12-10 14:24:41 root] (utils.py 283): INFO Epoch: [5] [1700/2502] eta: 0:10:14 lr: 0.000019 loss_cls: 3.9153 (3.9742) grad_norm: 2.3223 (2.3274) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 14:24:48 root] (utils.py 283): INFO Epoch: [5] [1710/2502] eta: 0:10:06 lr: 0.000019 loss_cls: 3.5042 (3.9735) grad_norm: 2.3165 (2.3273) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-10 14:24:56 root] (utils.py 283): INFO Epoch: [5] [1720/2502] eta: 0:09:59 lr: 0.000019 loss_cls: 3.8680 (3.9723) grad_norm: 2.2622 (2.3271) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 14:25:03 root] (utils.py 283): INFO Epoch: [5] [1730/2502] eta: 0:09:51 lr: 0.000019 loss_cls: 4.0617 (3.9728) grad_norm: 2.3233 (2.3270) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 14:25:11 root] (utils.py 283): INFO Epoch: [5] [1740/2502] eta: 0:09:43 lr: 0.000019 loss_cls: 4.1424 (3.9735) grad_norm: 2.3408 (2.3271) time: 0.7723 data: 0.0002 max mem: 8426 +[2024-12-10 14:25:19 root] (utils.py 283): INFO Epoch: [5] [1750/2502] eta: 0:09:36 lr: 0.000019 loss_cls: 3.8491 (3.9726) grad_norm: 2.3099 (2.3271) time: 0.7769 data: 0.0002 max mem: 8426 +[2024-12-10 14:25:27 root] (utils.py 283): INFO Epoch: [5] [1760/2502] eta: 0:09:28 lr: 0.000019 loss_cls: 3.8444 (3.9722) grad_norm: 2.3496 (2.3275) time: 0.7715 data: 0.0002 max mem: 8426 +[2024-12-10 14:25:34 root] (utils.py 283): INFO Epoch: [5] [1770/2502] eta: 0:09:20 lr: 0.000019 loss_cls: 3.7546 (3.9712) grad_norm: 2.3449 (2.3275) time: 0.7665 data: 0.0003 max mem: 8426 +[2024-12-10 14:25:42 root] (utils.py 283): INFO Epoch: [5] [1780/2502] eta: 0:09:13 lr: 0.000019 loss_cls: 3.8789 (3.9724) grad_norm: 2.3449 (2.3280) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 14:25:50 root] (utils.py 283): INFO Epoch: [5] [1790/2502] eta: 0:09:05 lr: 0.000019 loss_cls: 4.2018 (3.9727) grad_norm: 2.3647 (2.3278) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 14:25:57 root] (utils.py 283): INFO Epoch: [5] [1800/2502] eta: 0:08:57 lr: 0.000019 loss_cls: 3.7908 (3.9712) grad_norm: 2.2493 (2.3273) time: 0.7612 data: 0.0002 max mem: 8426 +[2024-12-10 14:26:05 root] (utils.py 283): INFO Epoch: [5] [1810/2502] eta: 0:08:50 lr: 0.000019 loss_cls: 3.9394 (3.9723) grad_norm: 2.2532 (2.3271) time: 0.7691 data: 0.0002 max mem: 8426 +[2024-12-10 14:26:13 root] (utils.py 283): INFO Epoch: [5] [1820/2502] eta: 0:08:42 lr: 0.000019 loss_cls: 4.2252 (3.9725) grad_norm: 2.2743 (2.3269) time: 0.7813 data: 0.0002 max mem: 8426 +[2024-12-10 14:26:21 root] (utils.py 283): INFO Epoch: [5] [1830/2502] eta: 0:08:34 lr: 0.000019 loss_cls: 4.1250 (3.9724) grad_norm: 2.2543 (2.3265) time: 0.7770 data: 0.0002 max mem: 8426 +[2024-12-10 14:26:28 root] (utils.py 283): INFO Epoch: [5] [1840/2502] eta: 0:08:27 lr: 0.000019 loss_cls: 4.0534 (3.9728) grad_norm: 2.2890 (2.3265) time: 0.7688 data: 0.0002 max mem: 8426 +[2024-12-10 14:26:36 root] (utils.py 283): INFO Epoch: [5] [1850/2502] eta: 0:08:19 lr: 0.000019 loss_cls: 3.9819 (3.9718) grad_norm: 2.2966 (2.3263) time: 0.7661 data: 0.0002 max mem: 8426 +[2024-12-10 14:26:43 root] (utils.py 283): INFO Epoch: [5] [1860/2502] eta: 0:08:11 lr: 0.000019 loss_cls: 4.2169 (3.9735) grad_norm: 2.2739 (2.3263) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 14:26:51 root] (utils.py 283): INFO Epoch: [5] [1870/2502] eta: 0:08:04 lr: 0.000019 loss_cls: 4.3541 (3.9742) grad_norm: 2.2739 (2.3259) time: 0.7631 data: 0.0003 max mem: 8426 +[2024-12-10 14:26:59 root] (utils.py 283): INFO Epoch: [5] [1880/2502] eta: 0:07:56 lr: 0.000019 loss_cls: 4.0033 (3.9736) grad_norm: 2.2866 (2.3261) time: 0.7660 data: 0.0003 max mem: 8426 +[2024-12-10 14:27:06 root] (utils.py 283): INFO Epoch: [5] [1890/2502] eta: 0:07:48 lr: 0.000019 loss_cls: 4.0545 (3.9749) grad_norm: 2.3329 (2.3262) time: 0.7670 data: 0.0002 max mem: 8426 +[2024-12-10 14:27:14 root] (utils.py 283): INFO Epoch: [5] [1900/2502] eta: 0:07:41 lr: 0.000019 loss_cls: 4.1700 (3.9757) grad_norm: 2.3017 (2.3259) time: 0.7681 data: 0.0002 max mem: 8426 +[2024-12-10 14:27:22 root] (utils.py 283): INFO Epoch: [5] [1910/2502] eta: 0:07:33 lr: 0.000019 loss_cls: 4.1700 (3.9750) grad_norm: 2.3017 (2.3260) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 14:27:29 root] (utils.py 283): INFO Epoch: [5] [1920/2502] eta: 0:07:25 lr: 0.000019 loss_cls: 4.2476 (3.9770) grad_norm: 2.2776 (2.3259) time: 0.7615 data: 0.0002 max mem: 8426 +[2024-12-10 14:27:37 root] (utils.py 283): INFO Epoch: [5] [1930/2502] eta: 0:07:18 lr: 0.000019 loss_cls: 4.2242 (3.9766) grad_norm: 2.2842 (2.3260) time: 0.7623 data: 0.0003 max mem: 8426 +[2024-12-10 14:27:45 root] (utils.py 283): INFO Epoch: [5] [1940/2502] eta: 0:07:10 lr: 0.000019 loss_cls: 4.0429 (3.9766) grad_norm: 2.2859 (2.3257) time: 0.7631 data: 0.0003 max mem: 8426 +[2024-12-10 14:27:52 root] (utils.py 283): INFO Epoch: [5] [1950/2502] eta: 0:07:02 lr: 0.000019 loss_cls: 4.1994 (3.9773) grad_norm: 2.2778 (2.3258) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 14:28:00 root] (utils.py 283): INFO Epoch: [5] [1960/2502] eta: 0:06:55 lr: 0.000019 loss_cls: 4.2953 (3.9793) grad_norm: 2.3364 (2.3260) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 14:28:08 root] (utils.py 283): INFO Epoch: [5] [1970/2502] eta: 0:06:47 lr: 0.000019 loss_cls: 4.2953 (3.9796) grad_norm: 2.4029 (2.3265) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 14:28:15 root] (utils.py 283): INFO Epoch: [5] [1980/2502] eta: 0:06:39 lr: 0.000019 loss_cls: 4.2882 (3.9811) grad_norm: 2.3409 (2.3263) time: 0.7617 data: 0.0002 max mem: 8426 +[2024-12-10 14:28:23 root] (utils.py 283): INFO Epoch: [5] [1990/2502] eta: 0:06:32 lr: 0.000019 loss_cls: 4.1532 (3.9809) grad_norm: 2.3377 (2.3266) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 14:28:30 root] (utils.py 283): INFO Epoch: [5] [2000/2502] eta: 0:06:24 lr: 0.000019 loss_cls: 3.8612 (3.9805) grad_norm: 2.3544 (2.3269) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 14:28:38 root] (utils.py 283): INFO Epoch: [5] [2010/2502] eta: 0:06:16 lr: 0.000019 loss_cls: 4.1156 (3.9821) grad_norm: 2.3280 (2.3270) time: 0.7677 data: 0.0002 max mem: 8426 +[2024-12-10 14:28:46 root] (utils.py 283): INFO Epoch: [5] [2020/2502] eta: 0:06:09 lr: 0.000019 loss_cls: 4.2790 (3.9820) grad_norm: 2.3191 (2.3274) time: 0.7666 data: 0.0002 max mem: 8426 +[2024-12-10 14:28:53 root] (utils.py 283): INFO Epoch: [5] [2030/2502] eta: 0:06:01 lr: 0.000019 loss_cls: 4.1136 (3.9821) grad_norm: 2.2721 (2.3268) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 14:29:01 root] (utils.py 283): INFO Epoch: [5] [2040/2502] eta: 0:05:53 lr: 0.000019 loss_cls: 3.8796 (3.9811) grad_norm: 2.2490 (2.3266) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 14:29:09 root] (utils.py 283): INFO Epoch: [5] [2050/2502] eta: 0:05:46 lr: 0.000019 loss_cls: 3.8934 (3.9819) grad_norm: 2.2490 (2.3265) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 14:29:16 root] (utils.py 283): INFO Epoch: [5] [2060/2502] eta: 0:05:38 lr: 0.000019 loss_cls: 4.1890 (3.9813) grad_norm: 2.3020 (2.3263) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 14:29:24 root] (utils.py 283): INFO Epoch: [5] [2070/2502] eta: 0:05:30 lr: 0.000019 loss_cls: 4.1890 (3.9812) grad_norm: 2.2808 (2.3265) time: 0.7747 data: 0.0002 max mem: 8426 +[2024-12-10 14:29:32 root] (utils.py 283): INFO Epoch: [5] [2080/2502] eta: 0:05:23 lr: 0.000019 loss_cls: 4.2443 (3.9816) grad_norm: 2.3067 (2.3265) time: 0.7812 data: 0.0002 max mem: 8426 +[2024-12-10 14:29:40 root] (utils.py 283): INFO Epoch: [5] [2090/2502] eta: 0:05:15 lr: 0.000019 loss_cls: 4.3523 (3.9822) grad_norm: 2.3067 (2.3265) time: 0.7697 data: 0.0003 max mem: 8426 +[2024-12-10 14:29:47 root] (utils.py 283): INFO Epoch: [5] [2100/2502] eta: 0:05:07 lr: 0.000019 loss_cls: 4.3260 (3.9842) grad_norm: 2.2852 (2.3264) time: 0.7633 data: 0.0003 max mem: 8426 +[2024-12-10 14:29:55 root] (utils.py 283): INFO Epoch: [5] [2110/2502] eta: 0:05:00 lr: 0.000019 loss_cls: 4.3204 (3.9853) grad_norm: 2.2892 (2.3262) time: 0.7707 data: 0.0002 max mem: 8426 +[2024-12-10 14:30:03 root] (utils.py 283): INFO Epoch: [5] [2120/2502] eta: 0:04:52 lr: 0.000019 loss_cls: 4.0807 (3.9851) grad_norm: 2.2971 (2.3264) time: 0.7729 data: 0.0002 max mem: 8426 +[2024-12-10 14:30:10 root] (utils.py 283): INFO Epoch: [5] [2130/2502] eta: 0:04:45 lr: 0.000019 loss_cls: 3.9206 (3.9834) grad_norm: 2.3665 (2.3267) time: 0.7718 data: 0.0002 max mem: 8426 +[2024-12-10 14:30:18 root] (utils.py 283): INFO Epoch: [5] [2140/2502] eta: 0:04:37 lr: 0.000019 loss_cls: 3.7972 (3.9835) grad_norm: 2.3326 (2.3268) time: 0.7700 data: 0.0002 max mem: 8426 +[2024-12-10 14:30:26 root] (utils.py 283): INFO Epoch: [5] [2150/2502] eta: 0:04:29 lr: 0.000019 loss_cls: 4.1802 (3.9846) grad_norm: 2.3291 (2.3269) time: 0.7681 data: 0.0002 max mem: 8426 +[2024-12-10 14:30:33 root] (utils.py 283): INFO Epoch: [5] [2160/2502] eta: 0:04:22 lr: 0.000019 loss_cls: 4.0747 (3.9833) grad_norm: 2.3291 (2.3267) time: 0.7673 data: 0.0003 max mem: 8426 +[2024-12-10 14:30:41 root] (utils.py 283): INFO Epoch: [5] [2170/2502] eta: 0:04:14 lr: 0.000019 loss_cls: 3.9282 (3.9821) grad_norm: 2.3331 (2.3267) time: 0.7663 data: 0.0003 max mem: 8426 +[2024-12-10 14:30:49 root] (utils.py 283): INFO Epoch: [5] [2180/2502] eta: 0:04:06 lr: 0.000019 loss_cls: 3.9600 (3.9815) grad_norm: 2.2919 (2.3266) time: 0.7676 data: 0.0002 max mem: 8426 +[2024-12-10 14:30:56 root] (utils.py 283): INFO Epoch: [5] [2190/2502] eta: 0:03:59 lr: 0.000019 loss_cls: 3.5368 (3.9782) grad_norm: 2.2672 (2.3266) time: 0.7654 data: 0.0003 max mem: 8426 +[2024-12-10 14:31:04 root] (utils.py 283): INFO Epoch: [5] [2200/2502] eta: 0:03:51 lr: 0.000019 loss_cls: 3.5556 (3.9781) grad_norm: 2.3748 (2.3271) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 14:31:12 root] (utils.py 283): INFO Epoch: [5] [2210/2502] eta: 0:03:43 lr: 0.000019 loss_cls: 3.9626 (3.9776) grad_norm: 2.3621 (2.3269) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 14:31:19 root] (utils.py 283): INFO Epoch: [5] [2220/2502] eta: 0:03:36 lr: 0.000019 loss_cls: 3.9756 (3.9777) grad_norm: 2.2636 (2.3268) time: 0.7694 data: 0.0002 max mem: 8426 +[2024-12-10 14:31:27 root] (utils.py 283): INFO Epoch: [5] [2230/2502] eta: 0:03:28 lr: 0.000019 loss_cls: 4.2093 (3.9781) grad_norm: 2.2521 (2.3267) time: 0.7688 data: 0.0002 max mem: 8426 +[2024-12-10 14:31:35 root] (utils.py 283): INFO Epoch: [5] [2240/2502] eta: 0:03:20 lr: 0.000019 loss_cls: 4.1062 (3.9777) grad_norm: 2.2833 (2.3265) time: 0.7604 data: 0.0002 max mem: 8426 +[2024-12-10 14:31:42 root] (utils.py 283): INFO Epoch: [5] [2250/2502] eta: 0:03:13 lr: 0.000019 loss_cls: 3.8187 (3.9765) grad_norm: 2.2812 (2.3265) time: 0.7605 data: 0.0002 max mem: 8426 +[2024-12-10 14:31:50 root] (utils.py 283): INFO Epoch: [5] [2260/2502] eta: 0:03:05 lr: 0.000019 loss_cls: 3.4778 (3.9755) grad_norm: 2.2812 (2.3264) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 14:31:58 root] (utils.py 283): INFO Epoch: [5] [2270/2502] eta: 0:02:57 lr: 0.000019 loss_cls: 3.4574 (3.9737) grad_norm: 2.3050 (2.3263) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 14:32:05 root] (utils.py 283): INFO Epoch: [5] [2280/2502] eta: 0:02:50 lr: 0.000019 loss_cls: 3.9759 (3.9754) grad_norm: 2.3075 (2.3262) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 14:32:13 root] (utils.py 283): INFO Epoch: [5] [2290/2502] eta: 0:02:42 lr: 0.000019 loss_cls: 4.3415 (3.9760) grad_norm: 2.2991 (2.3261) time: 0.7689 data: 0.0002 max mem: 8426 +[2024-12-10 14:32:21 root] (utils.py 283): INFO Epoch: [5] [2300/2502] eta: 0:02:34 lr: 0.000019 loss_cls: 4.0121 (3.9760) grad_norm: 2.2991 (2.3259) time: 0.7677 data: 0.0002 max mem: 8426 +[2024-12-10 14:32:28 root] (utils.py 283): INFO Epoch: [5] [2310/2502] eta: 0:02:27 lr: 0.000019 loss_cls: 3.7954 (3.9748) grad_norm: 2.2556 (2.3257) time: 0.7590 data: 0.0002 max mem: 8426 +[2024-12-10 14:32:36 root] (utils.py 283): INFO Epoch: [5] [2320/2502] eta: 0:02:19 lr: 0.000019 loss_cls: 3.7086 (3.9743) grad_norm: 2.2557 (2.3258) time: 0.7613 data: 0.0002 max mem: 8426 +[2024-12-10 14:32:43 root] (utils.py 283): INFO Epoch: [5] [2330/2502] eta: 0:02:11 lr: 0.000019 loss_cls: 4.1297 (3.9747) grad_norm: 2.3292 (2.3258) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 14:32:51 root] (utils.py 283): INFO Epoch: [5] [2340/2502] eta: 0:02:04 lr: 0.000019 loss_cls: 4.2681 (3.9754) grad_norm: 2.3697 (2.3260) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-10 14:32:59 root] (utils.py 283): INFO Epoch: [5] [2350/2502] eta: 0:01:56 lr: 0.000019 loss_cls: 4.0964 (3.9748) grad_norm: 2.3463 (2.3259) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 14:33:06 root] (utils.py 283): INFO Epoch: [5] [2360/2502] eta: 0:01:48 lr: 0.000019 loss_cls: 4.1280 (3.9750) grad_norm: 2.2835 (2.3260) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 14:33:14 root] (utils.py 283): INFO Epoch: [5] [2370/2502] eta: 0:01:41 lr: 0.000019 loss_cls: 4.0331 (3.9744) grad_norm: 2.3416 (2.3262) time: 0.7614 data: 0.0002 max mem: 8426 +[2024-12-10 14:33:22 root] (utils.py 283): INFO Epoch: [5] [2380/2502] eta: 0:01:33 lr: 0.000019 loss_cls: 4.1263 (3.9744) grad_norm: 2.3416 (2.3262) time: 0.7639 data: 0.0003 max mem: 8426 +[2024-12-10 14:33:29 root] (utils.py 283): INFO Epoch: [5] [2390/2502] eta: 0:01:25 lr: 0.000019 loss_cls: 4.2470 (3.9744) grad_norm: 2.2885 (2.3261) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-10 14:33:37 root] (utils.py 283): INFO Epoch: [5] [2400/2502] eta: 0:01:18 lr: 0.000019 loss_cls: 4.3214 (3.9748) grad_norm: 2.2938 (2.3260) time: 0.7656 data: 0.0003 max mem: 8426 +[2024-12-10 14:33:45 root] (utils.py 283): INFO Epoch: [5] [2410/2502] eta: 0:01:10 lr: 0.000019 loss_cls: 4.3214 (3.9756) grad_norm: 2.3057 (2.3259) time: 0.7710 data: 0.0003 max mem: 8426 +[2024-12-10 14:33:53 root] (utils.py 283): INFO Epoch: [5] [2420/2502] eta: 0:01:02 lr: 0.000019 loss_cls: 4.2487 (3.9749) grad_norm: 2.2708 (2.3259) time: 0.7800 data: 0.0002 max mem: 8426 +[2024-12-10 14:34:00 root] (utils.py 283): INFO Epoch: [5] [2430/2502] eta: 0:00:55 lr: 0.000019 loss_cls: 4.1913 (3.9755) grad_norm: 2.2352 (2.3260) time: 0.7813 data: 0.0002 max mem: 8426 +[2024-12-10 14:34:08 root] (utils.py 283): INFO Epoch: [5] [2440/2502] eta: 0:00:47 lr: 0.000019 loss_cls: 4.1612 (3.9754) grad_norm: 2.2783 (2.3258) time: 0.7750 data: 0.0002 max mem: 8426 +[2024-12-10 14:34:16 root] (utils.py 283): INFO Epoch: [5] [2450/2502] eta: 0:00:39 lr: 0.000019 loss_cls: 4.0197 (3.9757) grad_norm: 2.2178 (2.3254) time: 0.7793 data: 0.0002 max mem: 8426 +[2024-12-10 14:34:24 root] (utils.py 283): INFO Epoch: [5] [2460/2502] eta: 0:00:32 lr: 0.000019 loss_cls: 3.8934 (3.9751) grad_norm: 2.2258 (2.3253) time: 0.7843 data: 0.0003 max mem: 8426 +[2024-12-10 14:34:31 root] (utils.py 283): INFO Epoch: [5] [2470/2502] eta: 0:00:24 lr: 0.000019 loss_cls: 3.8121 (3.9749) grad_norm: 2.3063 (2.3254) time: 0.7725 data: 0.0003 max mem: 8426 +[2024-12-10 14:34:39 root] (utils.py 283): INFO Epoch: [5] [2480/2502] eta: 0:00:16 lr: 0.000019 loss_cls: 4.1288 (3.9751) grad_norm: 2.3356 (2.3254) time: 0.7604 data: 0.0003 max mem: 8426 +[2024-12-10 14:34:47 root] (utils.py 283): INFO Epoch: [5] [2490/2502] eta: 0:00:09 lr: 0.000019 loss_cls: 4.2511 (3.9759) grad_norm: 2.3356 (2.3254) time: 0.7821 data: 0.0226 max mem: 8426 +[2024-12-10 14:34:55 root] (utils.py 283): INFO Epoch: [5] [2500/2502] eta: 0:00:01 lr: 0.000019 loss_cls: 4.2562 (3.9765) grad_norm: 2.3418 (2.3256) time: 0.7865 data: 0.0226 max mem: 8426 +[2024-12-10 14:34:55 root] (utils.py 283): INFO Epoch: [5] [2501/2502] eta: 0:00:00 lr: 0.000019 loss_cls: 4.2562 (3.9766) grad_norm: 2.3288 (2.3256) time: 0.7875 data: 0.0226 max mem: 8426 +[2024-12-10 14:34:55 root] (utils.py 297): INFO Epoch: [5] Total time: 0:31:58 (0.7666 s / it) +[2024-12-10 14:34:55 root] (engine.py 179): INFO Averaged stats:lr: 0.000019 loss_cls: 4.2562 (3.9687) grad_norm: 2.3288 (2.3256) +[2024-12-10 14:34:56 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.6427 (0.6427) acc1: 88.2812 (88.2812) acc3: 95.3125 (95.3125) acc5: 99.2188 (99.2188) time: 0.1277 data: 0.0005 max mem: 8426 +[2024-12-10 14:34:57 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7517 (0.8253) acc1: 85.1562 (82.0312) acc3: 95.3125 (93.3239) acc5: 96.8750 (96.5909) time: 0.1279 data: 0.0004 max mem: 8426 +[2024-12-10 14:34:58 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8761 (0.8727) acc1: 79.6875 (81.0640) acc3: 92.1875 (92.7455) acc5: 95.3125 (95.6101) time: 0.1280 data: 0.0004 max mem: 8426 +[2024-12-10 14:35:00 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9591 (0.8898) acc1: 79.6875 (80.0907) acc3: 92.1875 (92.8427) acc5: 95.3125 (95.5897) time: 0.1282 data: 0.0005 max mem: 8426 +[2024-12-10 14:35:01 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8402 (0.8767) acc1: 79.6875 (80.4306) acc3: 93.7500 (92.9306) acc5: 96.0938 (95.6936) time: 0.1283 data: 0.0005 max mem: 8426 +[2024-12-10 14:35:03 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0626 (0.9606) acc1: 75.0000 (78.4467) acc3: 88.2812 (91.6973) acc5: 92.9688 (94.6844) time: 0.1498 data: 0.0221 max mem: 8426 +[2024-12-10 14:35:05 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2509 (1.0047) acc1: 72.6562 (77.8432) acc3: 86.7188 (90.8555) acc5: 90.6250 (94.0061) time: 0.1926 data: 0.0634 max mem: 8426 +[2024-12-10 14:35:06 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:04 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2331 (1.0466) acc1: 74.2188 (76.9366) acc3: 86.7188 (90.1959) acc5: 90.6250 (93.5409) time: 0.1709 data: 0.0417 max mem: 8426 +[2024-12-10 14:35:08 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2331 (1.0808) acc1: 73.4375 (76.1188) acc3: 85.9375 (89.6219) acc5: 90.6250 (93.0170) time: 0.1309 data: 0.0020 max mem: 8426 +[2024-12-10 14:35:09 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2783 (1.1113) acc1: 71.0938 (75.2919) acc3: 85.9375 (89.2600) acc5: 90.6250 (92.7455) time: 0.1333 data: 0.0036 max mem: 8426 +[2024-12-10 14:35:10 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1797 (1.0988) acc1: 74.2188 (75.6080) acc3: 88.2812 (89.4640) acc5: 91.4062 (92.9040) time: 0.1334 data: 0.0044 max mem: 8426 +[2024-12-10 14:35:10 root] (utils.py 297): INFO Test: Total time: 0:00:14 (0.1430 s / it) +[2024-12-10 14:35:10 root] (engine.py 264): INFO * Acc@1 75.568 Acc@3 89.458 Acc@5 92.734 loss 1.100 flops 1.285 layer_flops 1.251 +[2024-12-10 14:35:10 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 75.6% +[2024-12-10 14:35:11 root] (main.py 576): INFO Max accuracy: 75.57% +[2024-12-10 14:35:11 root] (utils.py 283): INFO Epoch: [6] [ 0/2502] eta: 0:31:22 lr: 0.000019 loss_cls: 4.9309 (4.9309) grad_norm: 2.3062 (2.3062) time: 0.7523 data: 0.0004 max mem: 8426 +[2024-12-10 14:35:19 root] (utils.py 283): INFO Epoch: [6] [ 10/2502] eta: 0:31:34 lr: 0.000019 loss_cls: 3.9307 (3.8674) grad_norm: 2.3062 (2.3317) time: 0.7602 data: 0.0003 max mem: 8426 +[2024-12-10 14:35:26 root] (utils.py 283): INFO Epoch: [6] [ 20/2502] eta: 0:31:18 lr: 0.000019 loss_cls: 3.9307 (3.8993) grad_norm: 2.3046 (2.3047) time: 0.7572 data: 0.0002 max mem: 8426 +[2024-12-10 14:35:34 root] (utils.py 283): INFO Epoch: [6] [ 30/2502] eta: 0:31:13 lr: 0.000019 loss_cls: 4.1843 (3.9800) grad_norm: 2.2962 (2.3078) time: 0.7564 data: 0.0002 max mem: 8426 +[2024-12-10 14:35:42 root] (utils.py 283): INFO Epoch: [6] [ 40/2502] eta: 0:31:08 lr: 0.000019 loss_cls: 3.9767 (3.9302) grad_norm: 2.2994 (2.3241) time: 0.7614 data: 0.0003 max mem: 8426 +[2024-12-10 14:35:49 root] (utils.py 283): INFO Epoch: [6] [ 50/2502] eta: 0:31:02 lr: 0.000019 loss_cls: 3.8521 (3.8995) grad_norm: 2.2900 (2.3160) time: 0.7625 data: 0.0003 max mem: 8426 +[2024-12-10 14:35:57 root] (utils.py 283): INFO Epoch: [6] [ 60/2502] eta: 0:31:02 lr: 0.000019 loss_cls: 3.8521 (3.8620) grad_norm: 2.2595 (2.3171) time: 0.7703 data: 0.0003 max mem: 8426 +[2024-12-10 14:36:05 root] (utils.py 283): INFO Epoch: [6] [ 70/2502] eta: 0:31:05 lr: 0.000019 loss_cls: 4.1157 (3.9007) grad_norm: 2.2723 (2.3168) time: 0.7861 data: 0.0003 max mem: 8426 +[2024-12-10 14:36:13 root] (utils.py 283): INFO Epoch: [6] [ 80/2502] eta: 0:31:02 lr: 0.000019 loss_cls: 4.1979 (3.9229) grad_norm: 2.2772 (2.3143) time: 0.7876 data: 0.0003 max mem: 8426 +[2024-12-10 14:36:21 root] (utils.py 283): INFO Epoch: [6] [ 90/2502] eta: 0:30:58 lr: 0.000019 loss_cls: 4.1586 (3.9348) grad_norm: 2.2934 (2.3142) time: 0.7823 data: 0.0002 max mem: 8426 +[2024-12-10 14:36:28 root] (utils.py 283): INFO Epoch: [6] [ 100/2502] eta: 0:30:53 lr: 0.000019 loss_cls: 4.0509 (3.9392) grad_norm: 2.2914 (2.3090) time: 0.7817 data: 0.0002 max mem: 8426 +[2024-12-10 14:36:36 root] (utils.py 283): INFO Epoch: [6] [ 110/2502] eta: 0:30:47 lr: 0.000019 loss_cls: 4.1017 (3.9421) grad_norm: 2.2392 (2.3028) time: 0.7809 data: 0.0003 max mem: 8426 +[2024-12-10 14:36:44 root] (utils.py 283): INFO Epoch: [6] [ 120/2502] eta: 0:30:41 lr: 0.000019 loss_cls: 3.9084 (3.9286) grad_norm: 2.2997 (2.3036) time: 0.7825 data: 0.0002 max mem: 8426 +[2024-12-10 14:36:52 root] (utils.py 283): INFO Epoch: [6] [ 130/2502] eta: 0:30:35 lr: 0.000019 loss_cls: 3.7956 (3.9365) grad_norm: 2.3733 (2.3158) time: 0.7822 data: 0.0002 max mem: 8426 +[2024-12-10 14:37:00 root] (utils.py 283): INFO Epoch: [6] [ 140/2502] eta: 0:30:28 lr: 0.000019 loss_cls: 4.3035 (3.9646) grad_norm: 2.3234 (2.3157) time: 0.7807 data: 0.0002 max mem: 8426 +[2024-12-10 14:37:08 root] (utils.py 283): INFO Epoch: [6] [ 150/2502] eta: 0:30:22 lr: 0.000019 loss_cls: 4.0266 (3.9551) grad_norm: 2.2532 (2.3101) time: 0.7822 data: 0.0002 max mem: 8426 +[2024-12-10 14:37:15 root] (utils.py 283): INFO Epoch: [6] [ 160/2502] eta: 0:30:16 lr: 0.000019 loss_cls: 4.0644 (3.9610) grad_norm: 2.2517 (2.3108) time: 0.7839 data: 0.0002 max mem: 8426 +[2024-12-10 14:37:23 root] (utils.py 283): INFO Epoch: [6] [ 170/2502] eta: 0:30:09 lr: 0.000019 loss_cls: 4.1549 (3.9709) grad_norm: 2.3400 (2.3124) time: 0.7832 data: 0.0002 max mem: 8426 +[2024-12-10 14:37:31 root] (utils.py 283): INFO Epoch: [6] [ 180/2502] eta: 0:30:02 lr: 0.000019 loss_cls: 4.0810 (3.9598) grad_norm: 2.3776 (2.3191) time: 0.7831 data: 0.0003 max mem: 8426 +[2024-12-10 14:37:39 root] (utils.py 283): INFO Epoch: [6] [ 190/2502] eta: 0:29:55 lr: 0.000019 loss_cls: 3.7907 (3.9537) grad_norm: 2.3238 (2.3161) time: 0.7821 data: 0.0003 max mem: 8426 +[2024-12-10 14:37:47 root] (utils.py 283): INFO Epoch: [6] [ 200/2502] eta: 0:29:48 lr: 0.000019 loss_cls: 4.0389 (3.9616) grad_norm: 2.1868 (2.3110) time: 0.7801 data: 0.0003 max mem: 8426 +[2024-12-10 14:37:54 root] (utils.py 283): INFO Epoch: [6] [ 210/2502] eta: 0:29:40 lr: 0.000019 loss_cls: 4.1371 (3.9642) grad_norm: 2.2201 (2.3105) time: 0.7803 data: 0.0003 max mem: 8426 +[2024-12-10 14:38:02 root] (utils.py 283): INFO Epoch: [6] [ 220/2502] eta: 0:29:33 lr: 0.000019 loss_cls: 4.0357 (3.9669) grad_norm: 2.3176 (2.3147) time: 0.7803 data: 0.0003 max mem: 8426 +[2024-12-10 14:38:10 root] (utils.py 283): INFO Epoch: [6] [ 230/2502] eta: 0:29:25 lr: 0.000019 loss_cls: 3.9131 (3.9625) grad_norm: 2.2979 (2.3115) time: 0.7798 data: 0.0003 max mem: 8426 +[2024-12-10 14:38:18 root] (utils.py 283): INFO Epoch: [6] [ 240/2502] eta: 0:29:17 lr: 0.000019 loss_cls: 4.1956 (3.9697) grad_norm: 2.3029 (2.3137) time: 0.7750 data: 0.0002 max mem: 8426 +[2024-12-10 14:38:25 root] (utils.py 283): INFO Epoch: [6] [ 250/2502] eta: 0:29:08 lr: 0.000019 loss_cls: 4.1956 (3.9679) grad_norm: 2.3089 (2.3127) time: 0.7691 data: 0.0002 max mem: 8426 +[2024-12-10 14:38:33 root] (utils.py 283): INFO Epoch: [6] [ 260/2502] eta: 0:29:01 lr: 0.000019 loss_cls: 4.0394 (3.9721) grad_norm: 2.2916 (2.3127) time: 0.7728 data: 0.0002 max mem: 8426 +[2024-12-10 14:38:41 root] (utils.py 283): INFO Epoch: [6] [ 270/2502] eta: 0:28:51 lr: 0.000019 loss_cls: 4.2405 (3.9832) grad_norm: 2.3093 (2.3117) time: 0.7676 data: 0.0002 max mem: 8426 +[2024-12-10 14:38:49 root] (utils.py 283): INFO Epoch: [6] [ 280/2502] eta: 0:28:44 lr: 0.000019 loss_cls: 4.2969 (3.9839) grad_norm: 2.3383 (2.3151) time: 0.7671 data: 0.0002 max mem: 8426 +[2024-12-10 14:38:56 root] (utils.py 283): INFO Epoch: [6] [ 290/2502] eta: 0:28:34 lr: 0.000019 loss_cls: 4.0563 (3.9816) grad_norm: 2.3397 (2.3147) time: 0.7669 data: 0.0003 max mem: 8426 +[2024-12-10 14:39:04 root] (utils.py 283): INFO Epoch: [6] [ 300/2502] eta: 0:28:26 lr: 0.000019 loss_cls: 4.1849 (3.9788) grad_norm: 2.2937 (2.3145) time: 0.7604 data: 0.0003 max mem: 8426 +[2024-12-10 14:39:11 root] (utils.py 283): INFO Epoch: [6] [ 310/2502] eta: 0:28:17 lr: 0.000019 loss_cls: 4.2085 (3.9715) grad_norm: 2.2527 (2.3133) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-10 14:39:19 root] (utils.py 283): INFO Epoch: [6] [ 320/2502] eta: 0:28:09 lr: 0.000019 loss_cls: 3.9652 (3.9651) grad_norm: 2.2795 (2.3151) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 14:39:27 root] (utils.py 283): INFO Epoch: [6] [ 330/2502] eta: 0:28:00 lr: 0.000019 loss_cls: 3.8699 (3.9633) grad_norm: 2.3323 (2.3155) time: 0.7631 data: 0.0003 max mem: 8426 +[2024-12-10 14:39:34 root] (utils.py 283): INFO Epoch: [6] [ 340/2502] eta: 0:27:51 lr: 0.000019 loss_cls: 3.9364 (3.9660) grad_norm: 2.2880 (2.3162) time: 0.7614 data: 0.0002 max mem: 8426 +[2024-12-10 14:39:42 root] (utils.py 283): INFO Epoch: [6] [ 350/2502] eta: 0:27:43 lr: 0.000019 loss_cls: 4.2118 (3.9703) grad_norm: 2.2697 (2.3153) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 14:39:50 root] (utils.py 283): INFO Epoch: [6] [ 360/2502] eta: 0:27:36 lr: 0.000019 loss_cls: 4.1857 (3.9758) grad_norm: 2.2748 (2.3159) time: 0.7741 data: 0.0002 max mem: 8426 +[2024-12-10 14:39:58 root] (utils.py 283): INFO Epoch: [6] [ 370/2502] eta: 0:27:29 lr: 0.000019 loss_cls: 4.1713 (3.9795) grad_norm: 2.2919 (2.3153) time: 0.7816 data: 0.0002 max mem: 8426 +[2024-12-10 14:40:05 root] (utils.py 283): INFO Epoch: [6] [ 380/2502] eta: 0:27:21 lr: 0.000019 loss_cls: 3.8772 (3.9670) grad_norm: 2.2550 (2.3146) time: 0.7732 data: 0.0002 max mem: 8426 +[2024-12-10 14:40:13 root] (utils.py 283): INFO Epoch: [6] [ 390/2502] eta: 0:27:12 lr: 0.000019 loss_cls: 3.5592 (3.9668) grad_norm: 2.2616 (2.3135) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 14:40:20 root] (utils.py 283): INFO Epoch: [6] [ 400/2502] eta: 0:27:04 lr: 0.000019 loss_cls: 3.9615 (3.9666) grad_norm: 2.2776 (2.3145) time: 0.7605 data: 0.0002 max mem: 8426 +[2024-12-10 14:40:28 root] (utils.py 283): INFO Epoch: [6] [ 410/2502] eta: 0:26:56 lr: 0.000019 loss_cls: 3.9445 (3.9646) grad_norm: 2.2719 (2.3134) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 14:40:36 root] (utils.py 283): INFO Epoch: [6] [ 420/2502] eta: 0:26:47 lr: 0.000019 loss_cls: 3.8095 (3.9578) grad_norm: 2.2645 (2.3124) time: 0.7613 data: 0.0002 max mem: 8426 +[2024-12-10 14:40:43 root] (utils.py 283): INFO Epoch: [6] [ 430/2502] eta: 0:26:39 lr: 0.000019 loss_cls: 4.0410 (3.9610) grad_norm: 2.2811 (2.3125) time: 0.7577 data: 0.0002 max mem: 8426 +[2024-12-10 14:40:51 root] (utils.py 283): INFO Epoch: [6] [ 440/2502] eta: 0:26:30 lr: 0.000019 loss_cls: 4.1826 (3.9617) grad_norm: 2.2944 (2.3132) time: 0.7582 data: 0.0003 max mem: 8426 +[2024-12-10 14:40:58 root] (utils.py 283): INFO Epoch: [6] [ 450/2502] eta: 0:26:23 lr: 0.000019 loss_cls: 4.1740 (3.9591) grad_norm: 2.2927 (2.3130) time: 0.7624 data: 0.0003 max mem: 8426 +[2024-12-10 14:41:06 root] (utils.py 283): INFO Epoch: [6] [ 460/2502] eta: 0:26:14 lr: 0.000019 loss_cls: 3.7057 (3.9508) grad_norm: 2.3635 (2.3135) time: 0.7617 data: 0.0002 max mem: 8426 +[2024-12-10 14:41:14 root] (utils.py 283): INFO Epoch: [6] [ 470/2502] eta: 0:26:06 lr: 0.000019 loss_cls: 3.7391 (3.9526) grad_norm: 2.3231 (2.3128) time: 0.7593 data: 0.0002 max mem: 8426 +[2024-12-10 14:41:21 root] (utils.py 283): INFO Epoch: [6] [ 480/2502] eta: 0:25:58 lr: 0.000019 loss_cls: 4.2942 (3.9602) grad_norm: 2.3958 (2.3164) time: 0.7600 data: 0.0002 max mem: 8426 +[2024-12-10 14:41:29 root] (utils.py 283): INFO Epoch: [6] [ 490/2502] eta: 0:25:49 lr: 0.000019 loss_cls: 4.2942 (3.9599) grad_norm: 2.3193 (2.3157) time: 0.7562 data: 0.0002 max mem: 8426 +[2024-12-10 14:41:36 root] (utils.py 283): INFO Epoch: [6] [ 500/2502] eta: 0:25:42 lr: 0.000019 loss_cls: 4.0829 (3.9607) grad_norm: 2.2429 (2.3155) time: 0.7603 data: 0.0002 max mem: 8426 +[2024-12-10 14:41:44 root] (utils.py 283): INFO Epoch: [6] [ 510/2502] eta: 0:25:34 lr: 0.000019 loss_cls: 4.2666 (3.9651) grad_norm: 2.2663 (2.3142) time: 0.7679 data: 0.0002 max mem: 8426 +[2024-12-10 14:41:52 root] (utils.py 283): INFO Epoch: [6] [ 520/2502] eta: 0:25:26 lr: 0.000019 loss_cls: 4.2524 (3.9638) grad_norm: 2.2905 (2.3143) time: 0.7623 data: 0.0002 max mem: 8426 +[2024-12-10 14:41:59 root] (utils.py 283): INFO Epoch: [6] [ 530/2502] eta: 0:25:17 lr: 0.000019 loss_cls: 4.1297 (3.9654) grad_norm: 2.3105 (2.3152) time: 0.7545 data: 0.0002 max mem: 8426 +[2024-12-10 14:42:07 root] (utils.py 283): INFO Epoch: [6] [ 540/2502] eta: 0:25:09 lr: 0.000019 loss_cls: 4.1297 (3.9672) grad_norm: 2.3105 (2.3155) time: 0.7560 data: 0.0002 max mem: 8426 +[2024-12-10 14:42:14 root] (utils.py 283): INFO Epoch: [6] [ 550/2502] eta: 0:25:01 lr: 0.000019 loss_cls: 4.0323 (3.9689) grad_norm: 2.3260 (2.3164) time: 0.7562 data: 0.0002 max mem: 8426 +[2024-12-10 14:42:22 root] (utils.py 283): INFO Epoch: [6] [ 560/2502] eta: 0:24:53 lr: 0.000019 loss_cls: 4.2469 (3.9732) grad_norm: 2.3855 (2.3172) time: 0.7570 data: 0.0002 max mem: 8426 +[2024-12-10 14:42:30 root] (utils.py 283): INFO Epoch: [6] [ 570/2502] eta: 0:24:45 lr: 0.000019 loss_cls: 4.2361 (3.9726) grad_norm: 2.3478 (2.3183) time: 0.7577 data: 0.0003 max mem: 8426 +[2024-12-10 14:42:37 root] (utils.py 283): INFO Epoch: [6] [ 580/2502] eta: 0:24:37 lr: 0.000019 loss_cls: 3.7435 (3.9711) grad_norm: 2.2940 (2.3181) time: 0.7604 data: 0.0002 max mem: 8426 +[2024-12-10 14:42:45 root] (utils.py 283): INFO Epoch: [6] [ 590/2502] eta: 0:24:29 lr: 0.000019 loss_cls: 3.9429 (3.9686) grad_norm: 2.2940 (2.3179) time: 0.7598 data: 0.0002 max mem: 8426 +[2024-12-10 14:42:52 root] (utils.py 283): INFO Epoch: [6] [ 600/2502] eta: 0:24:21 lr: 0.000019 loss_cls: 4.1441 (3.9718) grad_norm: 2.3346 (2.3194) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 14:43:00 root] (utils.py 283): INFO Epoch: [6] [ 610/2502] eta: 0:24:13 lr: 0.000019 loss_cls: 4.1943 (3.9753) grad_norm: 2.3346 (2.3197) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 14:43:08 root] (utils.py 283): INFO Epoch: [6] [ 620/2502] eta: 0:24:05 lr: 0.000019 loss_cls: 3.8284 (3.9758) grad_norm: 2.2595 (2.3180) time: 0.7553 data: 0.0002 max mem: 8426 +[2024-12-10 14:43:15 root] (utils.py 283): INFO Epoch: [6] [ 630/2502] eta: 0:23:57 lr: 0.000019 loss_cls: 3.8284 (3.9720) grad_norm: 2.2036 (2.3176) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 14:43:23 root] (utils.py 283): INFO Epoch: [6] [ 640/2502] eta: 0:23:50 lr: 0.000019 loss_cls: 3.8643 (3.9685) grad_norm: 2.3454 (2.3186) time: 0.7755 data: 0.0002 max mem: 8426 +[2024-12-10 14:43:31 root] (utils.py 283): INFO Epoch: [6] [ 650/2502] eta: 0:23:42 lr: 0.000019 loss_cls: 3.8643 (3.9714) grad_norm: 2.3746 (2.3195) time: 0.7728 data: 0.0002 max mem: 8426 +[2024-12-10 14:43:38 root] (utils.py 283): INFO Epoch: [6] [ 660/2502] eta: 0:23:34 lr: 0.000019 loss_cls: 4.1689 (3.9726) grad_norm: 2.3555 (2.3205) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 14:43:46 root] (utils.py 283): INFO Epoch: [6] [ 670/2502] eta: 0:23:27 lr: 0.000019 loss_cls: 4.0952 (3.9711) grad_norm: 2.3357 (2.3206) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 14:43:54 root] (utils.py 283): INFO Epoch: [6] [ 680/2502] eta: 0:23:19 lr: 0.000019 loss_cls: 3.7508 (3.9694) grad_norm: 2.2702 (2.3197) time: 0.7613 data: 0.0002 max mem: 8426 +[2024-12-10 14:44:01 root] (utils.py 283): INFO Epoch: [6] [ 690/2502] eta: 0:23:11 lr: 0.000019 loss_cls: 3.9424 (3.9703) grad_norm: 2.2490 (2.3197) time: 0.7567 data: 0.0002 max mem: 8426 +[2024-12-10 14:44:09 root] (utils.py 283): INFO Epoch: [6] [ 700/2502] eta: 0:23:03 lr: 0.000019 loss_cls: 3.9939 (3.9704) grad_norm: 2.3001 (2.3198) time: 0.7571 data: 0.0002 max mem: 8426 +[2024-12-10 14:44:16 root] (utils.py 283): INFO Epoch: [6] [ 710/2502] eta: 0:22:55 lr: 0.000019 loss_cls: 3.9310 (3.9706) grad_norm: 2.2933 (2.3201) time: 0.7661 data: 0.0002 max mem: 8426 +[2024-12-10 14:44:24 root] (utils.py 283): INFO Epoch: [6] [ 720/2502] eta: 0:22:47 lr: 0.000019 loss_cls: 3.9310 (3.9710) grad_norm: 2.3170 (2.3208) time: 0.7654 data: 0.0002 max mem: 8426 +[2024-12-10 14:44:32 root] (utils.py 283): INFO Epoch: [6] [ 730/2502] eta: 0:22:39 lr: 0.000019 loss_cls: 4.1615 (3.9706) grad_norm: 2.3685 (2.3210) time: 0.7549 data: 0.0002 max mem: 8426 +[2024-12-10 14:44:39 root] (utils.py 283): INFO Epoch: [6] [ 740/2502] eta: 0:22:31 lr: 0.000019 loss_cls: 4.1691 (3.9721) grad_norm: 2.3567 (2.3210) time: 0.7554 data: 0.0002 max mem: 8426 +[2024-12-10 14:44:47 root] (utils.py 283): INFO Epoch: [6] [ 750/2502] eta: 0:22:24 lr: 0.000019 loss_cls: 4.1615 (3.9730) grad_norm: 2.3303 (2.3207) time: 0.7600 data: 0.0002 max mem: 8426 +[2024-12-10 14:44:54 root] (utils.py 283): INFO Epoch: [6] [ 760/2502] eta: 0:22:16 lr: 0.000019 loss_cls: 4.0960 (3.9707) grad_norm: 2.3507 (2.3212) time: 0.7603 data: 0.0003 max mem: 8426 +[2024-12-10 14:45:02 root] (utils.py 283): INFO Epoch: [6] [ 770/2502] eta: 0:22:08 lr: 0.000019 loss_cls: 3.7628 (3.9686) grad_norm: 2.3434 (2.3212) time: 0.7563 data: 0.0003 max mem: 8426 +[2024-12-10 14:45:09 root] (utils.py 283): INFO Epoch: [6] [ 780/2502] eta: 0:22:00 lr: 0.000019 loss_cls: 4.1209 (3.9702) grad_norm: 2.3004 (2.3213) time: 0.7546 data: 0.0003 max mem: 8426 +[2024-12-10 14:45:17 root] (utils.py 283): INFO Epoch: [6] [ 790/2502] eta: 0:21:52 lr: 0.000019 loss_cls: 4.1209 (3.9708) grad_norm: 2.3004 (2.3216) time: 0.7550 data: 0.0003 max mem: 8426 +[2024-12-10 14:45:25 root] (utils.py 283): INFO Epoch: [6] [ 800/2502] eta: 0:21:44 lr: 0.000019 loss_cls: 3.8303 (3.9685) grad_norm: 2.3100 (2.3218) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 14:45:32 root] (utils.py 283): INFO Epoch: [6] [ 810/2502] eta: 0:21:36 lr: 0.000019 loss_cls: 3.8470 (3.9707) grad_norm: 2.3324 (2.3232) time: 0.7641 data: 0.0003 max mem: 8426 +[2024-12-10 14:45:40 root] (utils.py 283): INFO Epoch: [6] [ 820/2502] eta: 0:21:29 lr: 0.000019 loss_cls: 4.3797 (3.9720) grad_norm: 2.3671 (2.3238) time: 0.7570 data: 0.0002 max mem: 8426 +[2024-12-10 14:45:47 root] (utils.py 283): INFO Epoch: [6] [ 830/2502] eta: 0:21:21 lr: 0.000019 loss_cls: 4.1434 (3.9734) grad_norm: 2.3623 (2.3243) time: 0.7575 data: 0.0002 max mem: 8426 +[2024-12-10 14:45:55 root] (utils.py 283): INFO Epoch: [6] [ 840/2502] eta: 0:21:13 lr: 0.000019 loss_cls: 4.1208 (3.9699) grad_norm: 2.3536 (2.3247) time: 0.7598 data: 0.0002 max mem: 8426 +[2024-12-10 14:46:03 root] (utils.py 283): INFO Epoch: [6] [ 850/2502] eta: 0:21:05 lr: 0.000019 loss_cls: 4.1561 (3.9730) grad_norm: 2.3193 (2.3243) time: 0.7585 data: 0.0003 max mem: 8426 +[2024-12-10 14:46:10 root] (utils.py 283): INFO Epoch: [6] [ 860/2502] eta: 0:20:57 lr: 0.000019 loss_cls: 4.1244 (3.9729) grad_norm: 2.2630 (2.3241) time: 0.7581 data: 0.0003 max mem: 8426 +[2024-12-10 14:46:18 root] (utils.py 283): INFO Epoch: [6] [ 870/2502] eta: 0:20:50 lr: 0.000019 loss_cls: 4.2498 (3.9776) grad_norm: 2.2988 (2.3246) time: 0.7652 data: 0.0003 max mem: 8426 +[2024-12-10 14:46:26 root] (utils.py 283): INFO Epoch: [6] [ 880/2502] eta: 0:20:42 lr: 0.000019 loss_cls: 4.3129 (3.9802) grad_norm: 2.3373 (2.3246) time: 0.7680 data: 0.0002 max mem: 8426 +[2024-12-10 14:46:33 root] (utils.py 283): INFO Epoch: [6] [ 890/2502] eta: 0:20:34 lr: 0.000019 loss_cls: 4.0145 (3.9776) grad_norm: 2.2912 (2.3245) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 14:46:41 root] (utils.py 283): INFO Epoch: [6] [ 900/2502] eta: 0:20:27 lr: 0.000019 loss_cls: 3.7204 (3.9770) grad_norm: 2.2977 (2.3252) time: 0.7594 data: 0.0002 max mem: 8426 +[2024-12-10 14:46:48 root] (utils.py 283): INFO Epoch: [6] [ 910/2502] eta: 0:20:19 lr: 0.000019 loss_cls: 3.8777 (3.9747) grad_norm: 2.3986 (2.3258) time: 0.7617 data: 0.0002 max mem: 8426 +[2024-12-10 14:46:56 root] (utils.py 283): INFO Epoch: [6] [ 920/2502] eta: 0:20:11 lr: 0.000019 loss_cls: 3.4868 (3.9690) grad_norm: 2.3890 (2.3260) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 14:47:04 root] (utils.py 283): INFO Epoch: [6] [ 930/2502] eta: 0:20:04 lr: 0.000019 loss_cls: 3.7794 (3.9693) grad_norm: 2.2804 (2.3256) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-10 14:47:11 root] (utils.py 283): INFO Epoch: [6] [ 940/2502] eta: 0:19:56 lr: 0.000019 loss_cls: 3.9481 (3.9676) grad_norm: 2.2349 (2.3250) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 14:47:19 root] (utils.py 283): INFO Epoch: [6] [ 950/2502] eta: 0:19:48 lr: 0.000019 loss_cls: 3.9183 (3.9670) grad_norm: 2.2322 (2.3240) time: 0.7632 data: 0.0003 max mem: 8426 +[2024-12-10 14:47:27 root] (utils.py 283): INFO Epoch: [6] [ 960/2502] eta: 0:19:40 lr: 0.000019 loss_cls: 4.2286 (3.9657) grad_norm: 2.2791 (2.3234) time: 0.7645 data: 0.0003 max mem: 8426 +[2024-12-10 14:47:34 root] (utils.py 283): INFO Epoch: [6] [ 970/2502] eta: 0:19:33 lr: 0.000019 loss_cls: 3.9243 (3.9642) grad_norm: 2.2743 (2.3233) time: 0.7634 data: 0.0003 max mem: 8426 +[2024-12-10 14:47:42 root] (utils.py 283): INFO Epoch: [6] [ 980/2502] eta: 0:19:25 lr: 0.000019 loss_cls: 3.6416 (3.9617) grad_norm: 2.2743 (2.3236) time: 0.7617 data: 0.0002 max mem: 8426 +[2024-12-10 14:47:49 root] (utils.py 283): INFO Epoch: [6] [ 990/2502] eta: 0:19:17 lr: 0.000019 loss_cls: 3.9268 (3.9640) grad_norm: 2.2535 (2.3227) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-10 14:47:57 root] (utils.py 283): INFO Epoch: [6] [1000/2502] eta: 0:19:10 lr: 0.000019 loss_cls: 4.0223 (3.9628) grad_norm: 2.2535 (2.3224) time: 0.7615 data: 0.0002 max mem: 8426 +[2024-12-10 14:48:05 root] (utils.py 283): INFO Epoch: [6] [1010/2502] eta: 0:19:02 lr: 0.000019 loss_cls: 3.9851 (3.9632) grad_norm: 2.2989 (2.3230) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 14:48:12 root] (utils.py 283): INFO Epoch: [6] [1020/2502] eta: 0:18:54 lr: 0.000019 loss_cls: 3.9851 (3.9618) grad_norm: 2.2741 (2.3226) time: 0.7640 data: 0.0003 max mem: 8426 +[2024-12-10 14:48:20 root] (utils.py 283): INFO Epoch: [6] [1030/2502] eta: 0:18:46 lr: 0.000019 loss_cls: 4.0370 (3.9620) grad_norm: 2.2676 (2.3228) time: 0.7640 data: 0.0003 max mem: 8426 +[2024-12-10 14:48:28 root] (utils.py 283): INFO Epoch: [6] [1040/2502] eta: 0:18:39 lr: 0.000019 loss_cls: 4.1681 (3.9608) grad_norm: 2.2676 (2.3227) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 14:48:35 root] (utils.py 283): INFO Epoch: [6] [1050/2502] eta: 0:18:31 lr: 0.000019 loss_cls: 3.9230 (3.9592) grad_norm: 2.3402 (2.3231) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 14:48:43 root] (utils.py 283): INFO Epoch: [6] [1060/2502] eta: 0:18:24 lr: 0.000019 loss_cls: 3.9780 (3.9594) grad_norm: 2.3194 (2.3229) time: 0.7657 data: 0.0003 max mem: 8426 +[2024-12-10 14:48:51 root] (utils.py 283): INFO Epoch: [6] [1070/2502] eta: 0:18:16 lr: 0.000019 loss_cls: 4.2750 (3.9622) grad_norm: 2.2615 (2.3225) time: 0.7661 data: 0.0003 max mem: 8426 +[2024-12-10 14:48:58 root] (utils.py 283): INFO Epoch: [6] [1080/2502] eta: 0:18:08 lr: 0.000019 loss_cls: 4.2010 (3.9608) grad_norm: 2.2580 (2.3219) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 14:49:06 root] (utils.py 283): INFO Epoch: [6] [1090/2502] eta: 0:18:00 lr: 0.000019 loss_cls: 3.9064 (3.9584) grad_norm: 2.2591 (2.3222) time: 0.7625 data: 0.0002 max mem: 8426 +[2024-12-10 14:49:14 root] (utils.py 283): INFO Epoch: [6] [1100/2502] eta: 0:17:53 lr: 0.000019 loss_cls: 3.8494 (3.9580) grad_norm: 2.2251 (2.3216) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 14:49:21 root] (utils.py 283): INFO Epoch: [6] [1110/2502] eta: 0:17:45 lr: 0.000019 loss_cls: 3.7899 (3.9581) grad_norm: 2.2667 (2.3213) time: 0.7648 data: 0.0003 max mem: 8426 +[2024-12-10 14:49:29 root] (utils.py 283): INFO Epoch: [6] [1120/2502] eta: 0:17:37 lr: 0.000019 loss_cls: 3.9130 (3.9575) grad_norm: 2.2696 (2.3210) time: 0.7630 data: 0.0003 max mem: 8426 +[2024-12-10 14:49:36 root] (utils.py 283): INFO Epoch: [6] [1130/2502] eta: 0:17:30 lr: 0.000019 loss_cls: 4.1995 (3.9598) grad_norm: 2.2627 (2.3208) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 14:49:44 root] (utils.py 283): INFO Epoch: [6] [1140/2502] eta: 0:17:22 lr: 0.000019 loss_cls: 4.3005 (3.9621) grad_norm: 2.2853 (2.3205) time: 0.7625 data: 0.0002 max mem: 8426 +[2024-12-10 14:49:52 root] (utils.py 283): INFO Epoch: [6] [1150/2502] eta: 0:17:14 lr: 0.000019 loss_cls: 4.0730 (3.9616) grad_norm: 2.2786 (2.3207) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 14:49:59 root] (utils.py 283): INFO Epoch: [6] [1160/2502] eta: 0:17:07 lr: 0.000019 loss_cls: 4.0561 (3.9613) grad_norm: 2.2754 (2.3203) time: 0.7604 data: 0.0002 max mem: 8426 +[2024-12-10 14:50:07 root] (utils.py 283): INFO Epoch: [6] [1170/2502] eta: 0:16:59 lr: 0.000019 loss_cls: 3.8392 (3.9584) grad_norm: 2.3511 (2.3210) time: 0.7600 data: 0.0002 max mem: 8426 +[2024-12-10 14:50:14 root] (utils.py 283): INFO Epoch: [6] [1180/2502] eta: 0:16:51 lr: 0.000019 loss_cls: 3.7539 (3.9597) grad_norm: 2.3511 (2.3210) time: 0.7580 data: 0.0002 max mem: 8426 +[2024-12-10 14:50:22 root] (utils.py 283): INFO Epoch: [6] [1190/2502] eta: 0:16:44 lr: 0.000019 loss_cls: 4.2960 (3.9617) grad_norm: 2.2960 (2.3209) time: 0.7634 data: 0.0003 max mem: 8426 +[2024-12-10 14:50:30 root] (utils.py 283): INFO Epoch: [6] [1200/2502] eta: 0:16:36 lr: 0.000019 loss_cls: 4.2815 (3.9624) grad_norm: 2.2523 (2.3207) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-10 14:50:37 root] (utils.py 283): INFO Epoch: [6] [1210/2502] eta: 0:16:28 lr: 0.000019 loss_cls: 4.2457 (3.9615) grad_norm: 2.2425 (2.3206) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 14:50:45 root] (utils.py 283): INFO Epoch: [6] [1220/2502] eta: 0:16:21 lr: 0.000019 loss_cls: 4.2122 (3.9627) grad_norm: 2.2613 (2.3202) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 14:50:53 root] (utils.py 283): INFO Epoch: [6] [1230/2502] eta: 0:16:13 lr: 0.000019 loss_cls: 3.9753 (3.9595) grad_norm: 2.2944 (2.3206) time: 0.7643 data: 0.0003 max mem: 8426 +[2024-12-10 14:51:00 root] (utils.py 283): INFO Epoch: [6] [1240/2502] eta: 0:16:05 lr: 0.000019 loss_cls: 3.6832 (3.9593) grad_norm: 2.2989 (2.3210) time: 0.7678 data: 0.0002 max mem: 8426 +[2024-12-10 14:51:08 root] (utils.py 283): INFO Epoch: [6] [1250/2502] eta: 0:15:58 lr: 0.000019 loss_cls: 3.9233 (3.9604) grad_norm: 2.3218 (2.3210) time: 0.7668 data: 0.0002 max mem: 8426 +[2024-12-10 14:51:16 root] (utils.py 283): INFO Epoch: [6] [1260/2502] eta: 0:15:50 lr: 0.000019 loss_cls: 3.9229 (3.9582) grad_norm: 2.3090 (2.3206) time: 0.7627 data: 0.0003 max mem: 8426 +[2024-12-10 14:51:23 root] (utils.py 283): INFO Epoch: [6] [1270/2502] eta: 0:15:42 lr: 0.000019 loss_cls: 3.9229 (3.9578) grad_norm: 2.3158 (2.3206) time: 0.7695 data: 0.0003 max mem: 8426 +[2024-12-10 14:51:31 root] (utils.py 283): INFO Epoch: [6] [1280/2502] eta: 0:15:35 lr: 0.000019 loss_cls: 3.9513 (3.9575) grad_norm: 2.3322 (2.3207) time: 0.7689 data: 0.0002 max mem: 8426 +[2024-12-10 14:51:39 root] (utils.py 283): INFO Epoch: [6] [1290/2502] eta: 0:15:27 lr: 0.000019 loss_cls: 3.8030 (3.9561) grad_norm: 2.2769 (2.3204) time: 0.7647 data: 0.0003 max mem: 8426 +[2024-12-10 14:51:46 root] (utils.py 283): INFO Epoch: [6] [1300/2502] eta: 0:15:19 lr: 0.000019 loss_cls: 3.8493 (3.9564) grad_norm: 2.2309 (2.3197) time: 0.7646 data: 0.0003 max mem: 8426 +[2024-12-10 14:51:54 root] (utils.py 283): INFO Epoch: [6] [1310/2502] eta: 0:15:12 lr: 0.000019 loss_cls: 4.0856 (3.9572) grad_norm: 2.2967 (2.3206) time: 0.7625 data: 0.0002 max mem: 8426 +[2024-12-10 14:52:02 root] (utils.py 283): INFO Epoch: [6] [1320/2502] eta: 0:15:04 lr: 0.000019 loss_cls: 4.1077 (3.9573) grad_norm: 2.4011 (2.3210) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-10 14:52:09 root] (utils.py 283): INFO Epoch: [6] [1330/2502] eta: 0:14:56 lr: 0.000019 loss_cls: 4.0591 (3.9569) grad_norm: 2.3442 (2.3211) time: 0.7693 data: 0.0002 max mem: 8426 +[2024-12-10 14:52:17 root] (utils.py 283): INFO Epoch: [6] [1340/2502] eta: 0:14:49 lr: 0.000019 loss_cls: 3.7891 (3.9549) grad_norm: 2.2781 (2.3209) time: 0.7794 data: 0.0003 max mem: 8426 +[2024-12-10 14:52:25 root] (utils.py 283): INFO Epoch: [6] [1350/2502] eta: 0:14:41 lr: 0.000019 loss_cls: 3.7891 (3.9537) grad_norm: 2.2872 (2.3211) time: 0.7789 data: 0.0003 max mem: 8426 +[2024-12-10 14:52:33 root] (utils.py 283): INFO Epoch: [6] [1360/2502] eta: 0:14:34 lr: 0.000019 loss_cls: 3.9067 (3.9524) grad_norm: 2.3383 (2.3215) time: 0.7788 data: 0.0003 max mem: 8426 +[2024-12-10 14:52:41 root] (utils.py 283): INFO Epoch: [6] [1370/2502] eta: 0:14:26 lr: 0.000019 loss_cls: 3.6576 (3.9514) grad_norm: 2.3629 (2.3217) time: 0.7794 data: 0.0002 max mem: 8426 +[2024-12-10 14:52:48 root] (utils.py 283): INFO Epoch: [6] [1380/2502] eta: 0:14:19 lr: 0.000019 loss_cls: 3.9895 (3.9516) grad_norm: 2.4034 (2.3227) time: 0.7793 data: 0.0002 max mem: 8426 +[2024-12-10 14:52:56 root] (utils.py 283): INFO Epoch: [6] [1390/2502] eta: 0:14:11 lr: 0.000019 loss_cls: 3.9895 (3.9516) grad_norm: 2.3774 (2.3229) time: 0.7804 data: 0.0003 max mem: 8426 +[2024-12-10 14:53:04 root] (utils.py 283): INFO Epoch: [6] [1400/2502] eta: 0:14:04 lr: 0.000019 loss_cls: 4.0174 (3.9525) grad_norm: 2.2822 (2.3228) time: 0.7759 data: 0.0002 max mem: 8426 +[2024-12-10 14:53:12 root] (utils.py 283): INFO Epoch: [6] [1410/2502] eta: 0:13:56 lr: 0.000019 loss_cls: 3.7811 (3.9512) grad_norm: 2.2744 (2.3225) time: 0.7718 data: 0.0002 max mem: 8426 +[2024-12-10 14:53:19 root] (utils.py 283): INFO Epoch: [6] [1420/2502] eta: 0:13:48 lr: 0.000019 loss_cls: 3.7849 (3.9511) grad_norm: 2.2858 (2.3222) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 14:53:27 root] (utils.py 283): INFO Epoch: [6] [1430/2502] eta: 0:13:41 lr: 0.000019 loss_cls: 4.1609 (3.9540) grad_norm: 2.2833 (2.3219) time: 0.7588 data: 0.0003 max mem: 8426 +[2024-12-10 14:53:34 root] (utils.py 283): INFO Epoch: [6] [1440/2502] eta: 0:13:33 lr: 0.000019 loss_cls: 4.0988 (3.9532) grad_norm: 2.2903 (2.3220) time: 0.7598 data: 0.0003 max mem: 8426 +[2024-12-10 14:53:42 root] (utils.py 283): INFO Epoch: [6] [1450/2502] eta: 0:13:25 lr: 0.000019 loss_cls: 3.9076 (3.9517) grad_norm: 2.3545 (2.3226) time: 0.7613 data: 0.0003 max mem: 8426 +[2024-12-10 14:53:50 root] (utils.py 283): INFO Epoch: [6] [1460/2502] eta: 0:13:18 lr: 0.000019 loss_cls: 3.9076 (3.9522) grad_norm: 2.3587 (2.3229) time: 0.7627 data: 0.0003 max mem: 8426 +[2024-12-10 14:53:57 root] (utils.py 283): INFO Epoch: [6] [1470/2502] eta: 0:13:10 lr: 0.000019 loss_cls: 3.8572 (3.9511) grad_norm: 2.2870 (2.3224) time: 0.7632 data: 0.0002 max mem: 8426 +[2024-12-10 14:54:05 root] (utils.py 283): INFO Epoch: [6] [1480/2502] eta: 0:13:02 lr: 0.000019 loss_cls: 3.7395 (3.9513) grad_norm: 2.2675 (2.3223) time: 0.7630 data: 0.0003 max mem: 8426 +[2024-12-10 14:54:12 root] (utils.py 283): INFO Epoch: [6] [1490/2502] eta: 0:12:55 lr: 0.000019 loss_cls: 4.2448 (3.9537) grad_norm: 2.3264 (2.3226) time: 0.7629 data: 0.0003 max mem: 8426 +[2024-12-10 14:54:20 root] (utils.py 283): INFO Epoch: [6] [1500/2502] eta: 0:12:47 lr: 0.000019 loss_cls: 4.3886 (3.9557) grad_norm: 2.3278 (2.3226) time: 0.7630 data: 0.0003 max mem: 8426 +[2024-12-10 14:54:28 root] (utils.py 283): INFO Epoch: [6] [1510/2502] eta: 0:12:39 lr: 0.000019 loss_cls: 4.2260 (3.9547) grad_norm: 2.3294 (2.3230) time: 0.7623 data: 0.0003 max mem: 8426 +[2024-12-10 14:54:35 root] (utils.py 283): INFO Epoch: [6] [1520/2502] eta: 0:12:31 lr: 0.000019 loss_cls: 3.9374 (3.9551) grad_norm: 2.3425 (2.3232) time: 0.7649 data: 0.0003 max mem: 8426 +[2024-12-10 14:54:43 root] (utils.py 283): INFO Epoch: [6] [1530/2502] eta: 0:12:24 lr: 0.000019 loss_cls: 3.9320 (3.9536) grad_norm: 2.2604 (2.3227) time: 0.7676 data: 0.0003 max mem: 8426 +[2024-12-10 14:54:51 root] (utils.py 283): INFO Epoch: [6] [1540/2502] eta: 0:12:16 lr: 0.000019 loss_cls: 3.8169 (3.9520) grad_norm: 2.2490 (2.3224) time: 0.7669 data: 0.0003 max mem: 8426 +[2024-12-10 14:54:58 root] (utils.py 283): INFO Epoch: [6] [1550/2502] eta: 0:12:09 lr: 0.000019 loss_cls: 3.9472 (3.9512) grad_norm: 2.2616 (2.3225) time: 0.7660 data: 0.0003 max mem: 8426 +[2024-12-10 14:55:06 root] (utils.py 283): INFO Epoch: [6] [1560/2502] eta: 0:12:01 lr: 0.000019 loss_cls: 4.0348 (3.9513) grad_norm: 2.3222 (2.3229) time: 0.7656 data: 0.0003 max mem: 8426 +[2024-12-10 14:55:14 root] (utils.py 283): INFO Epoch: [6] [1570/2502] eta: 0:11:53 lr: 0.000019 loss_cls: 4.1264 (3.9517) grad_norm: 2.3065 (2.3223) time: 0.7641 data: 0.0003 max mem: 8426 +[2024-12-10 14:55:21 root] (utils.py 283): INFO Epoch: [6] [1580/2502] eta: 0:11:46 lr: 0.000019 loss_cls: 4.2597 (3.9534) grad_norm: 2.2705 (2.3227) time: 0.7640 data: 0.0003 max mem: 8426 +[2024-12-10 14:55:29 root] (utils.py 283): INFO Epoch: [6] [1590/2502] eta: 0:11:38 lr: 0.000019 loss_cls: 3.9202 (3.9522) grad_norm: 2.3806 (2.3233) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 14:55:37 root] (utils.py 283): INFO Epoch: [6] [1600/2502] eta: 0:11:30 lr: 0.000019 loss_cls: 3.8608 (3.9524) grad_norm: 2.3717 (2.3236) time: 0.7608 data: 0.0002 max mem: 8426 +[2024-12-10 14:55:44 root] (utils.py 283): INFO Epoch: [6] [1610/2502] eta: 0:11:22 lr: 0.000019 loss_cls: 3.9334 (3.9519) grad_norm: 2.3676 (2.3238) time: 0.7609 data: 0.0002 max mem: 8426 +[2024-12-10 14:55:52 root] (utils.py 283): INFO Epoch: [6] [1620/2502] eta: 0:11:15 lr: 0.000019 loss_cls: 4.0613 (3.9533) grad_norm: 2.3013 (2.3234) time: 0.7628 data: 0.0003 max mem: 8426 +[2024-12-10 14:55:59 root] (utils.py 283): INFO Epoch: [6] [1630/2502] eta: 0:11:07 lr: 0.000019 loss_cls: 4.1278 (3.9529) grad_norm: 2.2759 (2.3234) time: 0.7643 data: 0.0003 max mem: 8426 +[2024-12-10 14:56:07 root] (utils.py 283): INFO Epoch: [6] [1640/2502] eta: 0:10:59 lr: 0.000019 loss_cls: 4.0857 (3.9523) grad_norm: 2.2614 (2.3232) time: 0.7635 data: 0.0003 max mem: 8426 +[2024-12-10 14:56:15 root] (utils.py 283): INFO Epoch: [6] [1650/2502] eta: 0:10:52 lr: 0.000019 loss_cls: 4.0857 (3.9528) grad_norm: 2.2996 (2.3234) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 14:56:22 root] (utils.py 283): INFO Epoch: [6] [1660/2502] eta: 0:10:44 lr: 0.000019 loss_cls: 3.9506 (3.9527) grad_norm: 2.3187 (2.3236) time: 0.7662 data: 0.0002 max mem: 8426 +[2024-12-10 14:56:30 root] (utils.py 283): INFO Epoch: [6] [1670/2502] eta: 0:10:37 lr: 0.000019 loss_cls: 3.9898 (3.9526) grad_norm: 2.2827 (2.3233) time: 0.7669 data: 0.0003 max mem: 8426 +[2024-12-10 14:56:38 root] (utils.py 283): INFO Epoch: [6] [1680/2502] eta: 0:10:29 lr: 0.000019 loss_cls: 3.9898 (3.9526) grad_norm: 2.3125 (2.3238) time: 0.7624 data: 0.0003 max mem: 8426 +[2024-12-10 14:56:45 root] (utils.py 283): INFO Epoch: [6] [1690/2502] eta: 0:10:21 lr: 0.000019 loss_cls: 4.2164 (3.9544) grad_norm: 2.3757 (2.3239) time: 0.7631 data: 0.0003 max mem: 8426 +[2024-12-10 14:56:53 root] (utils.py 283): INFO Epoch: [6] [1700/2502] eta: 0:10:13 lr: 0.000019 loss_cls: 4.2032 (3.9554) grad_norm: 2.3171 (2.3241) time: 0.7628 data: 0.0003 max mem: 8426 +[2024-12-10 14:57:01 root] (utils.py 283): INFO Epoch: [6] [1710/2502] eta: 0:10:06 lr: 0.000019 loss_cls: 3.8844 (3.9536) grad_norm: 2.3171 (2.3238) time: 0.7613 data: 0.0003 max mem: 8426 +[2024-12-10 14:57:08 root] (utils.py 283): INFO Epoch: [6] [1720/2502] eta: 0:09:58 lr: 0.000019 loss_cls: 3.6148 (3.9522) grad_norm: 2.2471 (2.3234) time: 0.7618 data: 0.0002 max mem: 8426 +[2024-12-10 14:57:16 root] (utils.py 283): INFO Epoch: [6] [1730/2502] eta: 0:09:50 lr: 0.000019 loss_cls: 4.0597 (3.9528) grad_norm: 2.2257 (2.3234) time: 0.7632 data: 0.0003 max mem: 8426 +[2024-12-10 14:57:23 root] (utils.py 283): INFO Epoch: [6] [1740/2502] eta: 0:09:43 lr: 0.000019 loss_cls: 4.1214 (3.9543) grad_norm: 2.2702 (2.3233) time: 0.7633 data: 0.0003 max mem: 8426 +[2024-12-10 14:57:31 root] (utils.py 283): INFO Epoch: [6] [1750/2502] eta: 0:09:35 lr: 0.000019 loss_cls: 4.2921 (3.9554) grad_norm: 2.2687 (2.3230) time: 0.7627 data: 0.0003 max mem: 8426 +[2024-12-10 14:57:39 root] (utils.py 283): INFO Epoch: [6] [1760/2502] eta: 0:09:28 lr: 0.000019 loss_cls: 4.2921 (3.9580) grad_norm: 2.2649 (2.3227) time: 0.7718 data: 0.0002 max mem: 8426 +[2024-12-10 14:57:47 root] (utils.py 283): INFO Epoch: [6] [1770/2502] eta: 0:09:20 lr: 0.000019 loss_cls: 4.1986 (3.9575) grad_norm: 2.2649 (2.3225) time: 0.7736 data: 0.0003 max mem: 8426 +[2024-12-10 14:57:54 root] (utils.py 283): INFO Epoch: [6] [1780/2502] eta: 0:09:12 lr: 0.000019 loss_cls: 3.6620 (3.9551) grad_norm: 2.2763 (2.3224) time: 0.7651 data: 0.0003 max mem: 8426 +[2024-12-10 14:58:02 root] (utils.py 283): INFO Epoch: [6] [1790/2502] eta: 0:09:05 lr: 0.000019 loss_cls: 3.6620 (3.9545) grad_norm: 2.2882 (2.3223) time: 0.7643 data: 0.0003 max mem: 8426 +[2024-12-10 14:58:10 root] (utils.py 283): INFO Epoch: [6] [1800/2502] eta: 0:08:57 lr: 0.000019 loss_cls: 4.0923 (3.9537) grad_norm: 2.2882 (2.3222) time: 0.7658 data: 0.0003 max mem: 8426 +[2024-12-10 14:58:17 root] (utils.py 283): INFO Epoch: [6] [1810/2502] eta: 0:08:49 lr: 0.000019 loss_cls: 3.7580 (3.9535) grad_norm: 2.2345 (2.3221) time: 0.7634 data: 0.0003 max mem: 8426 +[2024-12-10 14:58:25 root] (utils.py 283): INFO Epoch: [6] [1820/2502] eta: 0:08:42 lr: 0.000019 loss_cls: 4.1146 (3.9544) grad_norm: 2.2715 (2.3229) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-10 14:58:32 root] (utils.py 283): INFO Epoch: [6] [1830/2502] eta: 0:08:34 lr: 0.000019 loss_cls: 4.1405 (3.9554) grad_norm: 2.3812 (2.3232) time: 0.7621 data: 0.0003 max mem: 8426 +[2024-12-10 14:58:40 root] (utils.py 283): INFO Epoch: [6] [1840/2502] eta: 0:08:26 lr: 0.000019 loss_cls: 4.1089 (3.9564) grad_norm: 2.3473 (2.3234) time: 0.7610 data: 0.0003 max mem: 8426 +[2024-12-10 14:58:48 root] (utils.py 283): INFO Epoch: [6] [1850/2502] eta: 0:08:19 lr: 0.000019 loss_cls: 3.9058 (3.9553) grad_norm: 2.3863 (2.3243) time: 0.7617 data: 0.0002 max mem: 8426 +[2024-12-10 14:58:55 root] (utils.py 283): INFO Epoch: [6] [1860/2502] eta: 0:08:11 lr: 0.000019 loss_cls: 4.0817 (3.9562) grad_norm: 2.3863 (2.3242) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 14:59:03 root] (utils.py 283): INFO Epoch: [6] [1870/2502] eta: 0:08:03 lr: 0.000019 loss_cls: 4.1302 (3.9569) grad_norm: 2.2803 (2.3239) time: 0.7666 data: 0.0002 max mem: 8426 +[2024-12-10 14:59:11 root] (utils.py 283): INFO Epoch: [6] [1880/2502] eta: 0:07:56 lr: 0.000019 loss_cls: 4.1199 (3.9565) grad_norm: 2.2790 (2.3237) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-10 14:59:18 root] (utils.py 283): INFO Epoch: [6] [1890/2502] eta: 0:07:48 lr: 0.000019 loss_cls: 3.9337 (3.9550) grad_norm: 2.2790 (2.3235) time: 0.7591 data: 0.0002 max mem: 8426 +[2024-12-10 14:59:26 root] (utils.py 283): INFO Epoch: [6] [1900/2502] eta: 0:07:40 lr: 0.000019 loss_cls: 3.5604 (3.9539) grad_norm: 2.2548 (2.3236) time: 0.7580 data: 0.0003 max mem: 8426 +[2024-12-10 14:59:33 root] (utils.py 283): INFO Epoch: [6] [1910/2502] eta: 0:07:33 lr: 0.000019 loss_cls: 4.1594 (3.9551) grad_norm: 2.3122 (2.3240) time: 0.7615 data: 0.0002 max mem: 8426 +[2024-12-10 14:59:41 root] (utils.py 283): INFO Epoch: [6] [1920/2502] eta: 0:07:25 lr: 0.000019 loss_cls: 4.0838 (3.9544) grad_norm: 2.2949 (2.3239) time: 0.7606 data: 0.0002 max mem: 8426 +[2024-12-10 14:59:49 root] (utils.py 283): INFO Epoch: [6] [1930/2502] eta: 0:07:17 lr: 0.000019 loss_cls: 3.8448 (3.9534) grad_norm: 2.2906 (2.3237) time: 0.7641 data: 0.0003 max mem: 8426 +[2024-12-10 14:59:56 root] (utils.py 283): INFO Epoch: [6] [1940/2502] eta: 0:07:10 lr: 0.000019 loss_cls: 4.0499 (3.9552) grad_norm: 2.3099 (2.3237) time: 0.7731 data: 0.0003 max mem: 8426 +[2024-12-10 15:00:04 root] (utils.py 283): INFO Epoch: [6] [1950/2502] eta: 0:07:02 lr: 0.000019 loss_cls: 4.2977 (3.9548) grad_norm: 2.2735 (2.3235) time: 0.7699 data: 0.0002 max mem: 8426 +[2024-12-10 15:00:12 root] (utils.py 283): INFO Epoch: [6] [1960/2502] eta: 0:06:54 lr: 0.000019 loss_cls: 3.7682 (3.9537) grad_norm: 2.2728 (2.3234) time: 0.7654 data: 0.0002 max mem: 8426 +[2024-12-10 15:00:19 root] (utils.py 283): INFO Epoch: [6] [1970/2502] eta: 0:06:47 lr: 0.000019 loss_cls: 3.9144 (3.9535) grad_norm: 2.2905 (2.3233) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-10 15:00:27 root] (utils.py 283): INFO Epoch: [6] [1980/2502] eta: 0:06:39 lr: 0.000019 loss_cls: 4.0823 (3.9533) grad_norm: 2.2572 (2.3227) time: 0.7646 data: 0.0003 max mem: 8426 +[2024-12-10 15:00:35 root] (utils.py 283): INFO Epoch: [6] [1990/2502] eta: 0:06:31 lr: 0.000019 loss_cls: 4.0823 (3.9544) grad_norm: 2.2572 (2.3225) time: 0.7640 data: 0.0003 max mem: 8426 +[2024-12-10 15:00:42 root] (utils.py 283): INFO Epoch: [6] [2000/2502] eta: 0:06:24 lr: 0.000019 loss_cls: 4.0524 (3.9541) grad_norm: 2.2612 (2.3222) time: 0.7640 data: 0.0003 max mem: 8426 +[2024-12-10 15:00:50 root] (utils.py 283): INFO Epoch: [6] [2010/2502] eta: 0:06:16 lr: 0.000019 loss_cls: 3.7275 (3.9517) grad_norm: 2.2381 (2.3220) time: 0.7626 data: 0.0003 max mem: 8426 +[2024-12-10 15:00:57 root] (utils.py 283): INFO Epoch: [6] [2020/2502] eta: 0:06:08 lr: 0.000019 loss_cls: 3.4247 (3.9510) grad_norm: 2.2773 (2.3220) time: 0.7583 data: 0.0002 max mem: 8426 +[2024-12-10 15:01:05 root] (utils.py 283): INFO Epoch: [6] [2030/2502] eta: 0:06:01 lr: 0.000019 loss_cls: 3.9002 (3.9507) grad_norm: 2.3248 (2.3225) time: 0.7574 data: 0.0003 max mem: 8426 +[2024-12-10 15:01:13 root] (utils.py 283): INFO Epoch: [6] [2040/2502] eta: 0:05:53 lr: 0.000019 loss_cls: 3.9002 (3.9500) grad_norm: 2.3248 (2.3225) time: 0.7579 data: 0.0003 max mem: 8426 +[2024-12-10 15:01:20 root] (utils.py 283): INFO Epoch: [6] [2050/2502] eta: 0:05:45 lr: 0.000019 loss_cls: 4.0682 (3.9504) grad_norm: 2.3302 (2.3228) time: 0.7599 data: 0.0002 max mem: 8426 +[2024-12-10 15:01:28 root] (utils.py 283): INFO Epoch: [6] [2060/2502] eta: 0:05:38 lr: 0.000019 loss_cls: 4.0673 (3.9498) grad_norm: 2.3363 (2.3229) time: 0.7599 data: 0.0003 max mem: 8426 +[2024-12-10 15:01:35 root] (utils.py 283): INFO Epoch: [6] [2070/2502] eta: 0:05:30 lr: 0.000019 loss_cls: 4.0673 (3.9505) grad_norm: 2.3363 (2.3234) time: 0.7593 data: 0.0003 max mem: 8426 +[2024-12-10 15:01:43 root] (utils.py 283): INFO Epoch: [6] [2080/2502] eta: 0:05:22 lr: 0.000019 loss_cls: 4.1037 (3.9503) grad_norm: 2.3790 (2.3239) time: 0.7625 data: 0.0002 max mem: 8426 +[2024-12-10 15:01:51 root] (utils.py 283): INFO Epoch: [6] [2090/2502] eta: 0:05:15 lr: 0.000019 loss_cls: 4.1037 (3.9511) grad_norm: 2.3790 (2.3242) time: 0.7632 data: 0.0002 max mem: 8426 +[2024-12-10 15:01:58 root] (utils.py 283): INFO Epoch: [6] [2100/2502] eta: 0:05:07 lr: 0.000019 loss_cls: 4.0673 (3.9508) grad_norm: 2.3509 (2.3241) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 15:02:06 root] (utils.py 283): INFO Epoch: [6] [2110/2502] eta: 0:04:59 lr: 0.000019 loss_cls: 4.0673 (3.9520) grad_norm: 2.2966 (2.3242) time: 0.7654 data: 0.0002 max mem: 8426 +[2024-12-10 15:02:14 root] (utils.py 283): INFO Epoch: [6] [2120/2502] eta: 0:04:52 lr: 0.000019 loss_cls: 4.1716 (3.9515) grad_norm: 2.2551 (2.3240) time: 0.7630 data: 0.0003 max mem: 8426 +[2024-12-10 15:02:21 root] (utils.py 283): INFO Epoch: [6] [2130/2502] eta: 0:04:44 lr: 0.000019 loss_cls: 4.0838 (3.9518) grad_norm: 2.3411 (2.3240) time: 0.7631 data: 0.0003 max mem: 8426 +[2024-12-10 15:02:29 root] (utils.py 283): INFO Epoch: [6] [2140/2502] eta: 0:04:36 lr: 0.000019 loss_cls: 4.1473 (3.9520) grad_norm: 2.3600 (2.3243) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 15:02:37 root] (utils.py 283): INFO Epoch: [6] [2150/2502] eta: 0:04:29 lr: 0.000019 loss_cls: 4.3788 (3.9531) grad_norm: 2.3726 (2.3244) time: 0.7623 data: 0.0002 max mem: 8426 +[2024-12-10 15:02:44 root] (utils.py 283): INFO Epoch: [6] [2160/2502] eta: 0:04:21 lr: 0.000019 loss_cls: 3.8276 (3.9527) grad_norm: 2.3173 (2.3242) time: 0.7607 data: 0.0003 max mem: 8426 +[2024-12-10 15:02:52 root] (utils.py 283): INFO Epoch: [6] [2170/2502] eta: 0:04:14 lr: 0.000019 loss_cls: 3.7835 (3.9515) grad_norm: 2.3032 (2.3243) time: 0.7611 data: 0.0003 max mem: 8426 +[2024-12-10 15:02:59 root] (utils.py 283): INFO Epoch: [6] [2180/2502] eta: 0:04:06 lr: 0.000019 loss_cls: 3.4128 (3.9503) grad_norm: 2.3138 (2.3245) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 15:03:07 root] (utils.py 283): INFO Epoch: [6] [2190/2502] eta: 0:03:58 lr: 0.000019 loss_cls: 3.9702 (3.9510) grad_norm: 2.3289 (2.3248) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-10 15:03:15 root] (utils.py 283): INFO Epoch: [6] [2200/2502] eta: 0:03:51 lr: 0.000019 loss_cls: 4.0672 (3.9513) grad_norm: 2.3289 (2.3249) time: 0.7669 data: 0.0003 max mem: 8426 +[2024-12-10 15:03:22 root] (utils.py 283): INFO Epoch: [6] [2210/2502] eta: 0:03:43 lr: 0.000019 loss_cls: 4.0672 (3.9507) grad_norm: 2.3616 (2.3253) time: 0.7651 data: 0.0003 max mem: 8426 +[2024-12-10 15:03:30 root] (utils.py 283): INFO Epoch: [6] [2220/2502] eta: 0:03:35 lr: 0.000019 loss_cls: 3.9795 (3.9503) grad_norm: 2.3236 (2.3251) time: 0.7658 data: 0.0003 max mem: 8426 +[2024-12-10 15:03:38 root] (utils.py 283): INFO Epoch: [6] [2230/2502] eta: 0:03:28 lr: 0.000019 loss_cls: 3.9874 (3.9507) grad_norm: 2.3287 (2.3253) time: 0.7692 data: 0.0002 max mem: 8426 +[2024-12-10 15:03:45 root] (utils.py 283): INFO Epoch: [6] [2240/2502] eta: 0:03:20 lr: 0.000019 loss_cls: 4.0728 (3.9507) grad_norm: 2.3617 (2.3254) time: 0.7707 data: 0.0002 max mem: 8426 +[2024-12-10 15:03:53 root] (utils.py 283): INFO Epoch: [6] [2250/2502] eta: 0:03:12 lr: 0.000019 loss_cls: 4.0822 (3.9512) grad_norm: 2.3311 (2.3254) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 15:04:01 root] (utils.py 283): INFO Epoch: [6] [2260/2502] eta: 0:03:05 lr: 0.000019 loss_cls: 3.8717 (3.9503) grad_norm: 2.3328 (2.3254) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 15:04:08 root] (utils.py 283): INFO Epoch: [6] [2270/2502] eta: 0:02:57 lr: 0.000019 loss_cls: 3.8675 (3.9503) grad_norm: 2.3497 (2.3255) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 15:04:16 root] (utils.py 283): INFO Epoch: [6] [2280/2502] eta: 0:02:49 lr: 0.000019 loss_cls: 4.1950 (3.9513) grad_norm: 2.3668 (2.3256) time: 0.7622 data: 0.0003 max mem: 8426 +[2024-12-10 15:04:24 root] (utils.py 283): INFO Epoch: [6] [2290/2502] eta: 0:02:42 lr: 0.000019 loss_cls: 4.1097 (3.9515) grad_norm: 2.3417 (2.3258) time: 0.7621 data: 0.0003 max mem: 8426 +[2024-12-10 15:04:31 root] (utils.py 283): INFO Epoch: [6] [2300/2502] eta: 0:02:34 lr: 0.000019 loss_cls: 4.0753 (3.9520) grad_norm: 2.2992 (2.3256) time: 0.7616 data: 0.0003 max mem: 8426 +[2024-12-10 15:04:39 root] (utils.py 283): INFO Epoch: [6] [2310/2502] eta: 0:02:26 lr: 0.000019 loss_cls: 4.1439 (3.9519) grad_norm: 2.2910 (2.3255) time: 0.7703 data: 0.0002 max mem: 8426 +[2024-12-10 15:04:47 root] (utils.py 283): INFO Epoch: [6] [2320/2502] eta: 0:02:19 lr: 0.000019 loss_cls: 4.1439 (3.9529) grad_norm: 2.3125 (2.3255) time: 0.7716 data: 0.0002 max mem: 8426 +[2024-12-10 15:04:54 root] (utils.py 283): INFO Epoch: [6] [2330/2502] eta: 0:02:11 lr: 0.000019 loss_cls: 4.0499 (3.9519) grad_norm: 2.3273 (2.3254) time: 0.7625 data: 0.0002 max mem: 8426 +[2024-12-10 15:05:02 root] (utils.py 283): INFO Epoch: [6] [2340/2502] eta: 0:02:03 lr: 0.000019 loss_cls: 3.9303 (3.9524) grad_norm: 2.3219 (2.3254) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-10 15:05:10 root] (utils.py 283): INFO Epoch: [6] [2350/2502] eta: 0:01:56 lr: 0.000019 loss_cls: 4.0816 (3.9516) grad_norm: 2.3128 (2.3254) time: 0.7727 data: 0.0003 max mem: 8426 +[2024-12-10 15:05:17 root] (utils.py 283): INFO Epoch: [6] [2360/2502] eta: 0:01:48 lr: 0.000019 loss_cls: 3.8392 (3.9514) grad_norm: 2.3036 (2.3253) time: 0.7688 data: 0.0003 max mem: 8426 +[2024-12-10 15:05:25 root] (utils.py 283): INFO Epoch: [6] [2370/2502] eta: 0:01:41 lr: 0.000019 loss_cls: 3.7842 (3.9501) grad_norm: 2.3378 (2.3255) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 15:05:33 root] (utils.py 283): INFO Epoch: [6] [2380/2502] eta: 0:01:33 lr: 0.000019 loss_cls: 3.8412 (3.9505) grad_norm: 2.3509 (2.3255) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 15:05:40 root] (utils.py 283): INFO Epoch: [6] [2390/2502] eta: 0:01:25 lr: 0.000019 loss_cls: 4.1383 (3.9512) grad_norm: 2.2904 (2.3255) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 15:05:48 root] (utils.py 283): INFO Epoch: [6] [2400/2502] eta: 0:01:18 lr: 0.000019 loss_cls: 4.0006 (3.9514) grad_norm: 2.3071 (2.3255) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 15:05:55 root] (utils.py 283): INFO Epoch: [6] [2410/2502] eta: 0:01:10 lr: 0.000019 loss_cls: 4.2234 (3.9530) grad_norm: 2.3234 (2.3254) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 15:06:03 root] (utils.py 283): INFO Epoch: [6] [2420/2502] eta: 0:01:02 lr: 0.000019 loss_cls: 4.2307 (3.9527) grad_norm: 2.2630 (2.3254) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 15:06:11 root] (utils.py 283): INFO Epoch: [6] [2430/2502] eta: 0:00:55 lr: 0.000019 loss_cls: 3.8420 (3.9523) grad_norm: 2.3499 (2.3257) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-10 15:06:18 root] (utils.py 283): INFO Epoch: [6] [2440/2502] eta: 0:00:47 lr: 0.000019 loss_cls: 3.7686 (3.9512) grad_norm: 2.3389 (2.3257) time: 0.7624 data: 0.0003 max mem: 8426 +[2024-12-10 15:06:26 root] (utils.py 283): INFO Epoch: [6] [2450/2502] eta: 0:00:39 lr: 0.000019 loss_cls: 3.7686 (3.9506) grad_norm: 2.2646 (2.3255) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-10 15:06:34 root] (utils.py 283): INFO Epoch: [6] [2460/2502] eta: 0:00:32 lr: 0.000019 loss_cls: 4.0990 (3.9507) grad_norm: 2.2632 (2.3254) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-10 15:06:41 root] (utils.py 283): INFO Epoch: [6] [2470/2502] eta: 0:00:24 lr: 0.000019 loss_cls: 4.2122 (3.9507) grad_norm: 2.2563 (2.3254) time: 0.7654 data: 0.0002 max mem: 8426 +[2024-12-10 15:06:49 root] (utils.py 283): INFO Epoch: [6] [2480/2502] eta: 0:00:16 lr: 0.000019 loss_cls: 4.2137 (3.9507) grad_norm: 2.2677 (2.3254) time: 0.7662 data: 0.0002 max mem: 8426 +[2024-12-10 15:06:57 root] (utils.py 283): INFO Epoch: [6] [2490/2502] eta: 0:00:09 lr: 0.000019 loss_cls: 3.8320 (3.9495) grad_norm: 2.3216 (2.3255) time: 0.7875 data: 0.0234 max mem: 8426 +[2024-12-10 15:07:05 root] (utils.py 283): INFO Epoch: [6] [2500/2502] eta: 0:00:01 lr: 0.000019 loss_cls: 3.8320 (3.9500) grad_norm: 2.2993 (2.3254) time: 0.7861 data: 0.0235 max mem: 8426 +[2024-12-10 15:07:06 root] (utils.py 283): INFO Epoch: [6] [2501/2502] eta: 0:00:00 lr: 0.000019 loss_cls: 3.8642 (3.9501) grad_norm: 2.2993 (2.3255) time: 0.7862 data: 0.0235 max mem: 8426 +[2024-12-10 15:07:06 root] (utils.py 297): INFO Epoch: [6] Total time: 0:31:55 (0.7654 s / it) +[2024-12-10 15:07:06 root] (engine.py 179): INFO Averaged stats:lr: 0.000019 loss_cls: 3.8642 (3.9625) grad_norm: 2.2993 (2.3255) +[2024-12-10 15:07:06 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.6363 (0.6363) acc1: 85.9375 (85.9375) acc3: 95.3125 (95.3125) acc5: 96.8750 (96.8750) time: 0.1275 data: 0.0003 max mem: 8426 +[2024-12-10 15:07:07 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7189 (0.8111) acc1: 85.9375 (82.1023) acc3: 95.3125 (93.2528) acc5: 96.8750 (96.5909) time: 0.1277 data: 0.0003 max mem: 8426 +[2024-12-10 15:07:09 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8544 (0.8627) acc1: 79.6875 (80.7664) acc3: 92.1875 (92.7083) acc5: 96.0938 (95.6845) time: 0.1279 data: 0.0004 max mem: 8426 +[2024-12-10 15:07:10 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8958 (0.8773) acc1: 78.1250 (79.7631) acc3: 92.1875 (92.9688) acc5: 96.0938 (95.5897) time: 0.1280 data: 0.0004 max mem: 8426 +[2024-12-10 15:07:11 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8256 (0.8630) acc1: 79.6875 (80.4116) acc3: 94.5312 (93.1974) acc5: 96.0938 (95.7698) time: 0.1285 data: 0.0004 max mem: 8426 +[2024-12-10 15:07:13 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0644 (0.9488) acc1: 75.0000 (78.5386) acc3: 88.2812 (91.6973) acc5: 92.1875 (94.7610) time: 0.1289 data: 0.0005 max mem: 8426 +[2024-12-10 15:07:14 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2774 (0.9933) acc1: 72.6562 (77.8817) acc3: 85.9375 (90.8043) acc5: 89.8438 (93.9549) time: 0.1526 data: 0.0241 max mem: 8426 +[2024-12-10 15:07:16 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2118 (1.0341) acc1: 75.0000 (76.9586) acc3: 86.7188 (90.3059) acc5: 90.6250 (93.5409) time: 0.1561 data: 0.0278 max mem: 8426 +[2024-12-10 15:07:18 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2429 (1.0691) acc1: 72.6562 (76.1574) acc3: 85.9375 (89.6412) acc5: 90.6250 (93.0652) time: 0.1677 data: 0.0385 max mem: 8426 +[2024-12-10 15:07:19 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2610 (1.0976) acc1: 71.8750 (75.4722) acc3: 85.1562 (89.3201) acc5: 89.8438 (92.8056) time: 0.1699 data: 0.0398 max mem: 8426 +[2024-12-10 15:07:20 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1375 (1.0864) acc1: 74.2188 (75.7120) acc3: 87.5000 (89.4960) acc5: 91.4062 (92.9680) time: 0.1558 data: 0.0272 max mem: 8426 +[2024-12-10 15:07:20 root] (utils.py 297): INFO Test: Total time: 0:00:14 (0.1471 s / it) +[2024-12-10 15:07:20 root] (engine.py 264): INFO * Acc@1 75.558 Acc@3 89.540 Acc@5 92.784 loss 1.089 flops 1.285 layer_flops 1.251 +[2024-12-10 15:07:20 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 75.6% +[2024-12-10 15:07:20 root] (main.py 576): INFO Max accuracy: 75.57% +[2024-12-10 15:07:21 root] (utils.py 283): INFO Epoch: [7] [ 0/2502] eta: 0:31:57 lr: 0.000018 loss_cls: 4.3935 (4.3935) grad_norm: 2.2502 (2.2502) time: 0.7663 data: 0.0003 max mem: 8426 +[2024-12-10 15:07:29 root] (utils.py 283): INFO Epoch: [7] [ 10/2502] eta: 0:31:27 lr: 0.000018 loss_cls: 4.3935 (4.2527) grad_norm: 2.3050 (2.3451) time: 0.7574 data: 0.0003 max mem: 8426 +[2024-12-10 15:07:36 root] (utils.py 283): INFO Epoch: [7] [ 20/2502] eta: 0:31:13 lr: 0.000018 loss_cls: 4.2242 (4.0029) grad_norm: 2.3058 (2.3481) time: 0.7541 data: 0.0003 max mem: 8426 +[2024-12-10 15:07:44 root] (utils.py 283): INFO Epoch: [7] [ 30/2502] eta: 0:31:11 lr: 0.000018 loss_cls: 4.1683 (4.0404) grad_norm: 2.3093 (2.3378) time: 0.7568 data: 0.0003 max mem: 8426 +[2024-12-10 15:07:52 root] (utils.py 283): INFO Epoch: [7] [ 40/2502] eta: 0:31:15 lr: 0.000018 loss_cls: 4.0750 (3.9787) grad_norm: 2.3223 (2.3395) time: 0.7691 data: 0.0003 max mem: 8426 +[2024-12-10 15:07:59 root] (utils.py 283): INFO Epoch: [7] [ 50/2502] eta: 0:31:14 lr: 0.000018 loss_cls: 4.0198 (3.9630) grad_norm: 2.3448 (2.3431) time: 0.7756 data: 0.0002 max mem: 8426 +[2024-12-10 15:08:07 root] (utils.py 283): INFO Epoch: [7] [ 60/2502] eta: 0:31:02 lr: 0.000018 loss_cls: 4.2390 (3.9877) grad_norm: 2.2566 (2.3335) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 15:08:15 root] (utils.py 283): INFO Epoch: [7] [ 70/2502] eta: 0:30:51 lr: 0.000018 loss_cls: 4.2192 (3.9699) grad_norm: 2.2281 (2.3251) time: 0.7538 data: 0.0002 max mem: 8426 +[2024-12-10 15:08:22 root] (utils.py 283): INFO Epoch: [7] [ 80/2502] eta: 0:30:42 lr: 0.000018 loss_cls: 4.2192 (3.9843) grad_norm: 2.2508 (2.3247) time: 0.7559 data: 0.0002 max mem: 8426 +[2024-12-10 15:08:30 root] (utils.py 283): INFO Epoch: [7] [ 90/2502] eta: 0:30:34 lr: 0.000018 loss_cls: 4.1910 (3.9949) grad_norm: 2.2493 (2.3143) time: 0.7574 data: 0.0002 max mem: 8426 +[2024-12-10 15:08:37 root] (utils.py 283): INFO Epoch: [7] [ 100/2502] eta: 0:30:26 lr: 0.000018 loss_cls: 4.1565 (3.9958) grad_norm: 2.2332 (2.3083) time: 0.7591 data: 0.0002 max mem: 8426 +[2024-12-10 15:08:45 root] (utils.py 283): INFO Epoch: [7] [ 110/2502] eta: 0:30:19 lr: 0.000018 loss_cls: 4.1901 (3.9999) grad_norm: 2.2525 (2.3102) time: 0.7617 data: 0.0003 max mem: 8426 +[2024-12-10 15:08:53 root] (utils.py 283): INFO Epoch: [7] [ 120/2502] eta: 0:30:12 lr: 0.000018 loss_cls: 4.0545 (3.9737) grad_norm: 2.2525 (2.3043) time: 0.7633 data: 0.0003 max mem: 8426 +[2024-12-10 15:09:00 root] (utils.py 283): INFO Epoch: [7] [ 130/2502] eta: 0:30:05 lr: 0.000018 loss_cls: 3.9462 (3.9799) grad_norm: 2.2743 (2.3059) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 15:09:08 root] (utils.py 283): INFO Epoch: [7] [ 140/2502] eta: 0:29:58 lr: 0.000018 loss_cls: 4.1599 (3.9822) grad_norm: 2.3660 (2.3076) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-10 15:09:15 root] (utils.py 283): INFO Epoch: [7] [ 150/2502] eta: 0:29:51 lr: 0.000018 loss_cls: 3.9424 (3.9756) grad_norm: 2.3061 (2.3112) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 15:09:23 root] (utils.py 283): INFO Epoch: [7] [ 160/2502] eta: 0:29:43 lr: 0.000018 loss_cls: 4.0893 (3.9886) grad_norm: 2.2979 (2.3100) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 15:09:31 root] (utils.py 283): INFO Epoch: [7] [ 170/2502] eta: 0:29:35 lr: 0.000018 loss_cls: 4.0893 (3.9633) grad_norm: 2.2595 (2.3081) time: 0.7595 data: 0.0002 max mem: 8426 +[2024-12-10 15:09:38 root] (utils.py 283): INFO Epoch: [7] [ 180/2502] eta: 0:29:28 lr: 0.000018 loss_cls: 3.1042 (3.9318) grad_norm: 2.2994 (2.3075) time: 0.7623 data: 0.0002 max mem: 8426 +[2024-12-10 15:09:46 root] (utils.py 283): INFO Epoch: [7] [ 190/2502] eta: 0:29:22 lr: 0.000018 loss_cls: 4.0011 (3.9477) grad_norm: 2.3306 (2.3093) time: 0.7684 data: 0.0002 max mem: 8426 +[2024-12-10 15:09:54 root] (utils.py 283): INFO Epoch: [7] [ 200/2502] eta: 0:29:14 lr: 0.000018 loss_cls: 4.2816 (3.9499) grad_norm: 2.3306 (2.3104) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 15:10:01 root] (utils.py 283): INFO Epoch: [7] [ 210/2502] eta: 0:29:06 lr: 0.000018 loss_cls: 3.8300 (3.9428) grad_norm: 2.2952 (2.3083) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 15:10:09 root] (utils.py 283): INFO Epoch: [7] [ 220/2502] eta: 0:28:59 lr: 0.000018 loss_cls: 3.8075 (3.9337) grad_norm: 2.2254 (2.3056) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-10 15:10:17 root] (utils.py 283): INFO Epoch: [7] [ 230/2502] eta: 0:28:51 lr: 0.000018 loss_cls: 4.0571 (3.9410) grad_norm: 2.2381 (2.3078) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 15:10:24 root] (utils.py 283): INFO Epoch: [7] [ 240/2502] eta: 0:28:44 lr: 0.000018 loss_cls: 4.1178 (3.9519) grad_norm: 2.3149 (2.3111) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-10 15:10:32 root] (utils.py 283): INFO Epoch: [7] [ 250/2502] eta: 0:28:38 lr: 0.000018 loss_cls: 4.1187 (3.9539) grad_norm: 2.3214 (2.3127) time: 0.7712 data: 0.0002 max mem: 8426 +[2024-12-10 15:10:40 root] (utils.py 283): INFO Epoch: [7] [ 260/2502] eta: 0:28:30 lr: 0.000018 loss_cls: 3.9475 (3.9528) grad_norm: 2.3231 (2.3152) time: 0.7714 data: 0.0002 max mem: 8426 +[2024-12-10 15:10:47 root] (utils.py 283): INFO Epoch: [7] [ 270/2502] eta: 0:28:22 lr: 0.000018 loss_cls: 3.8973 (3.9454) grad_norm: 2.2877 (2.3134) time: 0.7634 data: 0.0003 max mem: 8426 +[2024-12-10 15:10:55 root] (utils.py 283): INFO Epoch: [7] [ 280/2502] eta: 0:28:15 lr: 0.000018 loss_cls: 4.1218 (3.9468) grad_norm: 2.2627 (2.3122) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 15:11:03 root] (utils.py 283): INFO Epoch: [7] [ 290/2502] eta: 0:28:08 lr: 0.000018 loss_cls: 4.1218 (3.9487) grad_norm: 2.3400 (2.3136) time: 0.7718 data: 0.0002 max mem: 8426 +[2024-12-10 15:11:10 root] (utils.py 283): INFO Epoch: [7] [ 300/2502] eta: 0:28:01 lr: 0.000018 loss_cls: 4.1263 (3.9489) grad_norm: 2.3510 (2.3134) time: 0.7699 data: 0.0002 max mem: 8426 +[2024-12-10 15:11:18 root] (utils.py 283): INFO Epoch: [7] [ 310/2502] eta: 0:27:53 lr: 0.000018 loss_cls: 4.0002 (3.9394) grad_norm: 2.3078 (2.3144) time: 0.7619 data: 0.0003 max mem: 8426 +[2024-12-10 15:11:26 root] (utils.py 283): INFO Epoch: [7] [ 320/2502] eta: 0:27:46 lr: 0.000018 loss_cls: 3.7766 (3.9388) grad_norm: 2.3350 (2.3152) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-10 15:11:33 root] (utils.py 283): INFO Epoch: [7] [ 330/2502] eta: 0:27:39 lr: 0.000018 loss_cls: 3.9128 (3.9412) grad_norm: 2.3350 (2.3181) time: 0.7700 data: 0.0002 max mem: 8426 +[2024-12-10 15:11:41 root] (utils.py 283): INFO Epoch: [7] [ 340/2502] eta: 0:27:31 lr: 0.000018 loss_cls: 3.8589 (3.9347) grad_norm: 2.3340 (2.3191) time: 0.7682 data: 0.0003 max mem: 8426 +[2024-12-10 15:11:49 root] (utils.py 283): INFO Epoch: [7] [ 350/2502] eta: 0:27:23 lr: 0.000018 loss_cls: 3.8674 (3.9341) grad_norm: 2.3057 (2.3190) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 15:11:56 root] (utils.py 283): INFO Epoch: [7] [ 360/2502] eta: 0:27:16 lr: 0.000018 loss_cls: 4.1697 (3.9406) grad_norm: 2.3362 (2.3204) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 15:12:04 root] (utils.py 283): INFO Epoch: [7] [ 370/2502] eta: 0:27:08 lr: 0.000018 loss_cls: 4.0962 (3.9351) grad_norm: 2.3580 (2.3212) time: 0.7654 data: 0.0003 max mem: 8426 +[2024-12-10 15:12:12 root] (utils.py 283): INFO Epoch: [7] [ 380/2502] eta: 0:27:01 lr: 0.000018 loss_cls: 3.9504 (3.9373) grad_norm: 2.3305 (2.3210) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 15:12:19 root] (utils.py 283): INFO Epoch: [7] [ 390/2502] eta: 0:26:53 lr: 0.000018 loss_cls: 4.2553 (3.9407) grad_norm: 2.3690 (2.3235) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 15:12:27 root] (utils.py 283): INFO Epoch: [7] [ 400/2502] eta: 0:26:46 lr: 0.000018 loss_cls: 4.2486 (3.9472) grad_norm: 2.3771 (2.3243) time: 0.7682 data: 0.0002 max mem: 8426 +[2024-12-10 15:12:35 root] (utils.py 283): INFO Epoch: [7] [ 410/2502] eta: 0:26:39 lr: 0.000018 loss_cls: 4.3637 (3.9511) grad_norm: 2.3808 (2.3301) time: 0.7773 data: 0.0002 max mem: 8426 +[2024-12-10 15:12:43 root] (utils.py 283): INFO Epoch: [7] [ 420/2502] eta: 0:26:32 lr: 0.000018 loss_cls: 3.7288 (3.9459) grad_norm: 2.3735 (2.3315) time: 0.7839 data: 0.0002 max mem: 8426 +[2024-12-10 15:12:50 root] (utils.py 283): INFO Epoch: [7] [ 430/2502] eta: 0:26:26 lr: 0.000018 loss_cls: 3.7685 (3.9479) grad_norm: 2.3629 (2.3315) time: 0.7840 data: 0.0002 max mem: 8426 +[2024-12-10 15:12:58 root] (utils.py 283): INFO Epoch: [7] [ 440/2502] eta: 0:26:18 lr: 0.000018 loss_cls: 3.8589 (3.9403) grad_norm: 2.2598 (2.3310) time: 0.7762 data: 0.0002 max mem: 8426 +[2024-12-10 15:13:06 root] (utils.py 283): INFO Epoch: [7] [ 450/2502] eta: 0:26:10 lr: 0.000018 loss_cls: 3.7356 (3.9386) grad_norm: 2.2836 (2.3302) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-10 15:13:13 root] (utils.py 283): INFO Epoch: [7] [ 460/2502] eta: 0:26:03 lr: 0.000018 loss_cls: 4.1976 (3.9423) grad_norm: 2.2836 (2.3290) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 15:13:21 root] (utils.py 283): INFO Epoch: [7] [ 470/2502] eta: 0:25:55 lr: 0.000018 loss_cls: 4.1374 (3.9368) grad_norm: 2.2792 (2.3276) time: 0.7618 data: 0.0002 max mem: 8426 +[2024-12-10 15:13:29 root] (utils.py 283): INFO Epoch: [7] [ 480/2502] eta: 0:25:47 lr: 0.000018 loss_cls: 3.8775 (3.9331) grad_norm: 2.2792 (2.3270) time: 0.7624 data: 0.0003 max mem: 8426 +[2024-12-10 15:13:36 root] (utils.py 283): INFO Epoch: [7] [ 490/2502] eta: 0:25:39 lr: 0.000018 loss_cls: 4.0352 (3.9354) grad_norm: 2.2945 (2.3272) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 15:13:44 root] (utils.py 283): INFO Epoch: [7] [ 500/2502] eta: 0:25:32 lr: 0.000018 loss_cls: 4.1580 (3.9356) grad_norm: 2.3097 (2.3265) time: 0.7670 data: 0.0002 max mem: 8426 +[2024-12-10 15:13:52 root] (utils.py 283): INFO Epoch: [7] [ 510/2502] eta: 0:25:24 lr: 0.000018 loss_cls: 4.0837 (3.9393) grad_norm: 2.3395 (2.3272) time: 0.7661 data: 0.0002 max mem: 8426 +[2024-12-10 15:13:59 root] (utils.py 283): INFO Epoch: [7] [ 520/2502] eta: 0:25:16 lr: 0.000018 loss_cls: 4.0156 (3.9374) grad_norm: 2.3435 (2.3272) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 15:14:07 root] (utils.py 283): INFO Epoch: [7] [ 530/2502] eta: 0:25:09 lr: 0.000018 loss_cls: 4.0156 (3.9392) grad_norm: 2.3076 (2.3276) time: 0.7603 data: 0.0002 max mem: 8426 +[2024-12-10 15:14:14 root] (utils.py 283): INFO Epoch: [7] [ 540/2502] eta: 0:25:01 lr: 0.000018 loss_cls: 4.2660 (3.9349) grad_norm: 2.2781 (2.3272) time: 0.7603 data: 0.0002 max mem: 8426 +[2024-12-10 15:14:22 root] (utils.py 283): INFO Epoch: [7] [ 550/2502] eta: 0:24:53 lr: 0.000018 loss_cls: 4.0356 (3.9354) grad_norm: 2.3350 (2.3280) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 15:14:30 root] (utils.py 283): INFO Epoch: [7] [ 560/2502] eta: 0:24:45 lr: 0.000018 loss_cls: 4.1782 (3.9398) grad_norm: 2.3624 (2.3288) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 15:14:37 root] (utils.py 283): INFO Epoch: [7] [ 570/2502] eta: 0:24:38 lr: 0.000018 loss_cls: 4.2259 (3.9402) grad_norm: 2.2842 (2.3285) time: 0.7691 data: 0.0002 max mem: 8426 +[2024-12-10 15:14:45 root] (utils.py 283): INFO Epoch: [7] [ 580/2502] eta: 0:24:31 lr: 0.000018 loss_cls: 4.2307 (3.9450) grad_norm: 2.3265 (2.3292) time: 0.7755 data: 0.0002 max mem: 8426 +[2024-12-10 15:14:53 root] (utils.py 283): INFO Epoch: [7] [ 590/2502] eta: 0:24:24 lr: 0.000018 loss_cls: 4.3072 (3.9510) grad_norm: 2.3440 (2.3295) time: 0.7785 data: 0.0002 max mem: 8426 +[2024-12-10 15:15:01 root] (utils.py 283): INFO Epoch: [7] [ 600/2502] eta: 0:24:16 lr: 0.000018 loss_cls: 4.2282 (3.9506) grad_norm: 2.3621 (2.3306) time: 0.7721 data: 0.0002 max mem: 8426 +[2024-12-10 15:15:08 root] (utils.py 283): INFO Epoch: [7] [ 610/2502] eta: 0:24:08 lr: 0.000018 loss_cls: 3.9457 (3.9503) grad_norm: 2.3546 (2.3302) time: 0.7678 data: 0.0002 max mem: 8426 +[2024-12-10 15:15:16 root] (utils.py 283): INFO Epoch: [7] [ 620/2502] eta: 0:24:01 lr: 0.000018 loss_cls: 3.8923 (3.9485) grad_norm: 2.3122 (2.3299) time: 0.7697 data: 0.0002 max mem: 8426 +[2024-12-10 15:15:24 root] (utils.py 283): INFO Epoch: [7] [ 630/2502] eta: 0:23:53 lr: 0.000018 loss_cls: 3.9796 (3.9496) grad_norm: 2.3122 (2.3291) time: 0.7661 data: 0.0002 max mem: 8426 +[2024-12-10 15:15:31 root] (utils.py 283): INFO Epoch: [7] [ 640/2502] eta: 0:23:46 lr: 0.000018 loss_cls: 4.0650 (3.9491) grad_norm: 2.3328 (2.3298) time: 0.7688 data: 0.0002 max mem: 8426 +[2024-12-10 15:15:39 root] (utils.py 283): INFO Epoch: [7] [ 650/2502] eta: 0:23:38 lr: 0.000018 loss_cls: 3.9840 (3.9474) grad_norm: 2.2952 (2.3296) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-10 15:15:47 root] (utils.py 283): INFO Epoch: [7] [ 660/2502] eta: 0:23:31 lr: 0.000018 loss_cls: 4.1906 (3.9522) grad_norm: 2.2915 (2.3295) time: 0.7689 data: 0.0002 max mem: 8426 +[2024-12-10 15:15:55 root] (utils.py 283): INFO Epoch: [7] [ 670/2502] eta: 0:23:23 lr: 0.000018 loss_cls: 4.2031 (3.9521) grad_norm: 2.2752 (2.3284) time: 0.7784 data: 0.0002 max mem: 8426 +[2024-12-10 15:16:02 root] (utils.py 283): INFO Epoch: [7] [ 680/2502] eta: 0:23:16 lr: 0.000018 loss_cls: 3.9983 (3.9534) grad_norm: 2.2752 (2.3286) time: 0.7807 data: 0.0002 max mem: 8426 +[2024-12-10 15:16:10 root] (utils.py 283): INFO Epoch: [7] [ 690/2502] eta: 0:23:08 lr: 0.000018 loss_cls: 4.0064 (3.9542) grad_norm: 2.3842 (2.3297) time: 0.7770 data: 0.0002 max mem: 8426 +[2024-12-10 15:16:18 root] (utils.py 283): INFO Epoch: [7] [ 700/2502] eta: 0:23:01 lr: 0.000018 loss_cls: 4.0085 (3.9565) grad_norm: 2.3868 (2.3293) time: 0.7703 data: 0.0002 max mem: 8426 +[2024-12-10 15:16:26 root] (utils.py 283): INFO Epoch: [7] [ 710/2502] eta: 0:22:53 lr: 0.000018 loss_cls: 4.0488 (3.9578) grad_norm: 2.3472 (2.3292) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 15:16:33 root] (utils.py 283): INFO Epoch: [7] [ 720/2502] eta: 0:22:45 lr: 0.000018 loss_cls: 4.1788 (3.9598) grad_norm: 2.2888 (2.3283) time: 0.7619 data: 0.0003 max mem: 8426 +[2024-12-10 15:16:41 root] (utils.py 283): INFO Epoch: [7] [ 730/2502] eta: 0:22:38 lr: 0.000018 loss_cls: 4.2197 (3.9615) grad_norm: 2.2552 (2.3289) time: 0.7623 data: 0.0002 max mem: 8426 +[2024-12-10 15:16:49 root] (utils.py 283): INFO Epoch: [7] [ 740/2502] eta: 0:22:30 lr: 0.000018 loss_cls: 4.2340 (3.9639) grad_norm: 2.2779 (2.3281) time: 0.7687 data: 0.0002 max mem: 8426 +[2024-12-10 15:16:56 root] (utils.py 283): INFO Epoch: [7] [ 750/2502] eta: 0:22:23 lr: 0.000018 loss_cls: 4.3105 (3.9667) grad_norm: 2.2116 (2.3274) time: 0.7755 data: 0.0002 max mem: 8426 +[2024-12-10 15:17:04 root] (utils.py 283): INFO Epoch: [7] [ 760/2502] eta: 0:22:15 lr: 0.000018 loss_cls: 4.2540 (3.9701) grad_norm: 2.2157 (2.3274) time: 0.7728 data: 0.0002 max mem: 8426 +[2024-12-10 15:17:12 root] (utils.py 283): INFO Epoch: [7] [ 770/2502] eta: 0:22:07 lr: 0.000018 loss_cls: 4.2540 (3.9729) grad_norm: 2.3274 (2.3274) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 15:17:19 root] (utils.py 283): INFO Epoch: [7] [ 780/2502] eta: 0:22:00 lr: 0.000018 loss_cls: 4.1289 (3.9707) grad_norm: 2.3085 (2.3271) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 15:17:27 root] (utils.py 283): INFO Epoch: [7] [ 790/2502] eta: 0:21:52 lr: 0.000018 loss_cls: 4.0306 (3.9740) grad_norm: 2.3077 (2.3274) time: 0.7639 data: 0.0003 max mem: 8426 +[2024-12-10 15:17:35 root] (utils.py 283): INFO Epoch: [7] [ 800/2502] eta: 0:21:44 lr: 0.000018 loss_cls: 4.0306 (3.9714) grad_norm: 2.3094 (2.3267) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 15:17:42 root] (utils.py 283): INFO Epoch: [7] [ 810/2502] eta: 0:21:36 lr: 0.000018 loss_cls: 4.0648 (3.9740) grad_norm: 2.2889 (2.3264) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 15:17:50 root] (utils.py 283): INFO Epoch: [7] [ 820/2502] eta: 0:21:29 lr: 0.000018 loss_cls: 4.1796 (3.9732) grad_norm: 2.2889 (2.3260) time: 0.7684 data: 0.0002 max mem: 8426 +[2024-12-10 15:17:57 root] (utils.py 283): INFO Epoch: [7] [ 830/2502] eta: 0:21:21 lr: 0.000018 loss_cls: 3.8686 (3.9715) grad_norm: 2.2711 (2.3256) time: 0.7662 data: 0.0002 max mem: 8426 +[2024-12-10 15:18:05 root] (utils.py 283): INFO Epoch: [7] [ 840/2502] eta: 0:21:13 lr: 0.000018 loss_cls: 3.7527 (3.9690) grad_norm: 2.2711 (2.3262) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 15:18:13 root] (utils.py 283): INFO Epoch: [7] [ 850/2502] eta: 0:21:06 lr: 0.000018 loss_cls: 3.9519 (3.9689) grad_norm: 2.2810 (2.3259) time: 0.7691 data: 0.0002 max mem: 8426 +[2024-12-10 15:18:21 root] (utils.py 283): INFO Epoch: [7] [ 860/2502] eta: 0:20:58 lr: 0.000018 loss_cls: 3.9779 (3.9691) grad_norm: 2.3046 (2.3263) time: 0.7678 data: 0.0002 max mem: 8426 +[2024-12-10 15:18:28 root] (utils.py 283): INFO Epoch: [7] [ 870/2502] eta: 0:20:50 lr: 0.000018 loss_cls: 4.2919 (3.9719) grad_norm: 2.3526 (2.3266) time: 0.7612 data: 0.0002 max mem: 8426 +[2024-12-10 15:18:36 root] (utils.py 283): INFO Epoch: [7] [ 880/2502] eta: 0:20:43 lr: 0.000018 loss_cls: 4.3405 (3.9710) grad_norm: 2.2953 (2.3265) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 15:18:43 root] (utils.py 283): INFO Epoch: [7] [ 890/2502] eta: 0:20:35 lr: 0.000018 loss_cls: 4.1307 (3.9724) grad_norm: 2.2610 (2.3264) time: 0.7613 data: 0.0002 max mem: 8426 +[2024-12-10 15:18:51 root] (utils.py 283): INFO Epoch: [7] [ 900/2502] eta: 0:20:27 lr: 0.000018 loss_cls: 4.0340 (3.9736) grad_norm: 2.2706 (2.3264) time: 0.7564 data: 0.0003 max mem: 8426 +[2024-12-10 15:18:59 root] (utils.py 283): INFO Epoch: [7] [ 910/2502] eta: 0:20:19 lr: 0.000018 loss_cls: 3.9494 (3.9729) grad_norm: 2.2748 (2.3263) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 15:19:06 root] (utils.py 283): INFO Epoch: [7] [ 920/2502] eta: 0:20:12 lr: 0.000018 loss_cls: 3.9364 (3.9720) grad_norm: 2.2934 (2.3263) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 15:19:14 root] (utils.py 283): INFO Epoch: [7] [ 930/2502] eta: 0:20:04 lr: 0.000018 loss_cls: 3.5746 (3.9688) grad_norm: 2.3383 (2.3266) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-10 15:19:21 root] (utils.py 283): INFO Epoch: [7] [ 940/2502] eta: 0:19:56 lr: 0.000018 loss_cls: 3.5746 (3.9666) grad_norm: 2.3480 (2.3268) time: 0.7666 data: 0.0002 max mem: 8426 +[2024-12-10 15:19:29 root] (utils.py 283): INFO Epoch: [7] [ 950/2502] eta: 0:19:48 lr: 0.000018 loss_cls: 3.7639 (3.9650) grad_norm: 2.3058 (2.3267) time: 0.7573 data: 0.0002 max mem: 8426 +[2024-12-10 15:19:37 root] (utils.py 283): INFO Epoch: [7] [ 960/2502] eta: 0:19:41 lr: 0.000018 loss_cls: 3.8478 (3.9662) grad_norm: 2.2982 (2.3258) time: 0.7538 data: 0.0003 max mem: 8426 +[2024-12-10 15:19:44 root] (utils.py 283): INFO Epoch: [7] [ 970/2502] eta: 0:19:33 lr: 0.000018 loss_cls: 4.0419 (3.9658) grad_norm: 2.2822 (2.3254) time: 0.7544 data: 0.0003 max mem: 8426 +[2024-12-10 15:19:52 root] (utils.py 283): INFO Epoch: [7] [ 980/2502] eta: 0:19:25 lr: 0.000018 loss_cls: 4.0720 (3.9671) grad_norm: 2.2834 (2.3252) time: 0.7552 data: 0.0002 max mem: 8426 +[2024-12-10 15:19:59 root] (utils.py 283): INFO Epoch: [7] [ 990/2502] eta: 0:19:17 lr: 0.000018 loss_cls: 4.0720 (3.9683) grad_norm: 2.2850 (2.3251) time: 0.7549 data: 0.0002 max mem: 8426 +[2024-12-10 15:20:07 root] (utils.py 283): INFO Epoch: [7] [1000/2502] eta: 0:19:09 lr: 0.000018 loss_cls: 4.0241 (3.9681) grad_norm: 2.3746 (2.3258) time: 0.7548 data: 0.0002 max mem: 8426 +[2024-12-10 15:20:14 root] (utils.py 283): INFO Epoch: [7] [1010/2502] eta: 0:19:02 lr: 0.000018 loss_cls: 3.9631 (3.9685) grad_norm: 2.3926 (2.3260) time: 0.7587 data: 0.0002 max mem: 8426 +[2024-12-10 15:20:22 root] (utils.py 283): INFO Epoch: [7] [1020/2502] eta: 0:18:54 lr: 0.000018 loss_cls: 4.0128 (3.9681) grad_norm: 2.3306 (2.3258) time: 0.7615 data: 0.0002 max mem: 8426 +[2024-12-10 15:20:30 root] (utils.py 283): INFO Epoch: [7] [1030/2502] eta: 0:18:46 lr: 0.000018 loss_cls: 3.5289 (3.9631) grad_norm: 2.3264 (2.3258) time: 0.7599 data: 0.0002 max mem: 8426 +[2024-12-10 15:20:37 root] (utils.py 283): INFO Epoch: [7] [1040/2502] eta: 0:18:38 lr: 0.000018 loss_cls: 3.7750 (3.9649) grad_norm: 2.3086 (2.3253) time: 0.7584 data: 0.0002 max mem: 8426 +[2024-12-10 15:20:45 root] (utils.py 283): INFO Epoch: [7] [1050/2502] eta: 0:18:31 lr: 0.000018 loss_cls: 4.0609 (3.9633) grad_norm: 2.2888 (2.3248) time: 0.7577 data: 0.0002 max mem: 8426 +[2024-12-10 15:20:52 root] (utils.py 283): INFO Epoch: [7] [1060/2502] eta: 0:18:23 lr: 0.000018 loss_cls: 4.0609 (3.9636) grad_norm: 2.2888 (2.3248) time: 0.7576 data: 0.0002 max mem: 8426 +[2024-12-10 15:21:00 root] (utils.py 283): INFO Epoch: [7] [1070/2502] eta: 0:18:15 lr: 0.000018 loss_cls: 3.9156 (3.9637) grad_norm: 2.2328 (2.3241) time: 0.7589 data: 0.0002 max mem: 8426 +[2024-12-10 15:21:08 root] (utils.py 283): INFO Epoch: [7] [1080/2502] eta: 0:18:07 lr: 0.000018 loss_cls: 3.8721 (3.9627) grad_norm: 2.2213 (2.3237) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 15:21:15 root] (utils.py 283): INFO Epoch: [7] [1090/2502] eta: 0:18:00 lr: 0.000018 loss_cls: 4.1404 (3.9634) grad_norm: 2.2987 (2.3236) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 15:21:23 root] (utils.py 283): INFO Epoch: [7] [1100/2502] eta: 0:17:52 lr: 0.000018 loss_cls: 4.1177 (3.9627) grad_norm: 2.3225 (2.3240) time: 0.7573 data: 0.0002 max mem: 8426 +[2024-12-10 15:21:30 root] (utils.py 283): INFO Epoch: [7] [1110/2502] eta: 0:17:44 lr: 0.000018 loss_cls: 4.0312 (3.9605) grad_norm: 2.3282 (2.3241) time: 0.7593 data: 0.0002 max mem: 8426 +[2024-12-10 15:21:38 root] (utils.py 283): INFO Epoch: [7] [1120/2502] eta: 0:17:37 lr: 0.000018 loss_cls: 3.7253 (3.9581) grad_norm: 2.3555 (2.3240) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 15:21:46 root] (utils.py 283): INFO Epoch: [7] [1130/2502] eta: 0:17:29 lr: 0.000018 loss_cls: 4.0023 (3.9590) grad_norm: 2.2377 (2.3236) time: 0.7609 data: 0.0002 max mem: 8426 +[2024-12-10 15:21:53 root] (utils.py 283): INFO Epoch: [7] [1140/2502] eta: 0:17:21 lr: 0.000018 loss_cls: 4.0681 (3.9585) grad_norm: 2.2569 (2.3235) time: 0.7599 data: 0.0003 max mem: 8426 +[2024-12-10 15:22:01 root] (utils.py 283): INFO Epoch: [7] [1150/2502] eta: 0:17:14 lr: 0.000018 loss_cls: 3.9044 (3.9580) grad_norm: 2.2804 (2.3234) time: 0.7671 data: 0.0003 max mem: 8426 +[2024-12-10 15:22:09 root] (utils.py 283): INFO Epoch: [7] [1160/2502] eta: 0:17:06 lr: 0.000018 loss_cls: 3.9044 (3.9573) grad_norm: 2.3014 (2.3232) time: 0.7664 data: 0.0003 max mem: 8426 +[2024-12-10 15:22:16 root] (utils.py 283): INFO Epoch: [7] [1170/2502] eta: 0:16:58 lr: 0.000018 loss_cls: 4.1184 (3.9570) grad_norm: 2.3014 (2.3230) time: 0.7660 data: 0.0003 max mem: 8426 +[2024-12-10 15:22:24 root] (utils.py 283): INFO Epoch: [7] [1180/2502] eta: 0:16:51 lr: 0.000018 loss_cls: 4.1428 (3.9591) grad_norm: 2.3217 (2.3234) time: 0.7661 data: 0.0003 max mem: 8426 +[2024-12-10 15:22:32 root] (utils.py 283): INFO Epoch: [7] [1190/2502] eta: 0:16:43 lr: 0.000018 loss_cls: 4.0974 (3.9577) grad_norm: 2.3525 (2.3236) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 15:22:39 root] (utils.py 283): INFO Epoch: [7] [1200/2502] eta: 0:16:35 lr: 0.000018 loss_cls: 3.4831 (3.9550) grad_norm: 2.3277 (2.3236) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-10 15:22:47 root] (utils.py 283): INFO Epoch: [7] [1210/2502] eta: 0:16:28 lr: 0.000018 loss_cls: 3.6318 (3.9544) grad_norm: 2.3355 (2.3238) time: 0.7726 data: 0.0002 max mem: 8426 +[2024-12-10 15:22:55 root] (utils.py 283): INFO Epoch: [7] [1220/2502] eta: 0:16:20 lr: 0.000018 loss_cls: 3.7521 (3.9520) grad_norm: 2.3924 (2.3245) time: 0.7782 data: 0.0002 max mem: 8426 +[2024-12-10 15:23:03 root] (utils.py 283): INFO Epoch: [7] [1230/2502] eta: 0:16:13 lr: 0.000018 loss_cls: 3.8984 (3.9521) grad_norm: 2.2981 (2.3241) time: 0.7824 data: 0.0002 max mem: 8426 +[2024-12-10 15:23:10 root] (utils.py 283): INFO Epoch: [7] [1240/2502] eta: 0:16:05 lr: 0.000018 loss_cls: 3.9355 (3.9514) grad_norm: 2.2490 (2.3233) time: 0.7840 data: 0.0002 max mem: 8426 +[2024-12-10 15:23:18 root] (utils.py 283): INFO Epoch: [7] [1250/2502] eta: 0:15:58 lr: 0.000018 loss_cls: 3.9671 (3.9504) grad_norm: 2.2301 (2.3230) time: 0.7830 data: 0.0002 max mem: 8426 +[2024-12-10 15:23:26 root] (utils.py 283): INFO Epoch: [7] [1260/2502] eta: 0:15:50 lr: 0.000018 loss_cls: 4.0758 (3.9525) grad_norm: 2.2702 (2.3230) time: 0.7719 data: 0.0002 max mem: 8426 +[2024-12-10 15:23:34 root] (utils.py 283): INFO Epoch: [7] [1270/2502] eta: 0:15:43 lr: 0.000018 loss_cls: 4.1524 (3.9542) grad_norm: 2.3030 (2.3225) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 15:23:41 root] (utils.py 283): INFO Epoch: [7] [1280/2502] eta: 0:15:35 lr: 0.000018 loss_cls: 4.0675 (3.9539) grad_norm: 2.2552 (2.3224) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 15:23:49 root] (utils.py 283): INFO Epoch: [7] [1290/2502] eta: 0:15:27 lr: 0.000018 loss_cls: 4.0050 (3.9544) grad_norm: 2.2946 (2.3227) time: 0.7595 data: 0.0002 max mem: 8426 +[2024-12-10 15:23:56 root] (utils.py 283): INFO Epoch: [7] [1300/2502] eta: 0:15:20 lr: 0.000018 loss_cls: 4.0267 (3.9558) grad_norm: 2.3023 (2.3225) time: 0.7586 data: 0.0002 max mem: 8426 +[2024-12-10 15:24:04 root] (utils.py 283): INFO Epoch: [7] [1310/2502] eta: 0:15:12 lr: 0.000018 loss_cls: 3.9599 (3.9547) grad_norm: 2.2506 (2.3220) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 15:24:12 root] (utils.py 283): INFO Epoch: [7] [1320/2502] eta: 0:15:04 lr: 0.000018 loss_cls: 3.9458 (3.9544) grad_norm: 2.2506 (2.3221) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 15:24:19 root] (utils.py 283): INFO Epoch: [7] [1330/2502] eta: 0:14:57 lr: 0.000018 loss_cls: 3.9512 (3.9522) grad_norm: 2.2842 (2.3218) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 15:24:27 root] (utils.py 283): INFO Epoch: [7] [1340/2502] eta: 0:14:49 lr: 0.000018 loss_cls: 3.7602 (3.9511) grad_norm: 2.3062 (2.3222) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 15:24:35 root] (utils.py 283): INFO Epoch: [7] [1350/2502] eta: 0:14:41 lr: 0.000018 loss_cls: 4.1948 (3.9534) grad_norm: 2.3379 (2.3226) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 15:24:42 root] (utils.py 283): INFO Epoch: [7] [1360/2502] eta: 0:14:34 lr: 0.000018 loss_cls: 4.2227 (3.9530) grad_norm: 2.3379 (2.3230) time: 0.7614 data: 0.0002 max mem: 8426 +[2024-12-10 15:24:50 root] (utils.py 283): INFO Epoch: [7] [1370/2502] eta: 0:14:26 lr: 0.000018 loss_cls: 4.1841 (3.9544) grad_norm: 2.3358 (2.3231) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-10 15:24:58 root] (utils.py 283): INFO Epoch: [7] [1380/2502] eta: 0:14:18 lr: 0.000018 loss_cls: 4.3361 (3.9571) grad_norm: 2.2801 (2.3228) time: 0.7675 data: 0.0002 max mem: 8426 +[2024-12-10 15:25:05 root] (utils.py 283): INFO Epoch: [7] [1390/2502] eta: 0:14:11 lr: 0.000018 loss_cls: 4.1728 (3.9570) grad_norm: 2.2769 (2.3225) time: 0.7661 data: 0.0002 max mem: 8426 +[2024-12-10 15:25:13 root] (utils.py 283): INFO Epoch: [7] [1400/2502] eta: 0:14:03 lr: 0.000018 loss_cls: 3.9457 (3.9563) grad_norm: 2.2554 (2.3223) time: 0.7734 data: 0.0002 max mem: 8426 +[2024-12-10 15:25:21 root] (utils.py 283): INFO Epoch: [7] [1410/2502] eta: 0:13:55 lr: 0.000018 loss_cls: 4.0040 (3.9563) grad_norm: 2.2482 (2.3222) time: 0.7718 data: 0.0002 max mem: 8426 +[2024-12-10 15:25:28 root] (utils.py 283): INFO Epoch: [7] [1420/2502] eta: 0:13:48 lr: 0.000018 loss_cls: 4.0310 (3.9555) grad_norm: 2.2531 (2.3220) time: 0.7638 data: 0.0003 max mem: 8426 +[2024-12-10 15:25:36 root] (utils.py 283): INFO Epoch: [7] [1430/2502] eta: 0:13:40 lr: 0.000018 loss_cls: 3.9305 (3.9540) grad_norm: 2.3273 (2.3219) time: 0.7739 data: 0.0003 max mem: 8426 +[2024-12-10 15:25:44 root] (utils.py 283): INFO Epoch: [7] [1440/2502] eta: 0:13:33 lr: 0.000018 loss_cls: 3.9278 (3.9526) grad_norm: 2.2719 (2.3215) time: 0.7771 data: 0.0002 max mem: 8426 +[2024-12-10 15:25:52 root] (utils.py 283): INFO Epoch: [7] [1450/2502] eta: 0:13:25 lr: 0.000018 loss_cls: 3.9278 (3.9520) grad_norm: 2.2358 (2.3216) time: 0.7699 data: 0.0002 max mem: 8426 +[2024-12-10 15:25:59 root] (utils.py 283): INFO Epoch: [7] [1460/2502] eta: 0:13:17 lr: 0.000018 loss_cls: 4.1200 (3.9542) grad_norm: 2.3255 (2.3217) time: 0.7754 data: 0.0002 max mem: 8426 +[2024-12-10 15:26:07 root] (utils.py 283): INFO Epoch: [7] [1470/2502] eta: 0:13:10 lr: 0.000018 loss_cls: 4.3364 (3.9539) grad_norm: 2.3577 (2.3221) time: 0.7801 data: 0.0002 max mem: 8426 +[2024-12-10 15:26:15 root] (utils.py 283): INFO Epoch: [7] [1480/2502] eta: 0:13:02 lr: 0.000018 loss_cls: 3.8808 (3.9525) grad_norm: 2.3577 (2.3221) time: 0.7794 data: 0.0002 max mem: 8426 +[2024-12-10 15:26:23 root] (utils.py 283): INFO Epoch: [7] [1490/2502] eta: 0:12:55 lr: 0.000018 loss_cls: 3.9592 (3.9528) grad_norm: 2.3905 (2.3228) time: 0.7788 data: 0.0002 max mem: 8426 +[2024-12-10 15:26:31 root] (utils.py 283): INFO Epoch: [7] [1500/2502] eta: 0:12:47 lr: 0.000018 loss_cls: 4.0428 (3.9528) grad_norm: 2.3921 (2.3233) time: 0.7785 data: 0.0002 max mem: 8426 +[2024-12-10 15:26:38 root] (utils.py 283): INFO Epoch: [7] [1510/2502] eta: 0:12:40 lr: 0.000018 loss_cls: 4.0428 (3.9531) grad_norm: 2.2613 (2.3226) time: 0.7798 data: 0.0002 max mem: 8426 +[2024-12-10 15:26:46 root] (utils.py 283): INFO Epoch: [7] [1520/2502] eta: 0:12:32 lr: 0.000018 loss_cls: 3.8532 (3.9510) grad_norm: 2.2961 (2.3230) time: 0.7833 data: 0.0002 max mem: 8426 +[2024-12-10 15:26:54 root] (utils.py 283): INFO Epoch: [7] [1530/2502] eta: 0:12:24 lr: 0.000018 loss_cls: 3.4496 (3.9488) grad_norm: 2.3080 (2.3227) time: 0.7839 data: 0.0002 max mem: 8426 +[2024-12-10 15:27:02 root] (utils.py 283): INFO Epoch: [7] [1540/2502] eta: 0:12:17 lr: 0.000018 loss_cls: 4.1017 (3.9504) grad_norm: 2.2569 (2.3225) time: 0.7832 data: 0.0002 max mem: 8426 +[2024-12-10 15:27:10 root] (utils.py 283): INFO Epoch: [7] [1550/2502] eta: 0:12:09 lr: 0.000018 loss_cls: 4.1508 (3.9511) grad_norm: 2.2595 (2.3221) time: 0.7812 data: 0.0002 max mem: 8426 +[2024-12-10 15:27:17 root] (utils.py 283): INFO Epoch: [7] [1560/2502] eta: 0:12:02 lr: 0.000018 loss_cls: 3.8908 (3.9514) grad_norm: 2.2780 (2.3221) time: 0.7785 data: 0.0002 max mem: 8426 +[2024-12-10 15:27:25 root] (utils.py 283): INFO Epoch: [7] [1570/2502] eta: 0:11:54 lr: 0.000018 loss_cls: 4.0367 (3.9530) grad_norm: 2.3284 (2.3223) time: 0.7781 data: 0.0002 max mem: 8426 +[2024-12-10 15:27:33 root] (utils.py 283): INFO Epoch: [7] [1580/2502] eta: 0:11:47 lr: 0.000018 loss_cls: 3.9634 (3.9519) grad_norm: 2.3997 (2.3225) time: 0.7793 data: 0.0002 max mem: 8426 +[2024-12-10 15:27:41 root] (utils.py 283): INFO Epoch: [7] [1590/2502] eta: 0:11:39 lr: 0.000018 loss_cls: 3.9823 (3.9538) grad_norm: 2.3083 (2.3224) time: 0.7802 data: 0.0002 max mem: 8426 +[2024-12-10 15:27:49 root] (utils.py 283): INFO Epoch: [7] [1600/2502] eta: 0:11:31 lr: 0.000018 loss_cls: 4.2596 (3.9556) grad_norm: 2.3263 (2.3226) time: 0.7811 data: 0.0002 max mem: 8426 +[2024-12-10 15:27:56 root] (utils.py 283): INFO Epoch: [7] [1610/2502] eta: 0:11:24 lr: 0.000018 loss_cls: 4.2040 (3.9545) grad_norm: 2.3066 (2.3224) time: 0.7848 data: 0.0002 max mem: 8426 +[2024-12-10 15:28:04 root] (utils.py 283): INFO Epoch: [7] [1620/2502] eta: 0:11:16 lr: 0.000018 loss_cls: 3.8214 (3.9519) grad_norm: 2.2292 (2.3220) time: 0.7916 data: 0.0002 max mem: 8426 +[2024-12-10 15:28:12 root] (utils.py 283): INFO Epoch: [7] [1630/2502] eta: 0:11:09 lr: 0.000018 loss_cls: 3.8357 (3.9518) grad_norm: 2.2621 (2.3220) time: 0.7961 data: 0.0002 max mem: 8426 +[2024-12-10 15:28:20 root] (utils.py 283): INFO Epoch: [7] [1640/2502] eta: 0:11:01 lr: 0.000018 loss_cls: 4.1162 (3.9521) grad_norm: 2.2959 (2.3221) time: 0.7960 data: 0.0002 max mem: 8426 +[2024-12-10 15:28:28 root] (utils.py 283): INFO Epoch: [7] [1650/2502] eta: 0:10:54 lr: 0.000018 loss_cls: 4.1378 (3.9512) grad_norm: 2.2959 (2.3220) time: 0.7957 data: 0.0002 max mem: 8426 +[2024-12-10 15:28:36 root] (utils.py 283): INFO Epoch: [7] [1660/2502] eta: 0:10:46 lr: 0.000018 loss_cls: 4.3329 (3.9534) grad_norm: 2.3805 (2.3226) time: 0.7944 data: 0.0003 max mem: 8426 +[2024-12-10 15:28:44 root] (utils.py 283): INFO Epoch: [7] [1670/2502] eta: 0:10:39 lr: 0.000018 loss_cls: 4.4544 (3.9543) grad_norm: 2.3437 (2.3224) time: 0.7936 data: 0.0003 max mem: 8426 +[2024-12-10 15:28:52 root] (utils.py 283): INFO Epoch: [7] [1680/2502] eta: 0:10:31 lr: 0.000018 loss_cls: 4.2076 (3.9540) grad_norm: 2.3197 (2.3225) time: 0.7898 data: 0.0002 max mem: 8426 +[2024-12-10 15:29:00 root] (utils.py 283): INFO Epoch: [7] [1690/2502] eta: 0:10:23 lr: 0.000018 loss_cls: 3.9710 (3.9543) grad_norm: 2.3197 (2.3227) time: 0.7837 data: 0.0002 max mem: 8426 +[2024-12-10 15:29:08 root] (utils.py 283): INFO Epoch: [7] [1700/2502] eta: 0:10:16 lr: 0.000018 loss_cls: 4.1847 (3.9556) grad_norm: 2.2948 (2.3230) time: 0.7793 data: 0.0002 max mem: 8426 +[2024-12-10 15:29:15 root] (utils.py 283): INFO Epoch: [7] [1710/2502] eta: 0:10:08 lr: 0.000018 loss_cls: 4.1574 (3.9570) grad_norm: 2.3139 (2.3229) time: 0.7693 data: 0.0002 max mem: 8426 +[2024-12-10 15:29:23 root] (utils.py 283): INFO Epoch: [7] [1720/2502] eta: 0:10:00 lr: 0.000018 loss_cls: 4.1506 (3.9584) grad_norm: 2.3256 (2.3232) time: 0.7615 data: 0.0002 max mem: 8426 +[2024-12-10 15:29:31 root] (utils.py 283): INFO Epoch: [7] [1730/2502] eta: 0:09:53 lr: 0.000018 loss_cls: 4.1709 (3.9582) grad_norm: 2.3460 (2.3233) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 15:29:38 root] (utils.py 283): INFO Epoch: [7] [1740/2502] eta: 0:09:45 lr: 0.000018 loss_cls: 4.1298 (3.9569) grad_norm: 2.3460 (2.3238) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 15:29:46 root] (utils.py 283): INFO Epoch: [7] [1750/2502] eta: 0:09:37 lr: 0.000018 loss_cls: 3.8480 (3.9566) grad_norm: 2.2670 (2.3236) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 15:29:54 root] (utils.py 283): INFO Epoch: [7] [1760/2502] eta: 0:09:30 lr: 0.000018 loss_cls: 3.8825 (3.9554) grad_norm: 2.2670 (2.3235) time: 0.7706 data: 0.0002 max mem: 8426 +[2024-12-10 15:30:01 root] (utils.py 283): INFO Epoch: [7] [1770/2502] eta: 0:09:22 lr: 0.000018 loss_cls: 3.5646 (3.9541) grad_norm: 2.2614 (2.3233) time: 0.7740 data: 0.0002 max mem: 8426 +[2024-12-10 15:30:09 root] (utils.py 283): INFO Epoch: [7] [1780/2502] eta: 0:09:14 lr: 0.000018 loss_cls: 4.1688 (3.9548) grad_norm: 2.2904 (2.3234) time: 0.7672 data: 0.0002 max mem: 8426 +[2024-12-10 15:30:17 root] (utils.py 283): INFO Epoch: [7] [1790/2502] eta: 0:09:07 lr: 0.000018 loss_cls: 4.0121 (3.9550) grad_norm: 2.3559 (2.3238) time: 0.7709 data: 0.0002 max mem: 8426 +[2024-12-10 15:30:24 root] (utils.py 283): INFO Epoch: [7] [1800/2502] eta: 0:08:59 lr: 0.000018 loss_cls: 3.9953 (3.9553) grad_norm: 2.3925 (2.3240) time: 0.7724 data: 0.0002 max mem: 8426 +[2024-12-10 15:30:32 root] (utils.py 283): INFO Epoch: [7] [1810/2502] eta: 0:08:51 lr: 0.000018 loss_cls: 3.8212 (3.9540) grad_norm: 2.3325 (2.3244) time: 0.7680 data: 0.0002 max mem: 8426 +[2024-12-10 15:30:40 root] (utils.py 283): INFO Epoch: [7] [1820/2502] eta: 0:08:44 lr: 0.000018 loss_cls: 3.7750 (3.9530) grad_norm: 2.3691 (2.3250) time: 0.7718 data: 0.0002 max mem: 8426 +[2024-12-10 15:30:48 root] (utils.py 283): INFO Epoch: [7] [1830/2502] eta: 0:08:36 lr: 0.000018 loss_cls: 3.8911 (3.9544) grad_norm: 2.3208 (2.3249) time: 0.7770 data: 0.0002 max mem: 8426 +[2024-12-10 15:30:55 root] (utils.py 283): INFO Epoch: [7] [1840/2502] eta: 0:08:28 lr: 0.000018 loss_cls: 4.1013 (3.9543) grad_norm: 2.2980 (2.3253) time: 0.7731 data: 0.0002 max mem: 8426 +[2024-12-10 15:31:03 root] (utils.py 283): INFO Epoch: [7] [1850/2502] eta: 0:08:21 lr: 0.000018 loss_cls: 4.1013 (3.9547) grad_norm: 2.2719 (2.3248) time: 0.7687 data: 0.0002 max mem: 8426 +[2024-12-10 15:31:11 root] (utils.py 283): INFO Epoch: [7] [1860/2502] eta: 0:08:13 lr: 0.000018 loss_cls: 3.8503 (3.9530) grad_norm: 2.2373 (2.3245) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 15:31:18 root] (utils.py 283): INFO Epoch: [7] [1870/2502] eta: 0:08:05 lr: 0.000018 loss_cls: 3.6670 (3.9523) grad_norm: 2.2386 (2.3245) time: 0.7611 data: 0.0002 max mem: 8426 +[2024-12-10 15:31:26 root] (utils.py 283): INFO Epoch: [7] [1880/2502] eta: 0:07:57 lr: 0.000018 loss_cls: 3.7858 (3.9506) grad_norm: 2.2893 (2.3245) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 15:31:34 root] (utils.py 283): INFO Epoch: [7] [1890/2502] eta: 0:07:50 lr: 0.000018 loss_cls: 3.8184 (3.9508) grad_norm: 2.3417 (2.3248) time: 0.7643 data: 0.0003 max mem: 8426 +[2024-12-10 15:31:41 root] (utils.py 283): INFO Epoch: [7] [1900/2502] eta: 0:07:42 lr: 0.000018 loss_cls: 4.0532 (3.9518) grad_norm: 2.2928 (2.3244) time: 0.7630 data: 0.0002 max mem: 8426 +[2024-12-10 15:31:49 root] (utils.py 283): INFO Epoch: [7] [1910/2502] eta: 0:07:34 lr: 0.000018 loss_cls: 4.1947 (3.9540) grad_norm: 2.2295 (2.3243) time: 0.7676 data: 0.0002 max mem: 8426 +[2024-12-10 15:31:56 root] (utils.py 283): INFO Epoch: [7] [1920/2502] eta: 0:07:27 lr: 0.000018 loss_cls: 4.1391 (3.9535) grad_norm: 2.3206 (2.3243) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 15:32:04 root] (utils.py 283): INFO Epoch: [7] [1930/2502] eta: 0:07:19 lr: 0.000018 loss_cls: 4.1367 (3.9544) grad_norm: 2.3317 (2.3243) time: 0.7549 data: 0.0002 max mem: 8426 +[2024-12-10 15:32:12 root] (utils.py 283): INFO Epoch: [7] [1940/2502] eta: 0:07:11 lr: 0.000018 loss_cls: 4.1637 (3.9551) grad_norm: 2.3651 (2.3249) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 15:32:19 root] (utils.py 283): INFO Epoch: [7] [1950/2502] eta: 0:07:04 lr: 0.000018 loss_cls: 4.1220 (3.9550) grad_norm: 2.3492 (2.3249) time: 0.7675 data: 0.0003 max mem: 8426 +[2024-12-10 15:32:27 root] (utils.py 283): INFO Epoch: [7] [1960/2502] eta: 0:06:56 lr: 0.000018 loss_cls: 3.9120 (3.9550) grad_norm: 2.2802 (2.3247) time: 0.7642 data: 0.0003 max mem: 8426 +[2024-12-10 15:32:35 root] (utils.py 283): INFO Epoch: [7] [1970/2502] eta: 0:06:48 lr: 0.000018 loss_cls: 4.1908 (3.9564) grad_norm: 2.2802 (2.3249) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 15:32:42 root] (utils.py 283): INFO Epoch: [7] [1980/2502] eta: 0:06:40 lr: 0.000018 loss_cls: 4.3931 (3.9568) grad_norm: 2.2772 (2.3246) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 15:32:50 root] (utils.py 283): INFO Epoch: [7] [1990/2502] eta: 0:06:33 lr: 0.000018 loss_cls: 3.8263 (3.9560) grad_norm: 2.2429 (2.3241) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-10 15:32:57 root] (utils.py 283): INFO Epoch: [7] [2000/2502] eta: 0:06:25 lr: 0.000018 loss_cls: 3.7604 (3.9555) grad_norm: 2.2429 (2.3237) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 15:33:05 root] (utils.py 283): INFO Epoch: [7] [2010/2502] eta: 0:06:17 lr: 0.000018 loss_cls: 4.0761 (3.9558) grad_norm: 2.2642 (2.3238) time: 0.7691 data: 0.0002 max mem: 8426 +[2024-12-10 15:33:13 root] (utils.py 283): INFO Epoch: [7] [2020/2502] eta: 0:06:10 lr: 0.000018 loss_cls: 4.0761 (3.9559) grad_norm: 2.3203 (2.3238) time: 0.7713 data: 0.0002 max mem: 8426 +[2024-12-10 15:33:21 root] (utils.py 283): INFO Epoch: [7] [2030/2502] eta: 0:06:02 lr: 0.000018 loss_cls: 4.2169 (3.9566) grad_norm: 2.3769 (2.3238) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-10 15:33:28 root] (utils.py 283): INFO Epoch: [7] [2040/2502] eta: 0:05:54 lr: 0.000018 loss_cls: 4.1004 (3.9563) grad_norm: 2.3004 (2.3236) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 15:33:36 root] (utils.py 283): INFO Epoch: [7] [2050/2502] eta: 0:05:47 lr: 0.000018 loss_cls: 4.0502 (3.9568) grad_norm: 2.3308 (2.3238) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 15:33:43 root] (utils.py 283): INFO Epoch: [7] [2060/2502] eta: 0:05:39 lr: 0.000018 loss_cls: 4.1965 (3.9584) grad_norm: 2.3308 (2.3236) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-10 15:33:51 root] (utils.py 283): INFO Epoch: [7] [2070/2502] eta: 0:05:31 lr: 0.000018 loss_cls: 4.1965 (3.9578) grad_norm: 2.2919 (2.3237) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 15:33:59 root] (utils.py 283): INFO Epoch: [7] [2080/2502] eta: 0:05:24 lr: 0.000018 loss_cls: 4.0787 (3.9580) grad_norm: 2.3421 (2.3239) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 15:34:06 root] (utils.py 283): INFO Epoch: [7] [2090/2502] eta: 0:05:16 lr: 0.000018 loss_cls: 4.0787 (3.9576) grad_norm: 2.3157 (2.3235) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 15:34:14 root] (utils.py 283): INFO Epoch: [7] [2100/2502] eta: 0:05:08 lr: 0.000018 loss_cls: 3.9534 (3.9579) grad_norm: 2.2573 (2.3234) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 15:34:22 root] (utils.py 283): INFO Epoch: [7] [2110/2502] eta: 0:05:01 lr: 0.000018 loss_cls: 3.8818 (3.9579) grad_norm: 2.2729 (2.3239) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 15:34:29 root] (utils.py 283): INFO Epoch: [7] [2120/2502] eta: 0:04:53 lr: 0.000018 loss_cls: 4.0825 (3.9582) grad_norm: 2.3371 (2.3239) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 15:34:37 root] (utils.py 283): INFO Epoch: [7] [2130/2502] eta: 0:04:45 lr: 0.000018 loss_cls: 4.0825 (3.9577) grad_norm: 2.3069 (2.3239) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 15:34:45 root] (utils.py 283): INFO Epoch: [7] [2140/2502] eta: 0:04:37 lr: 0.000018 loss_cls: 3.7260 (3.9565) grad_norm: 2.2738 (2.3236) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 15:34:52 root] (utils.py 283): INFO Epoch: [7] [2150/2502] eta: 0:04:30 lr: 0.000018 loss_cls: 3.7536 (3.9560) grad_norm: 2.3178 (2.3237) time: 0.7688 data: 0.0002 max mem: 8426 +[2024-12-10 15:35:00 root] (utils.py 283): INFO Epoch: [7] [2160/2502] eta: 0:04:22 lr: 0.000018 loss_cls: 3.7536 (3.9557) grad_norm: 2.3512 (2.3240) time: 0.7786 data: 0.0002 max mem: 8426 +[2024-12-10 15:35:08 root] (utils.py 283): INFO Epoch: [7] [2170/2502] eta: 0:04:14 lr: 0.000018 loss_cls: 3.7485 (3.9558) grad_norm: 2.3512 (2.3242) time: 0.7748 data: 0.0002 max mem: 8426 +[2024-12-10 15:35:16 root] (utils.py 283): INFO Epoch: [7] [2180/2502] eta: 0:04:07 lr: 0.000018 loss_cls: 4.0077 (3.9551) grad_norm: 2.3632 (2.3245) time: 0.7678 data: 0.0002 max mem: 8426 +[2024-12-10 15:35:23 root] (utils.py 283): INFO Epoch: [7] [2190/2502] eta: 0:03:59 lr: 0.000018 loss_cls: 4.1705 (3.9563) grad_norm: 2.3220 (2.3243) time: 0.7700 data: 0.0003 max mem: 8426 +[2024-12-10 15:35:31 root] (utils.py 283): INFO Epoch: [7] [2200/2502] eta: 0:03:51 lr: 0.000018 loss_cls: 4.0679 (3.9549) grad_norm: 2.2741 (2.3242) time: 0.7761 data: 0.0003 max mem: 8426 +[2024-12-10 15:35:39 root] (utils.py 283): INFO Epoch: [7] [2210/2502] eta: 0:03:44 lr: 0.000018 loss_cls: 3.8127 (3.9557) grad_norm: 2.2741 (2.3241) time: 0.7789 data: 0.0003 max mem: 8426 +[2024-12-10 15:35:47 root] (utils.py 283): INFO Epoch: [7] [2220/2502] eta: 0:03:36 lr: 0.000018 loss_cls: 4.1361 (3.9563) grad_norm: 2.2890 (2.3241) time: 0.7824 data: 0.0003 max mem: 8426 +[2024-12-10 15:35:55 root] (utils.py 283): INFO Epoch: [7] [2230/2502] eta: 0:03:28 lr: 0.000018 loss_cls: 4.0866 (3.9562) grad_norm: 2.2484 (2.3236) time: 0.7837 data: 0.0003 max mem: 8426 +[2024-12-10 15:36:02 root] (utils.py 283): INFO Epoch: [7] [2240/2502] eta: 0:03:21 lr: 0.000018 loss_cls: 4.0049 (3.9558) grad_norm: 2.2569 (2.3237) time: 0.7840 data: 0.0003 max mem: 8426 +[2024-12-10 15:36:10 root] (utils.py 283): INFO Epoch: [7] [2250/2502] eta: 0:03:13 lr: 0.000018 loss_cls: 4.1981 (3.9564) grad_norm: 2.2569 (2.3234) time: 0.7831 data: 0.0003 max mem: 8426 +[2024-12-10 15:36:18 root] (utils.py 283): INFO Epoch: [7] [2260/2502] eta: 0:03:05 lr: 0.000018 loss_cls: 4.2008 (3.9554) grad_norm: 2.2987 (2.3237) time: 0.7818 data: 0.0003 max mem: 8426 +[2024-12-10 15:36:26 root] (utils.py 283): INFO Epoch: [7] [2270/2502] eta: 0:02:58 lr: 0.000018 loss_cls: 3.7559 (3.9551) grad_norm: 2.2925 (2.3232) time: 0.7818 data: 0.0003 max mem: 8426 +[2024-12-10 15:36:34 root] (utils.py 283): INFO Epoch: [7] [2280/2502] eta: 0:02:50 lr: 0.000018 loss_cls: 3.7559 (3.9547) grad_norm: 2.2555 (2.3232) time: 0.7812 data: 0.0003 max mem: 8426 +[2024-12-10 15:36:41 root] (utils.py 283): INFO Epoch: [7] [2290/2502] eta: 0:02:42 lr: 0.000018 loss_cls: 3.9909 (3.9550) grad_norm: 2.2555 (2.3228) time: 0.7821 data: 0.0003 max mem: 8426 +[2024-12-10 15:36:49 root] (utils.py 283): INFO Epoch: [7] [2300/2502] eta: 0:02:35 lr: 0.000018 loss_cls: 3.8967 (3.9533) grad_norm: 2.2224 (2.3227) time: 0.7822 data: 0.0003 max mem: 8426 +[2024-12-10 15:36:57 root] (utils.py 283): INFO Epoch: [7] [2310/2502] eta: 0:02:27 lr: 0.000018 loss_cls: 3.8855 (3.9532) grad_norm: 2.3331 (2.3228) time: 0.7816 data: 0.0003 max mem: 8426 +[2024-12-10 15:37:05 root] (utils.py 283): INFO Epoch: [7] [2320/2502] eta: 0:02:19 lr: 0.000018 loss_cls: 4.2080 (3.9532) grad_norm: 2.3448 (2.3227) time: 0.7827 data: 0.0003 max mem: 8426 +[2024-12-10 15:37:13 root] (utils.py 283): INFO Epoch: [7] [2330/2502] eta: 0:02:12 lr: 0.000018 loss_cls: 4.2080 (3.9539) grad_norm: 2.3138 (2.3227) time: 0.7909 data: 0.0003 max mem: 8426 +[2024-12-10 15:37:21 root] (utils.py 283): INFO Epoch: [7] [2340/2502] eta: 0:02:04 lr: 0.000018 loss_cls: 3.8924 (3.9527) grad_norm: 2.3191 (2.3230) time: 0.7898 data: 0.0003 max mem: 8426 +[2024-12-10 15:37:29 root] (utils.py 283): INFO Epoch: [7] [2350/2502] eta: 0:01:56 lr: 0.000018 loss_cls: 3.9438 (3.9530) grad_norm: 2.2987 (2.3228) time: 0.7815 data: 0.0003 max mem: 8426 +[2024-12-10 15:37:36 root] (utils.py 283): INFO Epoch: [7] [2360/2502] eta: 0:01:49 lr: 0.000018 loss_cls: 4.0735 (3.9534) grad_norm: 2.2751 (2.3228) time: 0.7859 data: 0.0003 max mem: 8426 +[2024-12-10 15:37:44 root] (utils.py 283): INFO Epoch: [7] [2370/2502] eta: 0:01:41 lr: 0.000018 loss_cls: 4.0735 (3.9534) grad_norm: 2.3045 (2.3229) time: 0.7846 data: 0.0003 max mem: 8426 +[2024-12-10 15:37:52 root] (utils.py 283): INFO Epoch: [7] [2380/2502] eta: 0:01:33 lr: 0.000018 loss_cls: 4.2862 (3.9542) grad_norm: 2.3021 (2.3229) time: 0.7782 data: 0.0003 max mem: 8426 +[2024-12-10 15:38:00 root] (utils.py 283): INFO Epoch: [7] [2390/2502] eta: 0:01:26 lr: 0.000018 loss_cls: 4.2320 (3.9543) grad_norm: 2.2564 (2.3227) time: 0.7785 data: 0.0003 max mem: 8426 +[2024-12-10 15:38:08 root] (utils.py 283): INFO Epoch: [7] [2400/2502] eta: 0:01:18 lr: 0.000018 loss_cls: 3.9059 (3.9534) grad_norm: 2.2666 (2.3230) time: 0.7795 data: 0.0003 max mem: 8426 +[2024-12-10 15:38:15 root] (utils.py 283): INFO Epoch: [7] [2410/2502] eta: 0:01:10 lr: 0.000018 loss_cls: 3.9693 (3.9540) grad_norm: 2.3840 (2.3236) time: 0.7781 data: 0.0003 max mem: 8426 +[2024-12-10 15:38:23 root] (utils.py 283): INFO Epoch: [7] [2420/2502] eta: 0:01:03 lr: 0.000018 loss_cls: 3.9693 (3.9542) grad_norm: 2.3403 (2.3237) time: 0.7779 data: 0.0003 max mem: 8426 +[2024-12-10 15:38:31 root] (utils.py 283): INFO Epoch: [7] [2430/2502] eta: 0:00:55 lr: 0.000018 loss_cls: 3.8870 (3.9539) grad_norm: 2.2857 (2.3235) time: 0.7783 data: 0.0003 max mem: 8426 +[2024-12-10 15:38:39 root] (utils.py 283): INFO Epoch: [7] [2440/2502] eta: 0:00:47 lr: 0.000018 loss_cls: 3.9979 (3.9544) grad_norm: 2.2857 (2.3236) time: 0.7775 data: 0.0003 max mem: 8426 +[2024-12-10 15:38:47 root] (utils.py 283): INFO Epoch: [7] [2450/2502] eta: 0:00:40 lr: 0.000018 loss_cls: 4.0489 (3.9540) grad_norm: 2.3102 (2.3236) time: 0.7781 data: 0.0003 max mem: 8426 +[2024-12-10 15:38:54 root] (utils.py 283): INFO Epoch: [7] [2460/2502] eta: 0:00:32 lr: 0.000018 loss_cls: 3.9762 (3.9531) grad_norm: 2.3474 (2.3239) time: 0.7787 data: 0.0003 max mem: 8426 +[2024-12-10 15:39:02 root] (utils.py 283): INFO Epoch: [7] [2470/2502] eta: 0:00:24 lr: 0.000018 loss_cls: 3.6797 (3.9528) grad_norm: 2.3859 (2.3246) time: 0.7783 data: 0.0003 max mem: 8426 +[2024-12-10 15:39:10 root] (utils.py 283): INFO Epoch: [7] [2480/2502] eta: 0:00:16 lr: 0.000018 loss_cls: 3.7276 (3.9525) grad_norm: 2.3725 (2.3246) time: 0.7782 data: 0.0003 max mem: 8426 +[2024-12-10 15:39:18 root] (utils.py 283): INFO Epoch: [7] [2490/2502] eta: 0:00:09 lr: 0.000018 loss_cls: 3.8895 (3.9528) grad_norm: 2.3061 (2.3243) time: 0.8017 data: 0.0229 max mem: 8426 +[2024-12-10 15:39:26 root] (utils.py 283): INFO Epoch: [7] [2500/2502] eta: 0:00:01 lr: 0.000018 loss_cls: 3.8895 (3.9522) grad_norm: 2.3344 (2.3246) time: 0.8055 data: 0.0229 max mem: 8426 +[2024-12-10 15:39:27 root] (utils.py 283): INFO Epoch: [7] [2501/2502] eta: 0:00:00 lr: 0.000018 loss_cls: 3.8980 (3.9523) grad_norm: 2.3104 (2.3245) time: 0.8072 data: 0.0229 max mem: 8426 +[2024-12-10 15:39:27 root] (utils.py 297): INFO Epoch: [7] Total time: 0:32:06 (0.7699 s / it) +[2024-12-10 15:39:27 root] (engine.py 179): INFO Averaged stats:lr: 0.000018 loss_cls: 3.8980 (3.9501) grad_norm: 2.3104 (2.3245) +[2024-12-10 15:39:27 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.6426 (0.6426) acc1: 87.5000 (87.5000) acc3: 96.0938 (96.0938) acc5: 98.4375 (98.4375) time: 0.1274 data: 0.0003 max mem: 8426 +[2024-12-10 15:39:29 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7476 (0.8150) acc1: 85.1562 (82.1733) acc3: 95.3125 (93.7500) acc5: 96.0938 (96.3778) time: 0.1277 data: 0.0003 max mem: 8426 +[2024-12-10 15:39:30 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8581 (0.8684) acc1: 80.4688 (81.0640) acc3: 92.9688 (92.7827) acc5: 95.3125 (95.4613) time: 0.1279 data: 0.0003 max mem: 8426 +[2024-12-10 15:39:31 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9366 (0.8816) acc1: 78.9062 (80.2167) acc3: 92.9688 (93.0948) acc5: 96.0938 (95.6149) time: 0.1293 data: 0.0004 max mem: 8426 +[2024-12-10 15:39:33 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8458 (0.8724) acc1: 78.1250 (80.3544) acc3: 94.5312 (93.3117) acc5: 96.8750 (95.7508) time: 0.1339 data: 0.0004 max mem: 8426 +[2024-12-10 15:39:34 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0717 (0.9596) acc1: 75.0000 (78.3701) acc3: 88.2812 (91.8811) acc5: 92.9688 (94.6998) time: 0.1370 data: 0.0004 max mem: 8426 +[2024-12-10 15:39:35 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2430 (1.0062) acc1: 72.6562 (77.6383) acc3: 85.1562 (90.9452) acc5: 89.8438 (93.8012) time: 0.1486 data: 0.0149 max mem: 8426 +[2024-12-10 15:39:37 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2253 (1.0486) acc1: 74.2188 (76.8046) acc3: 86.7188 (90.3279) acc5: 90.6250 (93.3319) time: 0.1509 data: 0.0214 max mem: 8426 +[2024-12-10 15:39:38 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2253 (1.0823) acc1: 73.4375 (76.0031) acc3: 86.7188 (89.7859) acc5: 90.6250 (92.9398) time: 0.1356 data: 0.0078 max mem: 8426 +[2024-12-10 15:39:39 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2350 (1.1097) acc1: 72.6562 (75.3262) acc3: 85.9375 (89.4231) acc5: 90.6250 (92.6683) time: 0.1294 data: 0.0013 max mem: 8426 +[2024-12-10 15:39:40 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1760 (1.0986) acc1: 72.6562 (75.5600) acc3: 88.2812 (89.5920) acc5: 91.4062 (92.8560) time: 0.1289 data: 0.0007 max mem: 8426 +[2024-12-10 15:39:40 root] (utils.py 297): INFO Test: Total time: 0:00:13 (0.1353 s / it) +[2024-12-10 15:39:41 root] (engine.py 264): INFO * Acc@1 75.468 Acc@3 89.618 Acc@5 92.824 loss 1.101 flops 1.285 layer_flops 1.251 +[2024-12-10 15:39:41 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 75.5% +[2024-12-10 15:39:41 root] (main.py 576): INFO Max accuracy: 75.57% +[2024-12-10 15:39:42 root] (utils.py 283): INFO Epoch: [8] [ 0/2502] eta: 0:34:11 lr: 0.000018 loss_cls: 4.2240 (4.2240) grad_norm: 2.3276 (2.3276) time: 0.8199 data: 0.0003 max mem: 8426 +[2024-12-10 15:39:50 root] (utils.py 283): INFO Epoch: [8] [ 10/2502] eta: 0:31:58 lr: 0.000018 loss_cls: 4.2240 (3.9544) grad_norm: 2.3203 (2.3195) time: 0.7700 data: 0.0003 max mem: 8426 +[2024-12-10 15:39:57 root] (utils.py 283): INFO Epoch: [8] [ 20/2502] eta: 0:31:54 lr: 0.000018 loss_cls: 4.3036 (4.0927) grad_norm: 2.3203 (2.3566) time: 0.7689 data: 0.0003 max mem: 8426 +[2024-12-10 15:40:05 root] (utils.py 283): INFO Epoch: [8] [ 30/2502] eta: 0:31:34 lr: 0.000018 loss_cls: 4.3036 (4.1092) grad_norm: 2.3729 (2.3552) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 15:40:12 root] (utils.py 283): INFO Epoch: [8] [ 40/2502] eta: 0:31:26 lr: 0.000018 loss_cls: 4.2628 (4.0879) grad_norm: 2.2888 (2.3254) time: 0.7607 data: 0.0002 max mem: 8426 +[2024-12-10 15:40:20 root] (utils.py 283): INFO Epoch: [8] [ 50/2502] eta: 0:31:14 lr: 0.000018 loss_cls: 4.2397 (4.0714) grad_norm: 2.3050 (2.3431) time: 0.7615 data: 0.0002 max mem: 8426 +[2024-12-10 15:40:28 root] (utils.py 283): INFO Epoch: [8] [ 60/2502] eta: 0:31:07 lr: 0.000018 loss_cls: 4.1772 (4.0911) grad_norm: 2.3541 (2.3398) time: 0.7614 data: 0.0002 max mem: 8426 +[2024-12-10 15:40:35 root] (utils.py 283): INFO Epoch: [8] [ 70/2502] eta: 0:31:00 lr: 0.000018 loss_cls: 4.1367 (4.0799) grad_norm: 2.2990 (2.3291) time: 0.7654 data: 0.0002 max mem: 8426 +[2024-12-10 15:40:43 root] (utils.py 283): INFO Epoch: [8] [ 80/2502] eta: 0:30:52 lr: 0.000018 loss_cls: 4.1736 (4.1022) grad_norm: 2.2777 (2.3279) time: 0.7659 data: 0.0003 max mem: 8426 +[2024-12-10 15:40:51 root] (utils.py 283): INFO Epoch: [8] [ 90/2502] eta: 0:30:45 lr: 0.000018 loss_cls: 4.1933 (4.0751) grad_norm: 2.2777 (2.3253) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 15:40:58 root] (utils.py 283): INFO Epoch: [8] [ 100/2502] eta: 0:30:40 lr: 0.000018 loss_cls: 3.7921 (4.0393) grad_norm: 2.3279 (2.3247) time: 0.7715 data: 0.0002 max mem: 8426 +[2024-12-10 15:41:06 root] (utils.py 283): INFO Epoch: [8] [ 110/2502] eta: 0:30:33 lr: 0.000018 loss_cls: 3.8007 (4.0227) grad_norm: 2.3158 (2.3230) time: 0.7742 data: 0.0002 max mem: 8426 +[2024-12-10 15:41:14 root] (utils.py 283): INFO Epoch: [8] [ 120/2502] eta: 0:30:25 lr: 0.000018 loss_cls: 3.9055 (4.0021) grad_norm: 2.3186 (2.3242) time: 0.7661 data: 0.0002 max mem: 8426 +[2024-12-10 15:41:21 root] (utils.py 283): INFO Epoch: [8] [ 130/2502] eta: 0:30:16 lr: 0.000018 loss_cls: 3.9055 (3.9969) grad_norm: 2.3511 (2.3234) time: 0.7611 data: 0.0002 max mem: 8426 +[2024-12-10 15:41:29 root] (utils.py 283): INFO Epoch: [8] [ 140/2502] eta: 0:30:08 lr: 0.000018 loss_cls: 4.1698 (4.0102) grad_norm: 2.2818 (2.3220) time: 0.7630 data: 0.0002 max mem: 8426 +[2024-12-10 15:41:37 root] (utils.py 283): INFO Epoch: [8] [ 150/2502] eta: 0:30:00 lr: 0.000018 loss_cls: 4.0298 (3.9952) grad_norm: 2.3102 (2.3203) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 15:41:44 root] (utils.py 283): INFO Epoch: [8] [ 160/2502] eta: 0:29:52 lr: 0.000018 loss_cls: 3.7111 (3.9877) grad_norm: 2.3608 (2.3260) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 15:41:52 root] (utils.py 283): INFO Epoch: [8] [ 170/2502] eta: 0:29:44 lr: 0.000018 loss_cls: 3.6046 (3.9629) grad_norm: 2.3115 (2.3231) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-10 15:42:00 root] (utils.py 283): INFO Epoch: [8] [ 180/2502] eta: 0:29:36 lr: 0.000018 loss_cls: 3.7646 (3.9573) grad_norm: 2.2837 (2.3271) time: 0.7633 data: 0.0002 max mem: 8426 +[2024-12-10 15:42:07 root] (utils.py 283): INFO Epoch: [8] [ 190/2502] eta: 0:29:31 lr: 0.000018 loss_cls: 3.9123 (3.9570) grad_norm: 2.3882 (2.3254) time: 0.7721 data: 0.0002 max mem: 8426 +[2024-12-10 15:42:15 root] (utils.py 283): INFO Epoch: [8] [ 200/2502] eta: 0:29:24 lr: 0.000018 loss_cls: 4.0471 (3.9531) grad_norm: 2.3067 (2.3265) time: 0.7795 data: 0.0002 max mem: 8426 +[2024-12-10 15:42:23 root] (utils.py 283): INFO Epoch: [8] [ 210/2502] eta: 0:29:16 lr: 0.000018 loss_cls: 4.0702 (3.9587) grad_norm: 2.3376 (2.3283) time: 0.7707 data: 0.0002 max mem: 8426 +[2024-12-10 15:42:30 root] (utils.py 283): INFO Epoch: [8] [ 220/2502] eta: 0:29:08 lr: 0.000018 loss_cls: 4.0702 (3.9562) grad_norm: 2.3621 (2.3309) time: 0.7624 data: 0.0003 max mem: 8426 +[2024-12-10 15:42:38 root] (utils.py 283): INFO Epoch: [8] [ 230/2502] eta: 0:29:00 lr: 0.000018 loss_cls: 4.1371 (3.9620) grad_norm: 2.3621 (2.3321) time: 0.7608 data: 0.0003 max mem: 8426 +[2024-12-10 15:42:46 root] (utils.py 283): INFO Epoch: [8] [ 240/2502] eta: 0:28:52 lr: 0.000018 loss_cls: 3.8252 (3.9427) grad_norm: 2.2912 (2.3330) time: 0.7605 data: 0.0003 max mem: 8426 +[2024-12-10 15:42:53 root] (utils.py 283): INFO Epoch: [8] [ 250/2502] eta: 0:28:44 lr: 0.000018 loss_cls: 3.8004 (3.9417) grad_norm: 2.3570 (2.3334) time: 0.7611 data: 0.0003 max mem: 8426 +[2024-12-10 15:43:01 root] (utils.py 283): INFO Epoch: [8] [ 260/2502] eta: 0:28:36 lr: 0.000018 loss_cls: 4.1103 (3.9394) grad_norm: 2.3181 (2.3324) time: 0.7632 data: 0.0002 max mem: 8426 +[2024-12-10 15:43:09 root] (utils.py 283): INFO Epoch: [8] [ 270/2502] eta: 0:28:28 lr: 0.000018 loss_cls: 4.1694 (3.9501) grad_norm: 2.2861 (2.3305) time: 0.7649 data: 0.0003 max mem: 8426 +[2024-12-10 15:43:16 root] (utils.py 283): INFO Epoch: [8] [ 280/2502] eta: 0:28:20 lr: 0.000018 loss_cls: 4.3047 (3.9604) grad_norm: 2.2861 (2.3295) time: 0.7610 data: 0.0002 max mem: 8426 +[2024-12-10 15:43:24 root] (utils.py 283): INFO Epoch: [8] [ 290/2502] eta: 0:28:13 lr: 0.000018 loss_cls: 4.0602 (3.9437) grad_norm: 2.3138 (2.3294) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 15:43:31 root] (utils.py 283): INFO Epoch: [8] [ 300/2502] eta: 0:28:05 lr: 0.000018 loss_cls: 3.8302 (3.9490) grad_norm: 2.3106 (2.3285) time: 0.7682 data: 0.0002 max mem: 8426 +[2024-12-10 15:43:39 root] (utils.py 283): INFO Epoch: [8] [ 310/2502] eta: 0:27:57 lr: 0.000018 loss_cls: 3.9954 (3.9484) grad_norm: 2.3106 (2.3280) time: 0.7607 data: 0.0002 max mem: 8426 +[2024-12-10 15:43:47 root] (utils.py 283): INFO Epoch: [8] [ 320/2502] eta: 0:27:50 lr: 0.000018 loss_cls: 3.9954 (3.9585) grad_norm: 2.2847 (2.3275) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 15:43:54 root] (utils.py 283): INFO Epoch: [8] [ 330/2502] eta: 0:27:42 lr: 0.000018 loss_cls: 3.9346 (3.9475) grad_norm: 2.3714 (2.3300) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 15:44:02 root] (utils.py 283): INFO Epoch: [8] [ 340/2502] eta: 0:27:34 lr: 0.000018 loss_cls: 3.6004 (3.9437) grad_norm: 2.3973 (2.3300) time: 0.7613 data: 0.0002 max mem: 8426 +[2024-12-10 15:44:10 root] (utils.py 283): INFO Epoch: [8] [ 350/2502] eta: 0:27:26 lr: 0.000018 loss_cls: 3.6285 (3.9428) grad_norm: 2.3070 (2.3313) time: 0.7625 data: 0.0002 max mem: 8426 +[2024-12-10 15:44:17 root] (utils.py 283): INFO Epoch: [8] [ 360/2502] eta: 0:27:18 lr: 0.000018 loss_cls: 4.2239 (3.9481) grad_norm: 2.3159 (2.3311) time: 0.7613 data: 0.0003 max mem: 8426 +[2024-12-10 15:44:25 root] (utils.py 283): INFO Epoch: [8] [ 370/2502] eta: 0:27:11 lr: 0.000018 loss_cls: 4.1290 (3.9440) grad_norm: 2.3050 (2.3304) time: 0.7694 data: 0.0002 max mem: 8426 +[2024-12-10 15:44:33 root] (utils.py 283): INFO Epoch: [8] [ 380/2502] eta: 0:27:03 lr: 0.000018 loss_cls: 4.1254 (3.9493) grad_norm: 2.3545 (2.3322) time: 0.7668 data: 0.0002 max mem: 8426 +[2024-12-10 15:44:40 root] (utils.py 283): INFO Epoch: [8] [ 390/2502] eta: 0:26:55 lr: 0.000018 loss_cls: 4.0887 (3.9437) grad_norm: 2.3169 (2.3301) time: 0.7578 data: 0.0002 max mem: 8426 +[2024-12-10 15:44:48 root] (utils.py 283): INFO Epoch: [8] [ 400/2502] eta: 0:26:47 lr: 0.000018 loss_cls: 3.7320 (3.9406) grad_norm: 2.2813 (2.3305) time: 0.7588 data: 0.0002 max mem: 8426 +[2024-12-10 15:44:55 root] (utils.py 283): INFO Epoch: [8] [ 410/2502] eta: 0:26:39 lr: 0.000018 loss_cls: 4.2369 (3.9494) grad_norm: 2.3219 (2.3310) time: 0.7590 data: 0.0002 max mem: 8426 +[2024-12-10 15:45:03 root] (utils.py 283): INFO Epoch: [8] [ 420/2502] eta: 0:26:31 lr: 0.000018 loss_cls: 4.2369 (3.9483) grad_norm: 2.3070 (2.3310) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 15:45:11 root] (utils.py 283): INFO Epoch: [8] [ 430/2502] eta: 0:26:24 lr: 0.000018 loss_cls: 3.8420 (3.9440) grad_norm: 2.3609 (2.3324) time: 0.7612 data: 0.0002 max mem: 8426 +[2024-12-10 15:45:18 root] (utils.py 283): INFO Epoch: [8] [ 440/2502] eta: 0:26:15 lr: 0.000018 loss_cls: 3.9188 (3.9463) grad_norm: 2.3522 (2.3329) time: 0.7571 data: 0.0002 max mem: 8426 +[2024-12-10 15:45:26 root] (utils.py 283): INFO Epoch: [8] [ 450/2502] eta: 0:26:07 lr: 0.000018 loss_cls: 4.0518 (3.9457) grad_norm: 2.2674 (2.3306) time: 0.7551 data: 0.0002 max mem: 8426 +[2024-12-10 15:45:33 root] (utils.py 283): INFO Epoch: [8] [ 460/2502] eta: 0:25:59 lr: 0.000018 loss_cls: 4.0518 (3.9482) grad_norm: 2.2877 (2.3302) time: 0.7535 data: 0.0002 max mem: 8426 +[2024-12-10 15:45:41 root] (utils.py 283): INFO Epoch: [8] [ 470/2502] eta: 0:25:51 lr: 0.000018 loss_cls: 4.0989 (3.9518) grad_norm: 2.3133 (2.3306) time: 0.7540 data: 0.0002 max mem: 8426 +[2024-12-10 15:45:48 root] (utils.py 283): INFO Epoch: [8] [ 480/2502] eta: 0:25:43 lr: 0.000018 loss_cls: 4.0657 (3.9491) grad_norm: 2.3133 (2.3300) time: 0.7580 data: 0.0002 max mem: 8426 +[2024-12-10 15:45:56 root] (utils.py 283): INFO Epoch: [8] [ 490/2502] eta: 0:25:36 lr: 0.000018 loss_cls: 3.9067 (3.9498) grad_norm: 2.2989 (2.3301) time: 0.7613 data: 0.0002 max mem: 8426 +[2024-12-10 15:46:04 root] (utils.py 283): INFO Epoch: [8] [ 500/2502] eta: 0:25:28 lr: 0.000018 loss_cls: 4.1455 (3.9517) grad_norm: 2.2857 (2.3289) time: 0.7601 data: 0.0002 max mem: 8426 +[2024-12-10 15:46:11 root] (utils.py 283): INFO Epoch: [8] [ 510/2502] eta: 0:25:20 lr: 0.000018 loss_cls: 4.1749 (3.9513) grad_norm: 2.2492 (2.3277) time: 0.7572 data: 0.0002 max mem: 8426 +[2024-12-10 15:46:19 root] (utils.py 283): INFO Epoch: [8] [ 520/2502] eta: 0:25:12 lr: 0.000018 loss_cls: 4.1749 (3.9526) grad_norm: 2.2578 (2.3269) time: 0.7567 data: 0.0002 max mem: 8426 +[2024-12-10 15:46:26 root] (utils.py 283): INFO Epoch: [8] [ 530/2502] eta: 0:25:05 lr: 0.000018 loss_cls: 4.0345 (3.9553) grad_norm: 2.2578 (2.3258) time: 0.7618 data: 0.0002 max mem: 8426 +[2024-12-10 15:46:34 root] (utils.py 283): INFO Epoch: [8] [ 540/2502] eta: 0:24:57 lr: 0.000018 loss_cls: 3.9284 (3.9516) grad_norm: 2.2861 (2.3261) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-10 15:46:42 root] (utils.py 283): INFO Epoch: [8] [ 550/2502] eta: 0:24:49 lr: 0.000018 loss_cls: 3.9679 (3.9544) grad_norm: 2.3797 (2.3274) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 15:46:49 root] (utils.py 283): INFO Epoch: [8] [ 560/2502] eta: 0:24:42 lr: 0.000018 loss_cls: 4.2041 (3.9576) grad_norm: 2.3310 (2.3278) time: 0.7592 data: 0.0002 max mem: 8426 +[2024-12-10 15:46:57 root] (utils.py 283): INFO Epoch: [8] [ 570/2502] eta: 0:24:34 lr: 0.000018 loss_cls: 4.0527 (3.9598) grad_norm: 2.2902 (2.3264) time: 0.7577 data: 0.0002 max mem: 8426 +[2024-12-10 15:47:04 root] (utils.py 283): INFO Epoch: [8] [ 580/2502] eta: 0:24:26 lr: 0.000018 loss_cls: 4.0502 (3.9579) grad_norm: 2.2595 (2.3260) time: 0.7533 data: 0.0002 max mem: 8426 +[2024-12-10 15:47:12 root] (utils.py 283): INFO Epoch: [8] [ 590/2502] eta: 0:24:18 lr: 0.000018 loss_cls: 3.8963 (3.9590) grad_norm: 2.2595 (2.3256) time: 0.7538 data: 0.0002 max mem: 8426 +[2024-12-10 15:47:20 root] (utils.py 283): INFO Epoch: [8] [ 600/2502] eta: 0:24:10 lr: 0.000018 loss_cls: 4.1771 (3.9656) grad_norm: 2.3035 (2.3254) time: 0.7613 data: 0.0002 max mem: 8426 +[2024-12-10 15:47:27 root] (utils.py 283): INFO Epoch: [8] [ 610/2502] eta: 0:24:03 lr: 0.000018 loss_cls: 4.2600 (3.9647) grad_norm: 2.2875 (2.3254) time: 0.7623 data: 0.0002 max mem: 8426 +[2024-12-10 15:47:35 root] (utils.py 283): INFO Epoch: [8] [ 620/2502] eta: 0:23:55 lr: 0.000018 loss_cls: 4.0073 (3.9627) grad_norm: 2.2836 (2.3256) time: 0.7598 data: 0.0002 max mem: 8426 +[2024-12-10 15:47:42 root] (utils.py 283): INFO Epoch: [8] [ 630/2502] eta: 0:23:47 lr: 0.000018 loss_cls: 4.0103 (3.9625) grad_norm: 2.2836 (2.3253) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 15:47:50 root] (utils.py 283): INFO Epoch: [8] [ 640/2502] eta: 0:23:40 lr: 0.000018 loss_cls: 4.2127 (3.9668) grad_norm: 2.3319 (2.3253) time: 0.7610 data: 0.0002 max mem: 8426 +[2024-12-10 15:47:58 root] (utils.py 283): INFO Epoch: [8] [ 650/2502] eta: 0:23:32 lr: 0.000018 loss_cls: 4.1408 (3.9634) grad_norm: 2.3395 (2.3254) time: 0.7600 data: 0.0002 max mem: 8426 +[2024-12-10 15:48:05 root] (utils.py 283): INFO Epoch: [8] [ 660/2502] eta: 0:23:24 lr: 0.000018 loss_cls: 3.9402 (3.9640) grad_norm: 2.3395 (2.3256) time: 0.7585 data: 0.0002 max mem: 8426 +[2024-12-10 15:48:13 root] (utils.py 283): INFO Epoch: [8] [ 670/2502] eta: 0:23:16 lr: 0.000018 loss_cls: 3.9366 (3.9636) grad_norm: 2.2978 (2.3248) time: 0.7544 data: 0.0002 max mem: 8426 +[2024-12-10 15:48:20 root] (utils.py 283): INFO Epoch: [8] [ 680/2502] eta: 0:23:09 lr: 0.000018 loss_cls: 3.9366 (3.9638) grad_norm: 2.2672 (2.3242) time: 0.7573 data: 0.0002 max mem: 8426 +[2024-12-10 15:48:28 root] (utils.py 283): INFO Epoch: [8] [ 690/2502] eta: 0:23:01 lr: 0.000018 loss_cls: 3.9737 (3.9632) grad_norm: 2.3018 (2.3246) time: 0.7565 data: 0.0002 max mem: 8426 +[2024-12-10 15:48:35 root] (utils.py 283): INFO Epoch: [8] [ 700/2502] eta: 0:22:53 lr: 0.000018 loss_cls: 3.9960 (3.9636) grad_norm: 2.3439 (2.3247) time: 0.7569 data: 0.0002 max mem: 8426 +[2024-12-10 15:48:43 root] (utils.py 283): INFO Epoch: [8] [ 710/2502] eta: 0:22:46 lr: 0.000018 loss_cls: 4.0328 (3.9650) grad_norm: 2.2970 (2.3249) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 15:48:51 root] (utils.py 283): INFO Epoch: [8] [ 720/2502] eta: 0:22:38 lr: 0.000018 loss_cls: 3.9179 (3.9614) grad_norm: 2.2878 (2.3243) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-10 15:48:58 root] (utils.py 283): INFO Epoch: [8] [ 730/2502] eta: 0:22:30 lr: 0.000018 loss_cls: 3.8277 (3.9622) grad_norm: 2.3196 (2.3242) time: 0.7570 data: 0.0002 max mem: 8426 +[2024-12-10 15:49:06 root] (utils.py 283): INFO Epoch: [8] [ 740/2502] eta: 0:22:22 lr: 0.000018 loss_cls: 4.1200 (3.9616) grad_norm: 2.3196 (2.3240) time: 0.7528 data: 0.0002 max mem: 8426 +[2024-12-10 15:49:13 root] (utils.py 283): INFO Epoch: [8] [ 750/2502] eta: 0:22:14 lr: 0.000018 loss_cls: 4.1200 (3.9617) grad_norm: 2.2366 (2.3226) time: 0.7540 data: 0.0002 max mem: 8426 +[2024-12-10 15:49:21 root] (utils.py 283): INFO Epoch: [8] [ 760/2502] eta: 0:22:07 lr: 0.000018 loss_cls: 4.1564 (3.9617) grad_norm: 2.2351 (2.3219) time: 0.7562 data: 0.0002 max mem: 8426 +[2024-12-10 15:49:29 root] (utils.py 283): INFO Epoch: [8] [ 770/2502] eta: 0:21:59 lr: 0.000018 loss_cls: 4.2234 (3.9651) grad_norm: 2.2656 (2.3218) time: 0.7614 data: 0.0002 max mem: 8426 +[2024-12-10 15:49:36 root] (utils.py 283): INFO Epoch: [8] [ 780/2502] eta: 0:21:51 lr: 0.000018 loss_cls: 4.2080 (3.9612) grad_norm: 2.2975 (2.3221) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 15:49:44 root] (utils.py 283): INFO Epoch: [8] [ 790/2502] eta: 0:21:44 lr: 0.000018 loss_cls: 3.5375 (3.9592) grad_norm: 2.3055 (2.3217) time: 0.7588 data: 0.0002 max mem: 8426 +[2024-12-10 15:49:51 root] (utils.py 283): INFO Epoch: [8] [ 800/2502] eta: 0:21:36 lr: 0.000018 loss_cls: 4.2354 (3.9620) grad_norm: 2.2939 (2.3215) time: 0.7554 data: 0.0002 max mem: 8426 +[2024-12-10 15:49:59 root] (utils.py 283): INFO Epoch: [8] [ 810/2502] eta: 0:21:28 lr: 0.000018 loss_cls: 4.3171 (3.9611) grad_norm: 2.3485 (2.3225) time: 0.7546 data: 0.0002 max mem: 8426 +[2024-12-10 15:50:06 root] (utils.py 283): INFO Epoch: [8] [ 820/2502] eta: 0:21:20 lr: 0.000018 loss_cls: 4.3349 (3.9657) grad_norm: 2.3596 (2.3233) time: 0.7558 data: 0.0002 max mem: 8426 +[2024-12-10 15:50:14 root] (utils.py 283): INFO Epoch: [8] [ 830/2502] eta: 0:21:13 lr: 0.000018 loss_cls: 4.2771 (3.9680) grad_norm: 2.2898 (2.3228) time: 0.7560 data: 0.0002 max mem: 8426 +[2024-12-10 15:50:22 root] (utils.py 283): INFO Epoch: [8] [ 840/2502] eta: 0:21:05 lr: 0.000018 loss_cls: 4.0268 (3.9666) grad_norm: 2.2898 (2.3227) time: 0.7595 data: 0.0002 max mem: 8426 +[2024-12-10 15:50:29 root] (utils.py 283): INFO Epoch: [8] [ 850/2502] eta: 0:20:57 lr: 0.000018 loss_cls: 3.8687 (3.9627) grad_norm: 2.3026 (2.3222) time: 0.7593 data: 0.0002 max mem: 8426 +[2024-12-10 15:50:37 root] (utils.py 283): INFO Epoch: [8] [ 860/2502] eta: 0:20:50 lr: 0.000018 loss_cls: 3.9587 (3.9636) grad_norm: 2.2650 (2.3214) time: 0.7701 data: 0.0002 max mem: 8426 +[2024-12-10 15:50:45 root] (utils.py 283): INFO Epoch: [8] [ 870/2502] eta: 0:20:43 lr: 0.000018 loss_cls: 3.9587 (3.9597) grad_norm: 2.2697 (2.3210) time: 0.7816 data: 0.0002 max mem: 8426 +[2024-12-10 15:50:52 root] (utils.py 283): INFO Epoch: [8] [ 880/2502] eta: 0:20:35 lr: 0.000018 loss_cls: 3.9040 (3.9606) grad_norm: 2.2878 (2.3213) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 15:51:00 root] (utils.py 283): INFO Epoch: [8] [ 890/2502] eta: 0:20:28 lr: 0.000018 loss_cls: 3.9040 (3.9609) grad_norm: 2.3411 (2.3226) time: 0.7566 data: 0.0002 max mem: 8426 +[2024-12-10 15:51:08 root] (utils.py 283): INFO Epoch: [8] [ 900/2502] eta: 0:20:20 lr: 0.000018 loss_cls: 3.8729 (3.9606) grad_norm: 2.3659 (2.3231) time: 0.7600 data: 0.0002 max mem: 8426 +[2024-12-10 15:51:15 root] (utils.py 283): INFO Epoch: [8] [ 910/2502] eta: 0:20:12 lr: 0.000018 loss_cls: 4.0590 (3.9608) grad_norm: 2.3659 (2.3235) time: 0.7574 data: 0.0002 max mem: 8426 +[2024-12-10 15:51:23 root] (utils.py 283): INFO Epoch: [8] [ 920/2502] eta: 0:20:04 lr: 0.000018 loss_cls: 4.0590 (3.9600) grad_norm: 2.3083 (2.3233) time: 0.7540 data: 0.0002 max mem: 8426 +[2024-12-10 15:51:30 root] (utils.py 283): INFO Epoch: [8] [ 930/2502] eta: 0:19:57 lr: 0.000018 loss_cls: 3.6372 (3.9546) grad_norm: 2.2938 (2.3237) time: 0.7507 data: 0.0002 max mem: 8426 +[2024-12-10 15:51:38 root] (utils.py 283): INFO Epoch: [8] [ 940/2502] eta: 0:19:49 lr: 0.000018 loss_cls: 3.6659 (3.9584) grad_norm: 2.3068 (2.3235) time: 0.7531 data: 0.0002 max mem: 8426 +[2024-12-10 15:51:45 root] (utils.py 283): INFO Epoch: [8] [ 950/2502] eta: 0:19:41 lr: 0.000018 loss_cls: 4.2319 (3.9576) grad_norm: 2.2790 (2.3229) time: 0.7520 data: 0.0002 max mem: 8426 +[2024-12-10 15:51:53 root] (utils.py 283): INFO Epoch: [8] [ 960/2502] eta: 0:19:33 lr: 0.000018 loss_cls: 4.1064 (3.9551) grad_norm: 2.3316 (2.3237) time: 0.7488 data: 0.0002 max mem: 8426 +[2024-12-10 15:52:00 root] (utils.py 283): INFO Epoch: [8] [ 970/2502] eta: 0:19:25 lr: 0.000018 loss_cls: 3.7823 (3.9540) grad_norm: 2.3708 (2.3242) time: 0.7509 data: 0.0002 max mem: 8426 +[2024-12-10 15:52:08 root] (utils.py 283): INFO Epoch: [8] [ 980/2502] eta: 0:19:18 lr: 0.000018 loss_cls: 4.1147 (3.9558) grad_norm: 2.3419 (2.3247) time: 0.7545 data: 0.0002 max mem: 8426 +[2024-12-10 15:52:15 root] (utils.py 283): INFO Epoch: [8] [ 990/2502] eta: 0:19:10 lr: 0.000018 loss_cls: 4.2034 (3.9578) grad_norm: 2.3027 (2.3241) time: 0.7576 data: 0.0002 max mem: 8426 +[2024-12-10 15:52:23 root] (utils.py 283): INFO Epoch: [8] [1000/2502] eta: 0:19:02 lr: 0.000018 loss_cls: 4.1164 (3.9561) grad_norm: 2.3027 (2.3242) time: 0.7566 data: 0.0002 max mem: 8426 +[2024-12-10 15:52:30 root] (utils.py 283): INFO Epoch: [8] [1010/2502] eta: 0:18:55 lr: 0.000018 loss_cls: 3.5624 (3.9550) grad_norm: 2.3711 (2.3254) time: 0.7573 data: 0.0002 max mem: 8426 +[2024-12-10 15:52:38 root] (utils.py 283): INFO Epoch: [8] [1020/2502] eta: 0:18:47 lr: 0.000018 loss_cls: 4.1634 (3.9552) grad_norm: 2.3171 (2.3255) time: 0.7570 data: 0.0002 max mem: 8426 +[2024-12-10 15:52:46 root] (utils.py 283): INFO Epoch: [8] [1030/2502] eta: 0:18:39 lr: 0.000018 loss_cls: 4.1634 (3.9555) grad_norm: 2.2852 (2.3257) time: 0.7550 data: 0.0002 max mem: 8426 +[2024-12-10 15:52:53 root] (utils.py 283): INFO Epoch: [8] [1040/2502] eta: 0:18:32 lr: 0.000018 loss_cls: 3.9233 (3.9544) grad_norm: 2.3080 (2.3255) time: 0.7570 data: 0.0002 max mem: 8426 +[2024-12-10 15:53:01 root] (utils.py 283): INFO Epoch: [8] [1050/2502] eta: 0:18:24 lr: 0.000018 loss_cls: 3.8853 (3.9549) grad_norm: 2.2965 (2.3253) time: 0.7575 data: 0.0002 max mem: 8426 +[2024-12-10 15:53:08 root] (utils.py 283): INFO Epoch: [8] [1060/2502] eta: 0:18:16 lr: 0.000018 loss_cls: 3.8853 (3.9530) grad_norm: 2.2965 (2.3252) time: 0.7572 data: 0.0002 max mem: 8426 +[2024-12-10 15:53:16 root] (utils.py 283): INFO Epoch: [8] [1070/2502] eta: 0:18:09 lr: 0.000018 loss_cls: 3.9538 (3.9524) grad_norm: 2.2799 (2.3250) time: 0.7578 data: 0.0002 max mem: 8426 +[2024-12-10 15:53:23 root] (utils.py 283): INFO Epoch: [8] [1080/2502] eta: 0:18:01 lr: 0.000018 loss_cls: 3.9538 (3.9506) grad_norm: 2.2568 (2.3247) time: 0.7560 data: 0.0002 max mem: 8426 +[2024-12-10 15:53:31 root] (utils.py 283): INFO Epoch: [8] [1090/2502] eta: 0:17:53 lr: 0.000018 loss_cls: 4.0482 (3.9490) grad_norm: 2.2657 (2.3244) time: 0.7559 data: 0.0002 max mem: 8426 +[2024-12-10 15:53:39 root] (utils.py 283): INFO Epoch: [8] [1100/2502] eta: 0:17:46 lr: 0.000018 loss_cls: 3.9598 (3.9489) grad_norm: 2.2746 (2.3243) time: 0.7568 data: 0.0002 max mem: 8426 +[2024-12-10 15:53:46 root] (utils.py 283): INFO Epoch: [8] [1110/2502] eta: 0:17:38 lr: 0.000018 loss_cls: 3.8817 (3.9468) grad_norm: 2.2858 (2.3243) time: 0.7553 data: 0.0002 max mem: 8426 +[2024-12-10 15:53:54 root] (utils.py 283): INFO Epoch: [8] [1120/2502] eta: 0:17:30 lr: 0.000018 loss_cls: 3.8608 (3.9449) grad_norm: 2.3155 (2.3240) time: 0.7538 data: 0.0002 max mem: 8426 +[2024-12-10 15:54:01 root] (utils.py 283): INFO Epoch: [8] [1130/2502] eta: 0:17:23 lr: 0.000018 loss_cls: 3.9680 (3.9465) grad_norm: 2.2332 (2.3232) time: 0.7529 data: 0.0002 max mem: 8426 +[2024-12-10 15:54:09 root] (utils.py 283): INFO Epoch: [8] [1140/2502] eta: 0:17:15 lr: 0.000018 loss_cls: 4.1549 (3.9479) grad_norm: 2.3020 (2.3239) time: 0.7537 data: 0.0002 max mem: 8426 +[2024-12-10 15:54:16 root] (utils.py 283): INFO Epoch: [8] [1150/2502] eta: 0:17:07 lr: 0.000018 loss_cls: 4.1130 (3.9479) grad_norm: 2.3664 (2.3239) time: 0.7556 data: 0.0002 max mem: 8426 +[2024-12-10 15:54:24 root] (utils.py 283): INFO Epoch: [8] [1160/2502] eta: 0:17:00 lr: 0.000018 loss_cls: 4.0137 (3.9473) grad_norm: 2.3454 (2.3243) time: 0.7570 data: 0.0002 max mem: 8426 +[2024-12-10 15:54:32 root] (utils.py 283): INFO Epoch: [8] [1170/2502] eta: 0:16:52 lr: 0.000018 loss_cls: 4.2248 (3.9487) grad_norm: 2.3497 (2.3241) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 15:54:39 root] (utils.py 283): INFO Epoch: [8] [1180/2502] eta: 0:16:45 lr: 0.000018 loss_cls: 4.3399 (3.9510) grad_norm: 2.2854 (2.3245) time: 0.7600 data: 0.0002 max mem: 8426 +[2024-12-10 15:54:47 root] (utils.py 283): INFO Epoch: [8] [1190/2502] eta: 0:16:37 lr: 0.000018 loss_cls: 4.0416 (3.9514) grad_norm: 2.3160 (2.3245) time: 0.7553 data: 0.0002 max mem: 8426 +[2024-12-10 15:54:54 root] (utils.py 283): INFO Epoch: [8] [1200/2502] eta: 0:16:29 lr: 0.000018 loss_cls: 4.1796 (3.9529) grad_norm: 2.3160 (2.3244) time: 0.7578 data: 0.0002 max mem: 8426 +[2024-12-10 15:55:02 root] (utils.py 283): INFO Epoch: [8] [1210/2502] eta: 0:16:22 lr: 0.000018 loss_cls: 4.2400 (3.9524) grad_norm: 2.3592 (2.3254) time: 0.7586 data: 0.0002 max mem: 8426 +[2024-12-10 15:55:09 root] (utils.py 283): INFO Epoch: [8] [1220/2502] eta: 0:16:14 lr: 0.000018 loss_cls: 4.1725 (3.9528) grad_norm: 2.3644 (2.3256) time: 0.7557 data: 0.0002 max mem: 8426 +[2024-12-10 15:55:17 root] (utils.py 283): INFO Epoch: [8] [1230/2502] eta: 0:16:06 lr: 0.000018 loss_cls: 3.9803 (3.9536) grad_norm: 2.2870 (2.3252) time: 0.7541 data: 0.0002 max mem: 8426 +[2024-12-10 15:55:25 root] (utils.py 283): INFO Epoch: [8] [1240/2502] eta: 0:15:59 lr: 0.000018 loss_cls: 4.0185 (3.9536) grad_norm: 2.2648 (2.3252) time: 0.7603 data: 0.0002 max mem: 8426 +[2024-12-10 15:55:32 root] (utils.py 283): INFO Epoch: [8] [1250/2502] eta: 0:15:51 lr: 0.000018 loss_cls: 4.1990 (3.9539) grad_norm: 2.3468 (2.3252) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 15:55:40 root] (utils.py 283): INFO Epoch: [8] [1260/2502] eta: 0:15:44 lr: 0.000018 loss_cls: 4.1892 (3.9541) grad_norm: 2.2621 (2.3246) time: 0.7603 data: 0.0002 max mem: 8426 +[2024-12-10 15:55:47 root] (utils.py 283): INFO Epoch: [8] [1270/2502] eta: 0:15:36 lr: 0.000018 loss_cls: 4.0649 (3.9554) grad_norm: 2.2678 (2.3249) time: 0.7596 data: 0.0002 max mem: 8426 +[2024-12-10 15:55:55 root] (utils.py 283): INFO Epoch: [8] [1280/2502] eta: 0:15:28 lr: 0.000018 loss_cls: 3.9026 (3.9533) grad_norm: 2.3099 (2.3249) time: 0.7584 data: 0.0002 max mem: 8426 +[2024-12-10 15:56:03 root] (utils.py 283): INFO Epoch: [8] [1290/2502] eta: 0:15:21 lr: 0.000018 loss_cls: 3.6165 (3.9509) grad_norm: 2.2981 (2.3245) time: 0.7575 data: 0.0002 max mem: 8426 +[2024-12-10 15:56:10 root] (utils.py 283): INFO Epoch: [8] [1300/2502] eta: 0:15:13 lr: 0.000018 loss_cls: 3.3961 (3.9461) grad_norm: 2.2828 (2.3243) time: 0.7592 data: 0.0002 max mem: 8426 +[2024-12-10 15:56:18 root] (utils.py 283): INFO Epoch: [8] [1310/2502] eta: 0:15:06 lr: 0.000018 loss_cls: 3.6028 (3.9475) grad_norm: 2.2894 (2.3246) time: 0.7578 data: 0.0002 max mem: 8426 +[2024-12-10 15:56:25 root] (utils.py 283): INFO Epoch: [8] [1320/2502] eta: 0:14:58 lr: 0.000018 loss_cls: 4.1794 (3.9476) grad_norm: 2.2732 (2.3244) time: 0.7559 data: 0.0002 max mem: 8426 +[2024-12-10 15:56:33 root] (utils.py 283): INFO Epoch: [8] [1330/2502] eta: 0:14:50 lr: 0.000018 loss_cls: 4.1520 (3.9483) grad_norm: 2.2922 (2.3246) time: 0.7542 data: 0.0002 max mem: 8426 +[2024-12-10 15:56:40 root] (utils.py 283): INFO Epoch: [8] [1340/2502] eta: 0:14:43 lr: 0.000018 loss_cls: 4.1520 (3.9501) grad_norm: 2.2975 (2.3240) time: 0.7545 data: 0.0002 max mem: 8426 +[2024-12-10 15:56:48 root] (utils.py 283): INFO Epoch: [8] [1350/2502] eta: 0:14:35 lr: 0.000018 loss_cls: 4.1883 (3.9520) grad_norm: 2.2727 (2.3240) time: 0.7581 data: 0.0002 max mem: 8426 +[2024-12-10 15:56:56 root] (utils.py 283): INFO Epoch: [8] [1360/2502] eta: 0:14:27 lr: 0.000018 loss_cls: 4.1409 (3.9523) grad_norm: 2.2938 (2.3238) time: 0.7601 data: 0.0002 max mem: 8426 +[2024-12-10 15:57:03 root] (utils.py 283): INFO Epoch: [8] [1370/2502] eta: 0:14:20 lr: 0.000018 loss_cls: 4.0505 (3.9508) grad_norm: 2.2938 (2.3236) time: 0.7559 data: 0.0002 max mem: 8426 +[2024-12-10 15:57:11 root] (utils.py 283): INFO Epoch: [8] [1380/2502] eta: 0:14:12 lr: 0.000018 loss_cls: 4.0505 (3.9521) grad_norm: 2.3019 (2.3240) time: 0.7535 data: 0.0002 max mem: 8426 +[2024-12-10 15:57:18 root] (utils.py 283): INFO Epoch: [8] [1390/2502] eta: 0:14:04 lr: 0.000018 loss_cls: 3.9598 (3.9511) grad_norm: 2.3172 (2.3243) time: 0.7527 data: 0.0002 max mem: 8426 +[2024-12-10 15:57:26 root] (utils.py 283): INFO Epoch: [8] [1400/2502] eta: 0:13:57 lr: 0.000018 loss_cls: 3.6997 (3.9497) grad_norm: 2.3459 (2.3243) time: 0.7612 data: 0.0002 max mem: 8426 +[2024-12-10 15:57:34 root] (utils.py 283): INFO Epoch: [8] [1410/2502] eta: 0:13:49 lr: 0.000018 loss_cls: 4.1029 (3.9496) grad_norm: 2.3459 (2.3243) time: 0.7700 data: 0.0002 max mem: 8426 +[2024-12-10 15:57:41 root] (utils.py 283): INFO Epoch: [8] [1420/2502] eta: 0:13:42 lr: 0.000018 loss_cls: 4.0308 (3.9488) grad_norm: 2.2539 (2.3241) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 15:57:49 root] (utils.py 283): INFO Epoch: [8] [1430/2502] eta: 0:13:34 lr: 0.000018 loss_cls: 3.8116 (3.9478) grad_norm: 2.3011 (2.3243) time: 0.7551 data: 0.0002 max mem: 8426 +[2024-12-10 15:57:56 root] (utils.py 283): INFO Epoch: [8] [1440/2502] eta: 0:13:26 lr: 0.000018 loss_cls: 3.8116 (3.9475) grad_norm: 2.2938 (2.3246) time: 0.7561 data: 0.0002 max mem: 8426 +[2024-12-10 15:58:04 root] (utils.py 283): INFO Epoch: [8] [1450/2502] eta: 0:13:19 lr: 0.000018 loss_cls: 3.9680 (3.9455) grad_norm: 2.2940 (2.3249) time: 0.7595 data: 0.0002 max mem: 8426 +[2024-12-10 15:58:11 root] (utils.py 283): INFO Epoch: [8] [1460/2502] eta: 0:13:11 lr: 0.000018 loss_cls: 4.0208 (3.9465) grad_norm: 2.3480 (2.3255) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 15:58:19 root] (utils.py 283): INFO Epoch: [8] [1470/2502] eta: 0:13:04 lr: 0.000018 loss_cls: 4.1496 (3.9462) grad_norm: 2.3019 (2.3253) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 15:58:27 root] (utils.py 283): INFO Epoch: [8] [1480/2502] eta: 0:12:56 lr: 0.000018 loss_cls: 3.9567 (3.9448) grad_norm: 2.2530 (2.3252) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 15:58:34 root] (utils.py 283): INFO Epoch: [8] [1490/2502] eta: 0:12:49 lr: 0.000018 loss_cls: 3.7306 (3.9436) grad_norm: 2.2812 (2.3254) time: 0.7613 data: 0.0002 max mem: 8426 +[2024-12-10 15:58:42 root] (utils.py 283): INFO Epoch: [8] [1500/2502] eta: 0:12:41 lr: 0.000018 loss_cls: 3.9093 (3.9422) grad_norm: 2.3150 (2.3253) time: 0.7625 data: 0.0002 max mem: 8426 +[2024-12-10 15:58:50 root] (utils.py 283): INFO Epoch: [8] [1510/2502] eta: 0:12:33 lr: 0.000018 loss_cls: 4.0698 (3.9430) grad_norm: 2.2707 (2.3246) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 15:58:57 root] (utils.py 283): INFO Epoch: [8] [1520/2502] eta: 0:12:26 lr: 0.000018 loss_cls: 3.9281 (3.9405) grad_norm: 2.2707 (2.3245) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 15:59:05 root] (utils.py 283): INFO Epoch: [8] [1530/2502] eta: 0:12:18 lr: 0.000018 loss_cls: 3.5260 (3.9394) grad_norm: 2.3273 (2.3248) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 15:59:13 root] (utils.py 283): INFO Epoch: [8] [1540/2502] eta: 0:12:11 lr: 0.000018 loss_cls: 3.5987 (3.9378) grad_norm: 2.3623 (2.3250) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 15:59:20 root] (utils.py 283): INFO Epoch: [8] [1550/2502] eta: 0:12:03 lr: 0.000018 loss_cls: 3.8780 (3.9381) grad_norm: 2.3546 (2.3251) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-10 15:59:28 root] (utils.py 283): INFO Epoch: [8] [1560/2502] eta: 0:11:56 lr: 0.000018 loss_cls: 4.1466 (3.9390) grad_norm: 2.2926 (2.3253) time: 0.7599 data: 0.0002 max mem: 8426 +[2024-12-10 15:59:35 root] (utils.py 283): INFO Epoch: [8] [1570/2502] eta: 0:11:48 lr: 0.000018 loss_cls: 4.1796 (3.9397) grad_norm: 2.3381 (2.3254) time: 0.7571 data: 0.0002 max mem: 8426 +[2024-12-10 15:59:43 root] (utils.py 283): INFO Epoch: [8] [1580/2502] eta: 0:11:40 lr: 0.000018 loss_cls: 3.8949 (3.9376) grad_norm: 2.3412 (2.3255) time: 0.7586 data: 0.0002 max mem: 8426 +[2024-12-10 15:59:51 root] (utils.py 283): INFO Epoch: [8] [1590/2502] eta: 0:11:33 lr: 0.000018 loss_cls: 3.7415 (3.9359) grad_norm: 2.2643 (2.3250) time: 0.7595 data: 0.0002 max mem: 8426 +[2024-12-10 15:59:58 root] (utils.py 283): INFO Epoch: [8] [1600/2502] eta: 0:11:25 lr: 0.000018 loss_cls: 3.3343 (3.9330) grad_norm: 2.2902 (2.3250) time: 0.7613 data: 0.0002 max mem: 8426 +[2024-12-10 16:00:06 root] (utils.py 283): INFO Epoch: [8] [1610/2502] eta: 0:11:18 lr: 0.000018 loss_cls: 3.9829 (3.9350) grad_norm: 2.3584 (2.3254) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-10 16:00:13 root] (utils.py 283): INFO Epoch: [8] [1620/2502] eta: 0:11:10 lr: 0.000018 loss_cls: 4.3218 (3.9351) grad_norm: 2.3200 (2.3253) time: 0.7633 data: 0.0002 max mem: 8426 +[2024-12-10 16:00:21 root] (utils.py 283): INFO Epoch: [8] [1630/2502] eta: 0:11:02 lr: 0.000018 loss_cls: 4.2013 (3.9354) grad_norm: 2.2929 (2.3253) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 16:00:29 root] (utils.py 283): INFO Epoch: [8] [1640/2502] eta: 0:10:55 lr: 0.000018 loss_cls: 4.0122 (3.9353) grad_norm: 2.3060 (2.3256) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 16:00:36 root] (utils.py 283): INFO Epoch: [8] [1650/2502] eta: 0:10:47 lr: 0.000018 loss_cls: 3.7646 (3.9342) grad_norm: 2.3377 (2.3256) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 16:00:44 root] (utils.py 283): INFO Epoch: [8] [1660/2502] eta: 0:10:40 lr: 0.000018 loss_cls: 3.8347 (3.9347) grad_norm: 2.3213 (2.3256) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 16:00:52 root] (utils.py 283): INFO Epoch: [8] [1670/2502] eta: 0:10:32 lr: 0.000018 loss_cls: 4.1350 (3.9353) grad_norm: 2.3213 (2.3257) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 16:00:59 root] (utils.py 283): INFO Epoch: [8] [1680/2502] eta: 0:10:24 lr: 0.000018 loss_cls: 4.1350 (3.9361) grad_norm: 2.3320 (2.3255) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 16:01:07 root] (utils.py 283): INFO Epoch: [8] [1690/2502] eta: 0:10:17 lr: 0.000018 loss_cls: 4.0180 (3.9355) grad_norm: 2.3428 (2.3256) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 16:01:14 root] (utils.py 283): INFO Epoch: [8] [1700/2502] eta: 0:10:09 lr: 0.000018 loss_cls: 3.9801 (3.9363) grad_norm: 2.2770 (2.3253) time: 0.7630 data: 0.0002 max mem: 8426 +[2024-12-10 16:01:22 root] (utils.py 283): INFO Epoch: [8] [1710/2502] eta: 0:10:02 lr: 0.000018 loss_cls: 4.1193 (3.9369) grad_norm: 2.2266 (2.3247) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 16:01:30 root] (utils.py 283): INFO Epoch: [8] [1720/2502] eta: 0:09:54 lr: 0.000018 loss_cls: 4.1434 (3.9377) grad_norm: 2.2902 (2.3254) time: 0.7706 data: 0.0002 max mem: 8426 +[2024-12-10 16:01:38 root] (utils.py 283): INFO Epoch: [8] [1730/2502] eta: 0:09:47 lr: 0.000018 loss_cls: 4.1434 (3.9371) grad_norm: 2.4646 (2.3262) time: 0.7681 data: 0.0002 max mem: 8426 +[2024-12-10 16:01:45 root] (utils.py 283): INFO Epoch: [8] [1740/2502] eta: 0:09:39 lr: 0.000018 loss_cls: 4.1144 (3.9369) grad_norm: 2.3904 (2.3262) time: 0.7679 data: 0.0002 max mem: 8426 +[2024-12-10 16:01:53 root] (utils.py 283): INFO Epoch: [8] [1750/2502] eta: 0:09:31 lr: 0.000018 loss_cls: 4.2479 (3.9392) grad_norm: 2.3198 (2.3262) time: 0.7778 data: 0.0002 max mem: 8426 +[2024-12-10 16:02:01 root] (utils.py 283): INFO Epoch: [8] [1760/2502] eta: 0:09:24 lr: 0.000018 loss_cls: 4.3298 (3.9383) grad_norm: 2.3017 (2.3261) time: 0.7804 data: 0.0002 max mem: 8426 +[2024-12-10 16:02:09 root] (utils.py 283): INFO Epoch: [8] [1770/2502] eta: 0:09:16 lr: 0.000018 loss_cls: 3.7201 (3.9388) grad_norm: 2.3365 (2.3263) time: 0.7800 data: 0.0002 max mem: 8426 +[2024-12-10 16:02:16 root] (utils.py 283): INFO Epoch: [8] [1780/2502] eta: 0:09:09 lr: 0.000018 loss_cls: 3.7201 (3.9369) grad_norm: 2.3289 (2.3263) time: 0.7792 data: 0.0002 max mem: 8426 +[2024-12-10 16:02:24 root] (utils.py 283): INFO Epoch: [8] [1790/2502] eta: 0:09:01 lr: 0.000018 loss_cls: 4.0448 (3.9376) grad_norm: 2.3112 (2.3268) time: 0.7820 data: 0.0002 max mem: 8426 +[2024-12-10 16:02:32 root] (utils.py 283): INFO Epoch: [8] [1800/2502] eta: 0:08:54 lr: 0.000018 loss_cls: 4.1855 (3.9396) grad_norm: 2.3450 (2.3268) time: 0.7786 data: 0.0002 max mem: 8426 +[2024-12-10 16:02:40 root] (utils.py 283): INFO Epoch: [8] [1810/2502] eta: 0:08:46 lr: 0.000018 loss_cls: 4.3190 (3.9402) grad_norm: 2.3520 (2.3271) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 16:02:47 root] (utils.py 283): INFO Epoch: [8] [1820/2502] eta: 0:08:39 lr: 0.000018 loss_cls: 4.0999 (3.9402) grad_norm: 2.3520 (2.3274) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-10 16:02:55 root] (utils.py 283): INFO Epoch: [8] [1830/2502] eta: 0:08:31 lr: 0.000018 loss_cls: 3.7603 (3.9401) grad_norm: 2.2752 (2.3271) time: 0.7727 data: 0.0002 max mem: 8426 +[2024-12-10 16:03:03 root] (utils.py 283): INFO Epoch: [8] [1840/2502] eta: 0:08:23 lr: 0.000018 loss_cls: 3.6480 (3.9384) grad_norm: 2.2770 (2.3269) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 16:03:10 root] (utils.py 283): INFO Epoch: [8] [1850/2502] eta: 0:08:16 lr: 0.000018 loss_cls: 3.9258 (3.9391) grad_norm: 2.3105 (2.3274) time: 0.7591 data: 0.0002 max mem: 8426 +[2024-12-10 16:03:18 root] (utils.py 283): INFO Epoch: [8] [1860/2502] eta: 0:08:08 lr: 0.000018 loss_cls: 4.2549 (3.9405) grad_norm: 2.3093 (2.3275) time: 0.7625 data: 0.0002 max mem: 8426 +[2024-12-10 16:03:26 root] (utils.py 283): INFO Epoch: [8] [1870/2502] eta: 0:08:01 lr: 0.000018 loss_cls: 3.9642 (3.9380) grad_norm: 2.3054 (2.3275) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-10 16:03:33 root] (utils.py 283): INFO Epoch: [8] [1880/2502] eta: 0:07:53 lr: 0.000018 loss_cls: 4.0901 (3.9401) grad_norm: 2.2975 (2.3275) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 16:03:41 root] (utils.py 283): INFO Epoch: [8] [1890/2502] eta: 0:07:45 lr: 0.000018 loss_cls: 4.2636 (3.9406) grad_norm: 2.2518 (2.3271) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 16:03:48 root] (utils.py 283): INFO Epoch: [8] [1900/2502] eta: 0:07:38 lr: 0.000018 loss_cls: 4.1523 (3.9404) grad_norm: 2.2903 (2.3275) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 16:03:56 root] (utils.py 283): INFO Epoch: [8] [1910/2502] eta: 0:07:30 lr: 0.000018 loss_cls: 4.1080 (3.9416) grad_norm: 2.3694 (2.3278) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 16:04:04 root] (utils.py 283): INFO Epoch: [8] [1920/2502] eta: 0:07:23 lr: 0.000018 loss_cls: 4.0493 (3.9418) grad_norm: 2.3322 (2.3279) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 16:04:11 root] (utils.py 283): INFO Epoch: [8] [1930/2502] eta: 0:07:15 lr: 0.000018 loss_cls: 3.9888 (3.9413) grad_norm: 2.3340 (2.3279) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 16:04:19 root] (utils.py 283): INFO Epoch: [8] [1940/2502] eta: 0:07:07 lr: 0.000018 loss_cls: 3.8594 (3.9406) grad_norm: 2.3340 (2.3280) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 16:04:27 root] (utils.py 283): INFO Epoch: [8] [1950/2502] eta: 0:07:00 lr: 0.000018 loss_cls: 3.6452 (3.9400) grad_norm: 2.3312 (2.3280) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 16:04:34 root] (utils.py 283): INFO Epoch: [8] [1960/2502] eta: 0:06:52 lr: 0.000018 loss_cls: 3.7682 (3.9416) grad_norm: 2.3477 (2.3280) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 16:04:42 root] (utils.py 283): INFO Epoch: [8] [1970/2502] eta: 0:06:45 lr: 0.000018 loss_cls: 4.3079 (3.9420) grad_norm: 2.3113 (2.3281) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 16:04:50 root] (utils.py 283): INFO Epoch: [8] [1980/2502] eta: 0:06:37 lr: 0.000018 loss_cls: 4.0166 (3.9418) grad_norm: 2.2795 (2.3278) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 16:04:57 root] (utils.py 283): INFO Epoch: [8] [1990/2502] eta: 0:06:29 lr: 0.000018 loss_cls: 4.0166 (3.9424) grad_norm: 2.3149 (2.3281) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 16:05:05 root] (utils.py 283): INFO Epoch: [8] [2000/2502] eta: 0:06:22 lr: 0.000018 loss_cls: 4.2833 (3.9427) grad_norm: 2.3149 (2.3282) time: 0.7702 data: 0.0002 max mem: 8426 +[2024-12-10 16:05:13 root] (utils.py 283): INFO Epoch: [8] [2010/2502] eta: 0:06:14 lr: 0.000018 loss_cls: 3.6564 (3.9410) grad_norm: 2.3393 (2.3284) time: 0.7723 data: 0.0002 max mem: 8426 +[2024-12-10 16:05:20 root] (utils.py 283): INFO Epoch: [8] [2020/2502] eta: 0:06:07 lr: 0.000018 loss_cls: 3.4985 (3.9391) grad_norm: 2.3602 (2.3290) time: 0.7682 data: 0.0002 max mem: 8426 +[2024-12-10 16:05:28 root] (utils.py 283): INFO Epoch: [8] [2030/2502] eta: 0:05:59 lr: 0.000018 loss_cls: 3.9408 (3.9397) grad_norm: 2.3619 (2.3291) time: 0.7675 data: 0.0002 max mem: 8426 +[2024-12-10 16:05:36 root] (utils.py 283): INFO Epoch: [8] [2040/2502] eta: 0:05:51 lr: 0.000018 loss_cls: 4.1054 (3.9395) grad_norm: 2.3619 (2.3291) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 16:05:43 root] (utils.py 283): INFO Epoch: [8] [2050/2502] eta: 0:05:44 lr: 0.000018 loss_cls: 3.9875 (3.9403) grad_norm: 2.2810 (2.3288) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 16:05:51 root] (utils.py 283): INFO Epoch: [8] [2060/2502] eta: 0:05:36 lr: 0.000018 loss_cls: 4.1024 (3.9397) grad_norm: 2.2833 (2.3288) time: 0.7625 data: 0.0002 max mem: 8426 +[2024-12-10 16:05:59 root] (utils.py 283): INFO Epoch: [8] [2070/2502] eta: 0:05:29 lr: 0.000018 loss_cls: 4.0970 (3.9399) grad_norm: 2.3297 (2.3288) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 16:06:06 root] (utils.py 283): INFO Epoch: [8] [2080/2502] eta: 0:05:21 lr: 0.000018 loss_cls: 3.7388 (3.9400) grad_norm: 2.3248 (2.3288) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 16:06:14 root] (utils.py 283): INFO Epoch: [8] [2090/2502] eta: 0:05:13 lr: 0.000018 loss_cls: 3.9012 (3.9408) grad_norm: 2.2848 (2.3287) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 16:06:22 root] (utils.py 283): INFO Epoch: [8] [2100/2502] eta: 0:05:06 lr: 0.000018 loss_cls: 3.8744 (3.9387) grad_norm: 2.3193 (2.3290) time: 0.7689 data: 0.0002 max mem: 8426 +[2024-12-10 16:06:29 root] (utils.py 283): INFO Epoch: [8] [2110/2502] eta: 0:04:58 lr: 0.000018 loss_cls: 3.6140 (3.9384) grad_norm: 2.3839 (2.3293) time: 0.7676 data: 0.0002 max mem: 8426 +[2024-12-10 16:06:37 root] (utils.py 283): INFO Epoch: [8] [2120/2502] eta: 0:04:50 lr: 0.000018 loss_cls: 3.8397 (3.9388) grad_norm: 2.3667 (2.3295) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-10 16:06:44 root] (utils.py 283): INFO Epoch: [8] [2130/2502] eta: 0:04:43 lr: 0.000018 loss_cls: 3.9480 (3.9385) grad_norm: 2.3091 (2.3297) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-10 16:06:52 root] (utils.py 283): INFO Epoch: [8] [2140/2502] eta: 0:04:35 lr: 0.000018 loss_cls: 3.8641 (3.9384) grad_norm: 2.3356 (2.3298) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 16:07:00 root] (utils.py 283): INFO Epoch: [8] [2150/2502] eta: 0:04:28 lr: 0.000018 loss_cls: 3.8045 (3.9376) grad_norm: 2.3080 (2.3298) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 16:07:07 root] (utils.py 283): INFO Epoch: [8] [2160/2502] eta: 0:04:20 lr: 0.000018 loss_cls: 4.2292 (3.9385) grad_norm: 2.3159 (2.3300) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 16:07:15 root] (utils.py 283): INFO Epoch: [8] [2170/2502] eta: 0:04:12 lr: 0.000018 loss_cls: 4.3582 (3.9404) grad_norm: 2.3499 (2.3299) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-10 16:07:23 root] (utils.py 283): INFO Epoch: [8] [2180/2502] eta: 0:04:05 lr: 0.000018 loss_cls: 4.2514 (3.9391) grad_norm: 2.3763 (2.3303) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-10 16:07:30 root] (utils.py 283): INFO Epoch: [8] [2190/2502] eta: 0:03:57 lr: 0.000018 loss_cls: 4.2166 (3.9393) grad_norm: 2.3763 (2.3302) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 16:07:38 root] (utils.py 283): INFO Epoch: [8] [2200/2502] eta: 0:03:50 lr: 0.000018 loss_cls: 4.2166 (3.9387) grad_norm: 2.2808 (2.3300) time: 0.7668 data: 0.0002 max mem: 8426 +[2024-12-10 16:07:46 root] (utils.py 283): INFO Epoch: [8] [2210/2502] eta: 0:03:42 lr: 0.000018 loss_cls: 4.0430 (3.9394) grad_norm: 2.3342 (2.3302) time: 0.7661 data: 0.0002 max mem: 8426 +[2024-12-10 16:07:53 root] (utils.py 283): INFO Epoch: [8] [2220/2502] eta: 0:03:34 lr: 0.000018 loss_cls: 4.0313 (3.9388) grad_norm: 2.2874 (2.3300) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 16:08:01 root] (utils.py 283): INFO Epoch: [8] [2230/2502] eta: 0:03:27 lr: 0.000018 loss_cls: 3.9823 (3.9387) grad_norm: 2.2716 (2.3299) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 16:08:09 root] (utils.py 283): INFO Epoch: [8] [2240/2502] eta: 0:03:19 lr: 0.000018 loss_cls: 4.2288 (3.9397) grad_norm: 2.3369 (2.3300) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 16:08:16 root] (utils.py 283): INFO Epoch: [8] [2250/2502] eta: 0:03:11 lr: 0.000018 loss_cls: 4.4017 (3.9416) grad_norm: 2.4098 (2.3304) time: 0.7598 data: 0.0002 max mem: 8426 +[2024-12-10 16:08:24 root] (utils.py 283): INFO Epoch: [8] [2260/2502] eta: 0:03:04 lr: 0.000018 loss_cls: 4.4213 (3.9433) grad_norm: 2.3688 (2.3305) time: 0.7614 data: 0.0002 max mem: 8426 +[2024-12-10 16:08:31 root] (utils.py 283): INFO Epoch: [8] [2270/2502] eta: 0:02:56 lr: 0.000018 loss_cls: 4.1799 (3.9432) grad_norm: 2.3496 (2.3304) time: 0.7605 data: 0.0002 max mem: 8426 +[2024-12-10 16:08:39 root] (utils.py 283): INFO Epoch: [8] [2280/2502] eta: 0:02:49 lr: 0.000018 loss_cls: 3.7367 (3.9425) grad_norm: 2.3023 (2.3303) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 16:08:47 root] (utils.py 283): INFO Epoch: [8] [2290/2502] eta: 0:02:41 lr: 0.000018 loss_cls: 3.7367 (3.9417) grad_norm: 2.3023 (2.3301) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 16:08:54 root] (utils.py 283): INFO Epoch: [8] [2300/2502] eta: 0:02:33 lr: 0.000018 loss_cls: 4.0404 (3.9424) grad_norm: 2.3224 (2.3302) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 16:09:02 root] (utils.py 283): INFO Epoch: [8] [2310/2502] eta: 0:02:26 lr: 0.000018 loss_cls: 4.0763 (3.9429) grad_norm: 2.3666 (2.3301) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 16:09:10 root] (utils.py 283): INFO Epoch: [8] [2320/2502] eta: 0:02:18 lr: 0.000018 loss_cls: 4.2535 (3.9449) grad_norm: 2.3527 (2.3300) time: 0.7625 data: 0.0002 max mem: 8426 +[2024-12-10 16:09:17 root] (utils.py 283): INFO Epoch: [8] [2330/2502] eta: 0:02:11 lr: 0.000018 loss_cls: 4.3438 (3.9458) grad_norm: 2.3103 (2.3299) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 16:09:25 root] (utils.py 283): INFO Epoch: [8] [2340/2502] eta: 0:02:03 lr: 0.000018 loss_cls: 3.8731 (3.9452) grad_norm: 2.2875 (2.3298) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 16:09:33 root] (utils.py 283): INFO Epoch: [8] [2350/2502] eta: 0:01:55 lr: 0.000018 loss_cls: 4.0364 (3.9462) grad_norm: 2.2456 (2.3294) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 16:09:40 root] (utils.py 283): INFO Epoch: [8] [2360/2502] eta: 0:01:48 lr: 0.000018 loss_cls: 4.1506 (3.9458) grad_norm: 2.3186 (2.3296) time: 0.7731 data: 0.0002 max mem: 8426 +[2024-12-10 16:09:48 root] (utils.py 283): INFO Epoch: [8] [2370/2502] eta: 0:01:40 lr: 0.000018 loss_cls: 4.1359 (3.9465) grad_norm: 2.3186 (2.3296) time: 0.7723 data: 0.0002 max mem: 8426 +[2024-12-10 16:09:56 root] (utils.py 283): INFO Epoch: [8] [2380/2502] eta: 0:01:32 lr: 0.000018 loss_cls: 4.1768 (3.9476) grad_norm: 2.3005 (2.3296) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 16:10:03 root] (utils.py 283): INFO Epoch: [8] [2390/2502] eta: 0:01:25 lr: 0.000018 loss_cls: 4.2310 (3.9476) grad_norm: 2.3039 (2.3294) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 16:10:11 root] (utils.py 283): INFO Epoch: [8] [2400/2502] eta: 0:01:17 lr: 0.000018 loss_cls: 4.2278 (3.9477) grad_norm: 2.2610 (2.3292) time: 0.7690 data: 0.0002 max mem: 8426 +[2024-12-10 16:10:19 root] (utils.py 283): INFO Epoch: [8] [2410/2502] eta: 0:01:10 lr: 0.000018 loss_cls: 3.9472 (3.9470) grad_norm: 2.2791 (2.3293) time: 0.7670 data: 0.0002 max mem: 8426 +[2024-12-10 16:10:26 root] (utils.py 283): INFO Epoch: [8] [2420/2502] eta: 0:01:02 lr: 0.000018 loss_cls: 4.0672 (3.9477) grad_norm: 2.3316 (2.3294) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 16:10:34 root] (utils.py 283): INFO Epoch: [8] [2430/2502] eta: 0:00:54 lr: 0.000018 loss_cls: 4.2309 (3.9489) grad_norm: 2.3057 (2.3293) time: 0.7654 data: 0.0002 max mem: 8426 +[2024-12-10 16:10:42 root] (utils.py 283): INFO Epoch: [8] [2440/2502] eta: 0:00:47 lr: 0.000018 loss_cls: 4.2608 (3.9486) grad_norm: 2.3057 (2.3294) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 16:10:49 root] (utils.py 283): INFO Epoch: [8] [2450/2502] eta: 0:00:39 lr: 0.000018 loss_cls: 4.1954 (3.9496) grad_norm: 2.3426 (2.3297) time: 0.7701 data: 0.0002 max mem: 8426 +[2024-12-10 16:10:57 root] (utils.py 283): INFO Epoch: [8] [2460/2502] eta: 0:00:32 lr: 0.000018 loss_cls: 4.2256 (3.9491) grad_norm: 2.3555 (2.3301) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 16:11:04 root] (utils.py 283): INFO Epoch: [8] [2470/2502] eta: 0:00:24 lr: 0.000018 loss_cls: 4.1956 (3.9492) grad_norm: 2.3455 (2.3302) time: 0.7589 data: 0.0002 max mem: 8426 +[2024-12-10 16:11:12 root] (utils.py 283): INFO Epoch: [8] [2480/2502] eta: 0:00:16 lr: 0.000018 loss_cls: 3.9628 (3.9489) grad_norm: 2.2893 (2.3301) time: 0.7614 data: 0.0002 max mem: 8426 +[2024-12-10 16:11:20 root] (utils.py 283): INFO Epoch: [8] [2490/2502] eta: 0:00:09 lr: 0.000018 loss_cls: 4.0588 (3.9497) grad_norm: 2.2939 (2.3302) time: 0.7849 data: 0.0242 max mem: 8426 +[2024-12-10 16:11:28 root] (utils.py 283): INFO Epoch: [8] [2500/2502] eta: 0:00:01 lr: 0.000018 loss_cls: 4.3695 (3.9517) grad_norm: 2.3434 (2.3303) time: 0.7846 data: 0.0242 max mem: 8426 +[2024-12-10 16:11:29 root] (utils.py 283): INFO Epoch: [8] [2501/2502] eta: 0:00:00 lr: 0.000018 loss_cls: 4.3695 (3.9519) grad_norm: 2.3062 (2.3302) time: 0.7845 data: 0.0242 max mem: 8426 +[2024-12-10 16:11:29 root] (utils.py 297): INFO Epoch: [8] Total time: 0:31:47 (0.7624 s / it) +[2024-12-10 16:11:29 root] (engine.py 179): INFO Averaged stats:lr: 0.000018 loss_cls: 4.3695 (3.9555) grad_norm: 2.3062 (2.3302) +[2024-12-10 16:11:29 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.6076 (0.6076) acc1: 89.8438 (89.8438) acc3: 96.0938 (96.0938) acc5: 98.4375 (98.4375) time: 0.1276 data: 0.0006 max mem: 8426 +[2024-12-10 16:11:30 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7521 (0.8065) acc1: 85.9375 (82.7415) acc3: 93.7500 (93.2528) acc5: 97.6562 (96.8750) time: 0.1278 data: 0.0003 max mem: 8426 +[2024-12-10 16:11:32 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8534 (0.8723) acc1: 79.6875 (80.7292) acc3: 92.9688 (92.6339) acc5: 96.0938 (95.8705) time: 0.1280 data: 0.0003 max mem: 8426 +[2024-12-10 16:11:33 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9692 (0.8829) acc1: 78.9062 (79.9143) acc3: 92.9688 (92.9688) acc5: 96.0938 (95.9677) time: 0.1308 data: 0.0004 max mem: 8426 +[2024-12-10 16:11:34 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8285 (0.8715) acc1: 80.4688 (80.4497) acc3: 93.7500 (93.0640) acc5: 96.8750 (95.9794) time: 0.1380 data: 0.0067 max mem: 8426 +[2024-12-10 16:11:36 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0669 (0.9554) acc1: 76.5625 (78.5080) acc3: 89.0625 (91.6207) acc5: 92.1875 (94.8989) time: 0.1358 data: 0.0071 max mem: 8426 +[2024-12-10 16:11:38 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2521 (1.0050) acc1: 71.0938 (77.7152) acc3: 85.9375 (90.6378) acc5: 89.8438 (94.0446) time: 0.1642 data: 0.0366 max mem: 8426 +[2024-12-10 16:11:39 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2460 (1.0443) acc1: 73.4375 (76.8816) acc3: 85.9375 (90.0418) acc5: 89.8438 (93.4859) time: 0.1638 data: 0.0361 max mem: 8426 +[2024-12-10 16:11:40 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2400 (1.0778) acc1: 73.4375 (76.1478) acc3: 86.7188 (89.5351) acc5: 89.8438 (93.0459) time: 0.1282 data: 0.0006 max mem: 8426 +[2024-12-10 16:11:42 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2553 (1.1064) acc1: 71.8750 (75.3348) acc3: 85.1562 (89.0968) acc5: 89.8438 (92.8056) time: 0.1325 data: 0.0049 max mem: 8426 +[2024-12-10 16:11:42 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1485 (1.0940) acc1: 71.8750 (75.6080) acc3: 88.2812 (89.3040) acc5: 92.9688 (92.9920) time: 0.1305 data: 0.0048 max mem: 8426 +[2024-12-10 16:11:42 root] (utils.py 297): INFO Test: Total time: 0:00:13 (0.1381 s / it) +[2024-12-10 16:11:44 root] (engine.py 264): INFO * Acc@1 75.452 Acc@3 89.464 Acc@5 92.830 loss 1.095 flops 1.285 layer_flops 1.251 +[2024-12-10 16:11:44 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 75.5% +[2024-12-10 16:11:44 root] (main.py 576): INFO Max accuracy: 75.57% +[2024-12-10 16:11:44 root] (utils.py 283): INFO Epoch: [9] [ 0/2502] eta: 0:31:29 lr: 0.000017 loss_cls: 3.8270 (3.8270) grad_norm: 2.4435 (2.4435) time: 0.7554 data: 0.0003 max mem: 8426 +[2024-12-10 16:11:52 root] (utils.py 283): INFO Epoch: [9] [ 10/2502] eta: 0:31:37 lr: 0.000017 loss_cls: 4.0959 (4.0037) grad_norm: 2.3875 (2.3874) time: 0.7613 data: 0.0003 max mem: 8426 +[2024-12-10 16:12:00 root] (utils.py 283): INFO Epoch: [9] [ 20/2502] eta: 0:31:33 lr: 0.000017 loss_cls: 4.0753 (3.9219) grad_norm: 2.3381 (2.3533) time: 0.7633 data: 0.0002 max mem: 8426 +[2024-12-10 16:12:07 root] (utils.py 283): INFO Epoch: [9] [ 30/2502] eta: 0:31:24 lr: 0.000017 loss_cls: 3.8137 (3.8589) grad_norm: 2.3069 (2.3396) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 16:12:15 root] (utils.py 283): INFO Epoch: [9] [ 40/2502] eta: 0:31:15 lr: 0.000017 loss_cls: 3.8013 (3.8815) grad_norm: 2.2827 (2.3203) time: 0.7609 data: 0.0002 max mem: 8426 +[2024-12-10 16:12:22 root] (utils.py 283): INFO Epoch: [9] [ 50/2502] eta: 0:31:05 lr: 0.000017 loss_cls: 3.5830 (3.8170) grad_norm: 2.2827 (2.3288) time: 0.7589 data: 0.0003 max mem: 8426 +[2024-12-10 16:12:30 root] (utils.py 283): INFO Epoch: [9] [ 60/2502] eta: 0:30:59 lr: 0.000017 loss_cls: 4.2678 (3.8833) grad_norm: 2.3652 (2.3348) time: 0.7607 data: 0.0002 max mem: 8426 +[2024-12-10 16:12:38 root] (utils.py 283): INFO Epoch: [9] [ 70/2502] eta: 0:30:52 lr: 0.000017 loss_cls: 4.2559 (3.8603) grad_norm: 2.3474 (2.3368) time: 0.7632 data: 0.0002 max mem: 8426 +[2024-12-10 16:12:45 root] (utils.py 283): INFO Epoch: [9] [ 80/2502] eta: 0:30:43 lr: 0.000017 loss_cls: 3.8679 (3.8437) grad_norm: 2.3324 (2.3433) time: 0.7602 data: 0.0002 max mem: 8426 +[2024-12-10 16:12:53 root] (utils.py 283): INFO Epoch: [9] [ 90/2502] eta: 0:30:35 lr: 0.000017 loss_cls: 3.8272 (3.8583) grad_norm: 2.3446 (2.3433) time: 0.7585 data: 0.0002 max mem: 8426 +[2024-12-10 16:13:00 root] (utils.py 283): INFO Epoch: [9] [ 100/2502] eta: 0:30:27 lr: 0.000017 loss_cls: 3.8272 (3.8584) grad_norm: 2.3749 (2.3491) time: 0.7596 data: 0.0002 max mem: 8426 +[2024-12-10 16:13:08 root] (utils.py 283): INFO Epoch: [9] [ 110/2502] eta: 0:30:20 lr: 0.000017 loss_cls: 4.2648 (3.8981) grad_norm: 2.3554 (2.3556) time: 0.7617 data: 0.0002 max mem: 8426 +[2024-12-10 16:13:16 root] (utils.py 283): INFO Epoch: [9] [ 120/2502] eta: 0:30:13 lr: 0.000017 loss_cls: 4.1638 (3.9169) grad_norm: 2.3216 (2.3536) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 16:13:23 root] (utils.py 283): INFO Epoch: [9] [ 130/2502] eta: 0:30:06 lr: 0.000017 loss_cls: 4.0824 (3.9138) grad_norm: 2.3085 (2.3538) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 16:13:31 root] (utils.py 283): INFO Epoch: [9] [ 140/2502] eta: 0:29:59 lr: 0.000017 loss_cls: 4.2691 (3.9368) grad_norm: 2.2848 (2.3516) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 16:13:39 root] (utils.py 283): INFO Epoch: [9] [ 150/2502] eta: 0:29:51 lr: 0.000017 loss_cls: 4.2114 (3.9377) grad_norm: 2.3106 (2.3532) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 16:13:46 root] (utils.py 283): INFO Epoch: [9] [ 160/2502] eta: 0:29:43 lr: 0.000017 loss_cls: 3.6679 (3.9265) grad_norm: 2.4183 (2.3594) time: 0.7611 data: 0.0002 max mem: 8426 +[2024-12-10 16:13:54 root] (utils.py 283): INFO Epoch: [9] [ 170/2502] eta: 0:29:37 lr: 0.000017 loss_cls: 3.5291 (3.9043) grad_norm: 2.4043 (2.3589) time: 0.7644 data: 0.0003 max mem: 8426 +[2024-12-10 16:14:02 root] (utils.py 283): INFO Epoch: [9] [ 180/2502] eta: 0:29:29 lr: 0.000017 loss_cls: 3.5604 (3.8887) grad_norm: 2.2524 (2.3532) time: 0.7661 data: 0.0002 max mem: 8426 +[2024-12-10 16:14:09 root] (utils.py 283): INFO Epoch: [9] [ 190/2502] eta: 0:29:22 lr: 0.000017 loss_cls: 3.6295 (3.8876) grad_norm: 2.2725 (2.3496) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 16:14:17 root] (utils.py 283): INFO Epoch: [9] [ 200/2502] eta: 0:29:14 lr: 0.000017 loss_cls: 4.0065 (3.9008) grad_norm: 2.3070 (2.3496) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 16:14:24 root] (utils.py 283): INFO Epoch: [9] [ 210/2502] eta: 0:29:06 lr: 0.000017 loss_cls: 4.1763 (3.8978) grad_norm: 2.2945 (2.3459) time: 0.7600 data: 0.0002 max mem: 8426 +[2024-12-10 16:14:32 root] (utils.py 283): INFO Epoch: [9] [ 220/2502] eta: 0:28:59 lr: 0.000017 loss_cls: 4.0199 (3.9073) grad_norm: 2.2845 (2.3428) time: 0.7609 data: 0.0002 max mem: 8426 +[2024-12-10 16:14:40 root] (utils.py 283): INFO Epoch: [9] [ 230/2502] eta: 0:28:51 lr: 0.000017 loss_cls: 4.0199 (3.9016) grad_norm: 2.2872 (2.3414) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 16:14:47 root] (utils.py 283): INFO Epoch: [9] [ 240/2502] eta: 0:28:44 lr: 0.000017 loss_cls: 4.1041 (3.9097) grad_norm: 2.3168 (2.3422) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 16:14:55 root] (utils.py 283): INFO Epoch: [9] [ 250/2502] eta: 0:28:37 lr: 0.000017 loss_cls: 4.2797 (3.9196) grad_norm: 2.3254 (2.3407) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 16:15:03 root] (utils.py 283): INFO Epoch: [9] [ 260/2502] eta: 0:28:29 lr: 0.000017 loss_cls: 4.1164 (3.9174) grad_norm: 2.2714 (2.3371) time: 0.7671 data: 0.0002 max mem: 8426 +[2024-12-10 16:15:10 root] (utils.py 283): INFO Epoch: [9] [ 270/2502] eta: 0:28:22 lr: 0.000017 loss_cls: 4.0971 (3.9216) grad_norm: 2.2337 (2.3363) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-10 16:15:18 root] (utils.py 283): INFO Epoch: [9] [ 280/2502] eta: 0:28:14 lr: 0.000017 loss_cls: 4.1316 (3.9241) grad_norm: 2.3122 (2.3361) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 16:15:26 root] (utils.py 283): INFO Epoch: [9] [ 290/2502] eta: 0:28:08 lr: 0.000017 loss_cls: 4.2651 (3.9352) grad_norm: 2.3171 (2.3351) time: 0.7726 data: 0.0002 max mem: 8426 +[2024-12-10 16:15:33 root] (utils.py 283): INFO Epoch: [9] [ 300/2502] eta: 0:28:00 lr: 0.000017 loss_cls: 4.1303 (3.9324) grad_norm: 2.3317 (2.3360) time: 0.7725 data: 0.0002 max mem: 8426 +[2024-12-10 16:15:41 root] (utils.py 283): INFO Epoch: [9] [ 310/2502] eta: 0:27:52 lr: 0.000017 loss_cls: 3.8447 (3.9208) grad_norm: 2.3137 (2.3351) time: 0.7612 data: 0.0002 max mem: 8426 +[2024-12-10 16:15:49 root] (utils.py 283): INFO Epoch: [9] [ 320/2502] eta: 0:27:45 lr: 0.000017 loss_cls: 3.6987 (3.9207) grad_norm: 2.3163 (2.3362) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 16:15:56 root] (utils.py 283): INFO Epoch: [9] [ 330/2502] eta: 0:27:37 lr: 0.000017 loss_cls: 4.0827 (3.9274) grad_norm: 2.2890 (2.3337) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-10 16:16:04 root] (utils.py 283): INFO Epoch: [9] [ 340/2502] eta: 0:27:30 lr: 0.000017 loss_cls: 4.1349 (3.9258) grad_norm: 2.2749 (2.3327) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-10 16:16:11 root] (utils.py 283): INFO Epoch: [9] [ 350/2502] eta: 0:27:22 lr: 0.000017 loss_cls: 4.0476 (3.9254) grad_norm: 2.3003 (2.3336) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 16:16:19 root] (utils.py 283): INFO Epoch: [9] [ 360/2502] eta: 0:27:14 lr: 0.000017 loss_cls: 3.4604 (3.9115) grad_norm: 2.3685 (2.3345) time: 0.7610 data: 0.0002 max mem: 8426 +[2024-12-10 16:16:27 root] (utils.py 283): INFO Epoch: [9] [ 370/2502] eta: 0:27:06 lr: 0.000017 loss_cls: 3.8142 (3.9116) grad_norm: 2.3114 (2.3323) time: 0.7603 data: 0.0002 max mem: 8426 +[2024-12-10 16:16:34 root] (utils.py 283): INFO Epoch: [9] [ 380/2502] eta: 0:26:59 lr: 0.000017 loss_cls: 3.9895 (3.9117) grad_norm: 2.2641 (2.3306) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-10 16:16:42 root] (utils.py 283): INFO Epoch: [9] [ 390/2502] eta: 0:26:51 lr: 0.000017 loss_cls: 4.0868 (3.9130) grad_norm: 2.2751 (2.3291) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 16:16:50 root] (utils.py 283): INFO Epoch: [9] [ 400/2502] eta: 0:26:44 lr: 0.000017 loss_cls: 4.1344 (3.9175) grad_norm: 2.2821 (2.3285) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-10 16:16:57 root] (utils.py 283): INFO Epoch: [9] [ 410/2502] eta: 0:26:37 lr: 0.000017 loss_cls: 3.9346 (3.9147) grad_norm: 2.3292 (2.3275) time: 0.7709 data: 0.0002 max mem: 8426 +[2024-12-10 16:17:05 root] (utils.py 283): INFO Epoch: [9] [ 420/2502] eta: 0:26:29 lr: 0.000017 loss_cls: 4.0596 (3.9181) grad_norm: 2.3378 (2.3282) time: 0.7675 data: 0.0003 max mem: 8426 +[2024-12-10 16:17:13 root] (utils.py 283): INFO Epoch: [9] [ 430/2502] eta: 0:26:21 lr: 0.000017 loss_cls: 4.1855 (3.9173) grad_norm: 2.3526 (2.3285) time: 0.7620 data: 0.0003 max mem: 8426 +[2024-12-10 16:17:20 root] (utils.py 283): INFO Epoch: [9] [ 440/2502] eta: 0:26:14 lr: 0.000017 loss_cls: 4.1869 (3.9187) grad_norm: 2.3595 (2.3302) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 16:17:28 root] (utils.py 283): INFO Epoch: [9] [ 450/2502] eta: 0:26:06 lr: 0.000017 loss_cls: 4.1869 (3.9184) grad_norm: 2.3484 (2.3307) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 16:17:36 root] (utils.py 283): INFO Epoch: [9] [ 460/2502] eta: 0:25:58 lr: 0.000017 loss_cls: 3.8946 (3.9165) grad_norm: 2.3484 (2.3301) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 16:17:43 root] (utils.py 283): INFO Epoch: [9] [ 470/2502] eta: 0:25:51 lr: 0.000017 loss_cls: 3.8946 (3.9193) grad_norm: 2.2964 (2.3303) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 16:17:51 root] (utils.py 283): INFO Epoch: [9] [ 480/2502] eta: 0:25:43 lr: 0.000017 loss_cls: 4.0772 (3.9181) grad_norm: 2.2964 (2.3310) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 16:17:58 root] (utils.py 283): INFO Epoch: [9] [ 490/2502] eta: 0:25:35 lr: 0.000017 loss_cls: 4.0940 (3.9230) grad_norm: 2.2880 (2.3311) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 16:18:06 root] (utils.py 283): INFO Epoch: [9] [ 500/2502] eta: 0:25:28 lr: 0.000017 loss_cls: 4.1004 (3.9263) grad_norm: 2.3541 (2.3321) time: 0.7618 data: 0.0002 max mem: 8426 +[2024-12-10 16:18:14 root] (utils.py 283): INFO Epoch: [9] [ 510/2502] eta: 0:25:20 lr: 0.000017 loss_cls: 3.9766 (3.9237) grad_norm: 2.3982 (2.3337) time: 0.7601 data: 0.0003 max mem: 8426 +[2024-12-10 16:18:21 root] (utils.py 283): INFO Epoch: [9] [ 520/2502] eta: 0:25:12 lr: 0.000017 loss_cls: 3.7078 (3.9214) grad_norm: 2.3798 (2.3346) time: 0.7596 data: 0.0002 max mem: 8426 +[2024-12-10 16:18:29 root] (utils.py 283): INFO Epoch: [9] [ 530/2502] eta: 0:25:04 lr: 0.000017 loss_cls: 3.6760 (3.9150) grad_norm: 2.3798 (2.3363) time: 0.7614 data: 0.0002 max mem: 8426 +[2024-12-10 16:18:36 root] (utils.py 283): INFO Epoch: [9] [ 540/2502] eta: 0:24:57 lr: 0.000017 loss_cls: 3.6760 (3.9123) grad_norm: 2.3334 (2.3362) time: 0.7615 data: 0.0002 max mem: 8426 +[2024-12-10 16:18:44 root] (utils.py 283): INFO Epoch: [9] [ 550/2502] eta: 0:24:49 lr: 0.000017 loss_cls: 3.9281 (3.9131) grad_norm: 2.3372 (2.3392) time: 0.7618 data: 0.0002 max mem: 8426 +[2024-12-10 16:18:52 root] (utils.py 283): INFO Epoch: [9] [ 560/2502] eta: 0:24:41 lr: 0.000017 loss_cls: 4.2358 (3.9126) grad_norm: 2.4344 (2.3399) time: 0.7608 data: 0.0002 max mem: 8426 +[2024-12-10 16:18:59 root] (utils.py 283): INFO Epoch: [9] [ 570/2502] eta: 0:24:34 lr: 0.000017 loss_cls: 4.2090 (3.9151) grad_norm: 2.3347 (2.3399) time: 0.7635 data: 0.0003 max mem: 8426 +[2024-12-10 16:19:07 root] (utils.py 283): INFO Epoch: [9] [ 580/2502] eta: 0:24:27 lr: 0.000017 loss_cls: 4.1497 (3.9153) grad_norm: 2.3406 (2.3402) time: 0.7709 data: 0.0002 max mem: 8426 +[2024-12-10 16:19:15 root] (utils.py 283): INFO Epoch: [9] [ 590/2502] eta: 0:24:19 lr: 0.000017 loss_cls: 4.1497 (3.9177) grad_norm: 2.3894 (2.3408) time: 0.7737 data: 0.0002 max mem: 8426 +[2024-12-10 16:19:22 root] (utils.py 283): INFO Epoch: [9] [ 600/2502] eta: 0:24:12 lr: 0.000017 loss_cls: 4.0307 (3.9121) grad_norm: 2.3590 (2.3407) time: 0.7686 data: 0.0002 max mem: 8426 +[2024-12-10 16:19:30 root] (utils.py 283): INFO Epoch: [9] [ 610/2502] eta: 0:24:04 lr: 0.000017 loss_cls: 3.7877 (3.9136) grad_norm: 2.3507 (2.3418) time: 0.7637 data: 0.0003 max mem: 8426 +[2024-12-10 16:19:38 root] (utils.py 283): INFO Epoch: [9] [ 620/2502] eta: 0:23:56 lr: 0.000017 loss_cls: 3.8894 (3.9113) grad_norm: 2.4013 (2.3429) time: 0.7623 data: 0.0002 max mem: 8426 +[2024-12-10 16:19:45 root] (utils.py 283): INFO Epoch: [9] [ 630/2502] eta: 0:23:49 lr: 0.000017 loss_cls: 4.1700 (3.9120) grad_norm: 2.3920 (2.3439) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 16:19:53 root] (utils.py 283): INFO Epoch: [9] [ 640/2502] eta: 0:23:41 lr: 0.000017 loss_cls: 4.0850 (3.9129) grad_norm: 2.3722 (2.3441) time: 0.7618 data: 0.0002 max mem: 8426 +[2024-12-10 16:20:01 root] (utils.py 283): INFO Epoch: [9] [ 650/2502] eta: 0:23:33 lr: 0.000017 loss_cls: 3.8382 (3.9092) grad_norm: 2.3722 (2.3442) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 16:20:08 root] (utils.py 283): INFO Epoch: [9] [ 660/2502] eta: 0:23:26 lr: 0.000017 loss_cls: 3.8411 (3.9097) grad_norm: 2.3233 (2.3432) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 16:20:16 root] (utils.py 283): INFO Epoch: [9] [ 670/2502] eta: 0:23:18 lr: 0.000017 loss_cls: 3.9146 (3.9076) grad_norm: 2.3411 (2.3445) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 16:20:23 root] (utils.py 283): INFO Epoch: [9] [ 680/2502] eta: 0:23:10 lr: 0.000017 loss_cls: 4.2106 (3.9109) grad_norm: 2.3766 (2.3453) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 16:20:31 root] (utils.py 283): INFO Epoch: [9] [ 690/2502] eta: 0:23:03 lr: 0.000017 loss_cls: 4.2966 (3.9097) grad_norm: 2.3065 (2.3447) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 16:20:39 root] (utils.py 283): INFO Epoch: [9] [ 700/2502] eta: 0:22:55 lr: 0.000017 loss_cls: 3.9263 (3.9105) grad_norm: 2.2695 (2.3437) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 16:20:46 root] (utils.py 283): INFO Epoch: [9] [ 710/2502] eta: 0:22:48 lr: 0.000017 loss_cls: 4.1569 (3.9145) grad_norm: 2.3159 (2.3441) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 16:20:54 root] (utils.py 283): INFO Epoch: [9] [ 720/2502] eta: 0:22:40 lr: 0.000017 loss_cls: 4.0481 (3.9167) grad_norm: 2.3319 (2.3441) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 16:21:02 root] (utils.py 283): INFO Epoch: [9] [ 730/2502] eta: 0:22:32 lr: 0.000017 loss_cls: 3.9399 (3.9156) grad_norm: 2.3319 (2.3446) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 16:21:09 root] (utils.py 283): INFO Epoch: [9] [ 740/2502] eta: 0:22:25 lr: 0.000017 loss_cls: 3.9253 (3.9169) grad_norm: 2.3576 (2.3443) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 16:21:17 root] (utils.py 283): INFO Epoch: [9] [ 750/2502] eta: 0:22:17 lr: 0.000017 loss_cls: 4.0896 (3.9192) grad_norm: 2.3115 (2.3431) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-10 16:21:25 root] (utils.py 283): INFO Epoch: [9] [ 760/2502] eta: 0:22:09 lr: 0.000017 loss_cls: 4.0896 (3.9202) grad_norm: 2.2504 (2.3425) time: 0.7603 data: 0.0002 max mem: 8426 +[2024-12-10 16:21:32 root] (utils.py 283): INFO Epoch: [9] [ 770/2502] eta: 0:22:02 lr: 0.000017 loss_cls: 3.5572 (3.9152) grad_norm: 2.3206 (2.3426) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 16:21:40 root] (utils.py 283): INFO Epoch: [9] [ 780/2502] eta: 0:21:54 lr: 0.000017 loss_cls: 3.9734 (3.9152) grad_norm: 2.3556 (2.3430) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 16:21:47 root] (utils.py 283): INFO Epoch: [9] [ 790/2502] eta: 0:21:46 lr: 0.000017 loss_cls: 4.1396 (3.9174) grad_norm: 2.2941 (2.3430) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 16:21:55 root] (utils.py 283): INFO Epoch: [9] [ 800/2502] eta: 0:21:39 lr: 0.000017 loss_cls: 4.3847 (3.9205) grad_norm: 2.3240 (2.3435) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-10 16:22:03 root] (utils.py 283): INFO Epoch: [9] [ 810/2502] eta: 0:21:31 lr: 0.000017 loss_cls: 4.3030 (3.9218) grad_norm: 2.3662 (2.3432) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-10 16:22:10 root] (utils.py 283): INFO Epoch: [9] [ 820/2502] eta: 0:21:24 lr: 0.000017 loss_cls: 4.1561 (3.9202) grad_norm: 2.3662 (2.3436) time: 0.7666 data: 0.0002 max mem: 8426 +[2024-12-10 16:22:18 root] (utils.py 283): INFO Epoch: [9] [ 830/2502] eta: 0:21:16 lr: 0.000017 loss_cls: 4.2255 (3.9230) grad_norm: 2.4032 (2.3448) time: 0.7632 data: 0.0002 max mem: 8426 +[2024-12-10 16:22:26 root] (utils.py 283): INFO Epoch: [9] [ 840/2502] eta: 0:21:08 lr: 0.000017 loss_cls: 4.2462 (3.9216) grad_norm: 2.4086 (2.3453) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 16:22:33 root] (utils.py 283): INFO Epoch: [9] [ 850/2502] eta: 0:21:01 lr: 0.000017 loss_cls: 3.8831 (3.9229) grad_norm: 2.3685 (2.3452) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 16:22:41 root] (utils.py 283): INFO Epoch: [9] [ 860/2502] eta: 0:20:53 lr: 0.000017 loss_cls: 4.1684 (3.9212) grad_norm: 2.3368 (2.3454) time: 0.7707 data: 0.0002 max mem: 8426 +[2024-12-10 16:22:49 root] (utils.py 283): INFO Epoch: [9] [ 870/2502] eta: 0:20:45 lr: 0.000017 loss_cls: 4.0581 (3.9205) grad_norm: 2.3965 (2.3461) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 16:22:56 root] (utils.py 283): INFO Epoch: [9] [ 880/2502] eta: 0:20:38 lr: 0.000017 loss_cls: 4.0782 (3.9199) grad_norm: 2.3811 (2.3461) time: 0.7587 data: 0.0002 max mem: 8426 +[2024-12-10 16:23:04 root] (utils.py 283): INFO Epoch: [9] [ 890/2502] eta: 0:20:30 lr: 0.000017 loss_cls: 3.8576 (3.9165) grad_norm: 2.2787 (2.3450) time: 0.7605 data: 0.0002 max mem: 8426 +[2024-12-10 16:23:11 root] (utils.py 283): INFO Epoch: [9] [ 900/2502] eta: 0:20:22 lr: 0.000017 loss_cls: 3.7640 (3.9148) grad_norm: 2.2824 (2.3451) time: 0.7612 data: 0.0002 max mem: 8426 +[2024-12-10 16:23:19 root] (utils.py 283): INFO Epoch: [9] [ 910/2502] eta: 0:20:15 lr: 0.000017 loss_cls: 4.0603 (3.9133) grad_norm: 2.2871 (2.3449) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 16:23:27 root] (utils.py 283): INFO Epoch: [9] [ 920/2502] eta: 0:20:07 lr: 0.000017 loss_cls: 4.0689 (3.9143) grad_norm: 2.3481 (2.3466) time: 0.7685 data: 0.0002 max mem: 8426 +[2024-12-10 16:23:34 root] (utils.py 283): INFO Epoch: [9] [ 930/2502] eta: 0:20:00 lr: 0.000017 loss_cls: 3.9421 (3.9151) grad_norm: 2.3899 (2.3470) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-10 16:23:42 root] (utils.py 283): INFO Epoch: [9] [ 940/2502] eta: 0:19:52 lr: 0.000017 loss_cls: 4.0294 (3.9172) grad_norm: 2.3100 (2.3474) time: 0.7604 data: 0.0002 max mem: 8426 +[2024-12-10 16:23:50 root] (utils.py 283): INFO Epoch: [9] [ 950/2502] eta: 0:19:44 lr: 0.000017 loss_cls: 4.3492 (3.9185) grad_norm: 2.3100 (2.3468) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 16:23:57 root] (utils.py 283): INFO Epoch: [9] [ 960/2502] eta: 0:19:37 lr: 0.000017 loss_cls: 4.0137 (3.9192) grad_norm: 2.2623 (2.3461) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 16:24:05 root] (utils.py 283): INFO Epoch: [9] [ 970/2502] eta: 0:19:29 lr: 0.000017 loss_cls: 3.9324 (3.9185) grad_norm: 2.2637 (2.3457) time: 0.7589 data: 0.0002 max mem: 8426 +[2024-12-10 16:24:13 root] (utils.py 283): INFO Epoch: [9] [ 980/2502] eta: 0:19:21 lr: 0.000017 loss_cls: 3.7496 (3.9155) grad_norm: 2.2777 (2.3450) time: 0.7623 data: 0.0002 max mem: 8426 +[2024-12-10 16:24:20 root] (utils.py 283): INFO Epoch: [9] [ 990/2502] eta: 0:19:14 lr: 0.000017 loss_cls: 3.8549 (3.9147) grad_norm: 2.2563 (2.3440) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 16:24:28 root] (utils.py 283): INFO Epoch: [9] [1000/2502] eta: 0:19:06 lr: 0.000017 loss_cls: 3.9618 (3.9166) grad_norm: 2.2421 (2.3431) time: 0.7617 data: 0.0002 max mem: 8426 +[2024-12-10 16:24:35 root] (utils.py 283): INFO Epoch: [9] [1010/2502] eta: 0:18:58 lr: 0.000017 loss_cls: 3.9618 (3.9148) grad_norm: 2.3180 (2.3435) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 16:24:43 root] (utils.py 283): INFO Epoch: [9] [1020/2502] eta: 0:18:51 lr: 0.000017 loss_cls: 4.0205 (3.9155) grad_norm: 2.3825 (2.3435) time: 0.7633 data: 0.0002 max mem: 8426 +[2024-12-10 16:24:51 root] (utils.py 283): INFO Epoch: [9] [1030/2502] eta: 0:18:43 lr: 0.000017 loss_cls: 4.0027 (3.9158) grad_norm: 2.3648 (2.3436) time: 0.7677 data: 0.0002 max mem: 8426 +[2024-12-10 16:24:58 root] (utils.py 283): INFO Epoch: [9] [1040/2502] eta: 0:18:36 lr: 0.000017 loss_cls: 3.8328 (3.9167) grad_norm: 2.2653 (2.3426) time: 0.7657 data: 0.0003 max mem: 8426 +[2024-12-10 16:25:06 root] (utils.py 283): INFO Epoch: [9] [1050/2502] eta: 0:18:28 lr: 0.000017 loss_cls: 4.0947 (3.9181) grad_norm: 2.2644 (2.3422) time: 0.7613 data: 0.0003 max mem: 8426 +[2024-12-10 16:25:14 root] (utils.py 283): INFO Epoch: [9] [1060/2502] eta: 0:18:20 lr: 0.000017 loss_cls: 4.0619 (3.9197) grad_norm: 2.2895 (2.3419) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 16:25:21 root] (utils.py 283): INFO Epoch: [9] [1070/2502] eta: 0:18:13 lr: 0.000017 loss_cls: 4.0619 (3.9197) grad_norm: 2.2895 (2.3418) time: 0.7633 data: 0.0002 max mem: 8426 +[2024-12-10 16:25:29 root] (utils.py 283): INFO Epoch: [9] [1080/2502] eta: 0:18:05 lr: 0.000017 loss_cls: 4.0760 (3.9214) grad_norm: 2.2742 (2.3417) time: 0.7615 data: 0.0002 max mem: 8426 +[2024-12-10 16:25:37 root] (utils.py 283): INFO Epoch: [9] [1090/2502] eta: 0:17:57 lr: 0.000017 loss_cls: 4.3726 (3.9250) grad_norm: 2.2916 (2.3419) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 16:25:44 root] (utils.py 283): INFO Epoch: [9] [1100/2502] eta: 0:17:50 lr: 0.000017 loss_cls: 4.4411 (3.9271) grad_norm: 2.3642 (2.3419) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 16:25:52 root] (utils.py 283): INFO Epoch: [9] [1110/2502] eta: 0:17:42 lr: 0.000017 loss_cls: 4.2080 (3.9283) grad_norm: 2.3661 (2.3419) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-10 16:25:59 root] (utils.py 283): INFO Epoch: [9] [1120/2502] eta: 0:17:34 lr: 0.000017 loss_cls: 3.9666 (3.9281) grad_norm: 2.3033 (2.3417) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-10 16:26:07 root] (utils.py 283): INFO Epoch: [9] [1130/2502] eta: 0:17:27 lr: 0.000017 loss_cls: 3.9666 (3.9266) grad_norm: 2.3374 (2.3421) time: 0.7625 data: 0.0002 max mem: 8426 +[2024-12-10 16:26:15 root] (utils.py 283): INFO Epoch: [9] [1140/2502] eta: 0:17:19 lr: 0.000017 loss_cls: 3.9986 (3.9264) grad_norm: 2.3462 (2.3415) time: 0.7696 data: 0.0002 max mem: 8426 +[2024-12-10 16:26:22 root] (utils.py 283): INFO Epoch: [9] [1150/2502] eta: 0:17:12 lr: 0.000017 loss_cls: 4.1916 (3.9272) grad_norm: 2.2515 (2.3409) time: 0.7672 data: 0.0002 max mem: 8426 +[2024-12-10 16:26:30 root] (utils.py 283): INFO Epoch: [9] [1160/2502] eta: 0:17:04 lr: 0.000017 loss_cls: 4.3826 (3.9308) grad_norm: 2.3251 (2.3411) time: 0.7595 data: 0.0002 max mem: 8426 +[2024-12-10 16:26:38 root] (utils.py 283): INFO Epoch: [9] [1170/2502] eta: 0:16:56 lr: 0.000017 loss_cls: 4.3386 (3.9303) grad_norm: 2.3082 (2.3412) time: 0.7597 data: 0.0002 max mem: 8426 +[2024-12-10 16:26:45 root] (utils.py 283): INFO Epoch: [9] [1180/2502] eta: 0:16:49 lr: 0.000017 loss_cls: 3.8944 (3.9313) grad_norm: 2.2807 (2.3415) time: 0.7617 data: 0.0002 max mem: 8426 +[2024-12-10 16:26:53 root] (utils.py 283): INFO Epoch: [9] [1190/2502] eta: 0:16:41 lr: 0.000017 loss_cls: 4.0103 (3.9312) grad_norm: 2.3264 (2.3417) time: 0.7633 data: 0.0002 max mem: 8426 +[2024-12-10 16:27:01 root] (utils.py 283): INFO Epoch: [9] [1200/2502] eta: 0:16:33 lr: 0.000017 loss_cls: 4.0794 (3.9323) grad_norm: 2.2998 (2.3419) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-10 16:27:08 root] (utils.py 283): INFO Epoch: [9] [1210/2502] eta: 0:16:26 lr: 0.000017 loss_cls: 4.0388 (3.9311) grad_norm: 2.2982 (2.3413) time: 0.7662 data: 0.0002 max mem: 8426 +[2024-12-10 16:27:16 root] (utils.py 283): INFO Epoch: [9] [1220/2502] eta: 0:16:18 lr: 0.000017 loss_cls: 3.9694 (3.9319) grad_norm: 2.2473 (2.3405) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 16:27:23 root] (utils.py 283): INFO Epoch: [9] [1230/2502] eta: 0:16:11 lr: 0.000017 loss_cls: 3.9805 (3.9312) grad_norm: 2.2699 (2.3405) time: 0.7602 data: 0.0002 max mem: 8426 +[2024-12-10 16:27:31 root] (utils.py 283): INFO Epoch: [9] [1240/2502] eta: 0:16:03 lr: 0.000017 loss_cls: 3.9210 (3.9299) grad_norm: 2.3145 (2.3408) time: 0.7581 data: 0.0002 max mem: 8426 +[2024-12-10 16:27:39 root] (utils.py 283): INFO Epoch: [9] [1250/2502] eta: 0:15:55 lr: 0.000017 loss_cls: 4.0235 (3.9316) grad_norm: 2.3227 (2.3408) time: 0.7564 data: 0.0002 max mem: 8426 +[2024-12-10 16:27:46 root] (utils.py 283): INFO Epoch: [9] [1260/2502] eta: 0:15:47 lr: 0.000017 loss_cls: 4.1054 (3.9314) grad_norm: 2.3359 (2.3408) time: 0.7554 data: 0.0002 max mem: 8426 +[2024-12-10 16:27:54 root] (utils.py 283): INFO Epoch: [9] [1270/2502] eta: 0:15:40 lr: 0.000017 loss_cls: 3.6314 (3.9279) grad_norm: 2.3359 (2.3409) time: 0.7543 data: 0.0002 max mem: 8426 +[2024-12-10 16:28:01 root] (utils.py 283): INFO Epoch: [9] [1280/2502] eta: 0:15:32 lr: 0.000017 loss_cls: 3.7074 (3.9273) grad_norm: 2.3485 (2.3412) time: 0.7576 data: 0.0002 max mem: 8426 +[2024-12-10 16:28:09 root] (utils.py 283): INFO Epoch: [9] [1290/2502] eta: 0:15:24 lr: 0.000017 loss_cls: 3.9149 (3.9275) grad_norm: 2.3429 (2.3408) time: 0.7568 data: 0.0002 max mem: 8426 +[2024-12-10 16:28:16 root] (utils.py 283): INFO Epoch: [9] [1300/2502] eta: 0:15:17 lr: 0.000017 loss_cls: 3.9149 (3.9271) grad_norm: 2.3223 (2.3408) time: 0.7545 data: 0.0002 max mem: 8426 +[2024-12-10 16:28:24 root] (utils.py 283): INFO Epoch: [9] [1310/2502] eta: 0:15:09 lr: 0.000017 loss_cls: 4.0796 (3.9279) grad_norm: 2.3416 (2.3414) time: 0.7575 data: 0.0002 max mem: 8426 +[2024-12-10 16:28:31 root] (utils.py 283): INFO Epoch: [9] [1320/2502] eta: 0:15:01 lr: 0.000017 loss_cls: 4.2956 (3.9296) grad_norm: 2.3499 (2.3411) time: 0.7581 data: 0.0002 max mem: 8426 +[2024-12-10 16:28:39 root] (utils.py 283): INFO Epoch: [9] [1330/2502] eta: 0:14:54 lr: 0.000017 loss_cls: 4.0715 (3.9276) grad_norm: 2.2670 (2.3408) time: 0.7587 data: 0.0002 max mem: 8426 +[2024-12-10 16:28:47 root] (utils.py 283): INFO Epoch: [9] [1340/2502] eta: 0:14:46 lr: 0.000017 loss_cls: 3.8716 (3.9272) grad_norm: 2.2448 (2.3402) time: 0.7586 data: 0.0002 max mem: 8426 +[2024-12-10 16:28:54 root] (utils.py 283): INFO Epoch: [9] [1350/2502] eta: 0:14:38 lr: 0.000017 loss_cls: 4.3503 (3.9303) grad_norm: 2.2921 (2.3402) time: 0.7604 data: 0.0002 max mem: 8426 +[2024-12-10 16:29:02 root] (utils.py 283): INFO Epoch: [9] [1360/2502] eta: 0:14:31 lr: 0.000017 loss_cls: 4.3387 (3.9292) grad_norm: 2.3533 (2.3406) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 16:29:09 root] (utils.py 283): INFO Epoch: [9] [1370/2502] eta: 0:14:23 lr: 0.000017 loss_cls: 3.9369 (3.9299) grad_norm: 2.3596 (2.3404) time: 0.7581 data: 0.0002 max mem: 8426 +[2024-12-10 16:29:17 root] (utils.py 283): INFO Epoch: [9] [1380/2502] eta: 0:14:15 lr: 0.000017 loss_cls: 4.1803 (3.9305) grad_norm: 2.2419 (2.3405) time: 0.7666 data: 0.0002 max mem: 8426 +[2024-12-10 16:29:25 root] (utils.py 283): INFO Epoch: [9] [1390/2502] eta: 0:14:08 lr: 0.000017 loss_cls: 4.1594 (3.9307) grad_norm: 2.2419 (2.3402) time: 0.7674 data: 0.0002 max mem: 8426 +[2024-12-10 16:29:32 root] (utils.py 283): INFO Epoch: [9] [1400/2502] eta: 0:14:00 lr: 0.000017 loss_cls: 4.0537 (3.9304) grad_norm: 2.2739 (2.3398) time: 0.7554 data: 0.0002 max mem: 8426 +[2024-12-10 16:29:40 root] (utils.py 283): INFO Epoch: [9] [1410/2502] eta: 0:13:53 lr: 0.000017 loss_cls: 3.8849 (3.9300) grad_norm: 2.3006 (2.3398) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 16:29:48 root] (utils.py 283): INFO Epoch: [9] [1420/2502] eta: 0:13:45 lr: 0.000017 loss_cls: 3.8977 (3.9313) grad_norm: 2.3335 (2.3401) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 16:29:55 root] (utils.py 283): INFO Epoch: [9] [1430/2502] eta: 0:13:37 lr: 0.000017 loss_cls: 3.8977 (3.9306) grad_norm: 2.3761 (2.3405) time: 0.7558 data: 0.0002 max mem: 8426 +[2024-12-10 16:30:03 root] (utils.py 283): INFO Epoch: [9] [1440/2502] eta: 0:13:29 lr: 0.000017 loss_cls: 3.9983 (3.9315) grad_norm: 2.3402 (2.3408) time: 0.7549 data: 0.0002 max mem: 8426 +[2024-12-10 16:30:10 root] (utils.py 283): INFO Epoch: [9] [1450/2502] eta: 0:13:22 lr: 0.000017 loss_cls: 4.1821 (3.9330) grad_norm: 2.3647 (2.3411) time: 0.7551 data: 0.0002 max mem: 8426 +[2024-12-10 16:30:18 root] (utils.py 283): INFO Epoch: [9] [1460/2502] eta: 0:13:14 lr: 0.000017 loss_cls: 4.1581 (3.9317) grad_norm: 2.3747 (2.3414) time: 0.7588 data: 0.0002 max mem: 8426 +[2024-12-10 16:30:25 root] (utils.py 283): INFO Epoch: [9] [1470/2502] eta: 0:13:06 lr: 0.000017 loss_cls: 4.0508 (3.9322) grad_norm: 2.3682 (2.3413) time: 0.7602 data: 0.0002 max mem: 8426 +[2024-12-10 16:30:33 root] (utils.py 283): INFO Epoch: [9] [1480/2502] eta: 0:12:59 lr: 0.000017 loss_cls: 3.7193 (3.9312) grad_norm: 2.2602 (2.3409) time: 0.7605 data: 0.0002 max mem: 8426 +[2024-12-10 16:30:41 root] (utils.py 283): INFO Epoch: [9] [1490/2502] eta: 0:12:51 lr: 0.000017 loss_cls: 4.2251 (3.9339) grad_norm: 2.2567 (2.3404) time: 0.7595 data: 0.0002 max mem: 8426 +[2024-12-10 16:30:48 root] (utils.py 283): INFO Epoch: [9] [1500/2502] eta: 0:12:44 lr: 0.000017 loss_cls: 4.2409 (3.9349) grad_norm: 2.2785 (2.3406) time: 0.7559 data: 0.0002 max mem: 8426 +[2024-12-10 16:30:56 root] (utils.py 283): INFO Epoch: [9] [1510/2502] eta: 0:12:36 lr: 0.000017 loss_cls: 4.0074 (3.9335) grad_norm: 2.2529 (2.3403) time: 0.7539 data: 0.0002 max mem: 8426 +[2024-12-10 16:31:03 root] (utils.py 283): INFO Epoch: [9] [1520/2502] eta: 0:12:28 lr: 0.000017 loss_cls: 3.4509 (3.9320) grad_norm: 2.2752 (2.3401) time: 0.7630 data: 0.0002 max mem: 8426 +[2024-12-10 16:31:11 root] (utils.py 283): INFO Epoch: [9] [1530/2502] eta: 0:12:21 lr: 0.000017 loss_cls: 3.8443 (3.9324) grad_norm: 2.2888 (2.3399) time: 0.7685 data: 0.0002 max mem: 8426 +[2024-12-10 16:31:19 root] (utils.py 283): INFO Epoch: [9] [1540/2502] eta: 0:12:13 lr: 0.000017 loss_cls: 4.1004 (3.9326) grad_norm: 2.2667 (2.3394) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 16:31:26 root] (utils.py 283): INFO Epoch: [9] [1550/2502] eta: 0:12:05 lr: 0.000017 loss_cls: 4.0415 (3.9321) grad_norm: 2.2361 (2.3387) time: 0.7600 data: 0.0002 max mem: 8426 +[2024-12-10 16:31:34 root] (utils.py 283): INFO Epoch: [9] [1560/2502] eta: 0:11:58 lr: 0.000017 loss_cls: 3.7475 (3.9318) grad_norm: 2.2572 (2.3384) time: 0.7623 data: 0.0002 max mem: 8426 +[2024-12-10 16:31:42 root] (utils.py 283): INFO Epoch: [9] [1570/2502] eta: 0:11:50 lr: 0.000017 loss_cls: 4.0039 (3.9315) grad_norm: 2.3524 (2.3388) time: 0.7729 data: 0.0002 max mem: 8426 +[2024-12-10 16:31:49 root] (utils.py 283): INFO Epoch: [9] [1580/2502] eta: 0:11:43 lr: 0.000017 loss_cls: 4.0039 (3.9335) grad_norm: 2.4024 (2.3390) time: 0.7709 data: 0.0002 max mem: 8426 +[2024-12-10 16:31:57 root] (utils.py 283): INFO Epoch: [9] [1590/2502] eta: 0:11:35 lr: 0.000017 loss_cls: 4.1470 (3.9332) grad_norm: 2.2995 (2.3385) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 16:32:05 root] (utils.py 283): INFO Epoch: [9] [1600/2502] eta: 0:11:27 lr: 0.000017 loss_cls: 4.1301 (3.9324) grad_norm: 2.2675 (2.3381) time: 0.7694 data: 0.0002 max mem: 8426 +[2024-12-10 16:32:12 root] (utils.py 283): INFO Epoch: [9] [1610/2502] eta: 0:11:20 lr: 0.000017 loss_cls: 3.9476 (3.9313) grad_norm: 2.2392 (2.3377) time: 0.7705 data: 0.0002 max mem: 8426 +[2024-12-10 16:32:20 root] (utils.py 283): INFO Epoch: [9] [1620/2502] eta: 0:11:12 lr: 0.000017 loss_cls: 3.9686 (3.9328) grad_norm: 2.2655 (2.3372) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-10 16:32:28 root] (utils.py 283): INFO Epoch: [9] [1630/2502] eta: 0:11:05 lr: 0.000017 loss_cls: 4.1548 (3.9338) grad_norm: 2.3382 (2.3371) time: 0.7635 data: 0.0003 max mem: 8426 +[2024-12-10 16:32:35 root] (utils.py 283): INFO Epoch: [9] [1640/2502] eta: 0:10:57 lr: 0.000017 loss_cls: 4.2305 (3.9349) grad_norm: 2.3428 (2.3371) time: 0.7642 data: 0.0003 max mem: 8426 +[2024-12-10 16:32:43 root] (utils.py 283): INFO Epoch: [9] [1650/2502] eta: 0:10:49 lr: 0.000017 loss_cls: 4.2147 (3.9359) grad_norm: 2.4034 (2.3377) time: 0.7617 data: 0.0002 max mem: 8426 +[2024-12-10 16:32:51 root] (utils.py 283): INFO Epoch: [9] [1660/2502] eta: 0:10:42 lr: 0.000017 loss_cls: 3.8803 (3.9344) grad_norm: 2.4125 (2.3375) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 16:32:58 root] (utils.py 283): INFO Epoch: [9] [1670/2502] eta: 0:10:34 lr: 0.000017 loss_cls: 3.8803 (3.9348) grad_norm: 2.3164 (2.3375) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 16:33:06 root] (utils.py 283): INFO Epoch: [9] [1680/2502] eta: 0:10:26 lr: 0.000017 loss_cls: 4.0268 (3.9353) grad_norm: 2.2778 (2.3372) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 16:33:13 root] (utils.py 283): INFO Epoch: [9] [1690/2502] eta: 0:10:19 lr: 0.000017 loss_cls: 4.0374 (3.9354) grad_norm: 2.2845 (2.3371) time: 0.7604 data: 0.0002 max mem: 8426 +[2024-12-10 16:33:21 root] (utils.py 283): INFO Epoch: [9] [1700/2502] eta: 0:10:11 lr: 0.000017 loss_cls: 4.0374 (3.9351) grad_norm: 2.3101 (2.3370) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-10 16:33:29 root] (utils.py 283): INFO Epoch: [9] [1710/2502] eta: 0:10:04 lr: 0.000017 loss_cls: 4.0252 (3.9352) grad_norm: 2.3486 (2.3373) time: 0.7780 data: 0.0002 max mem: 8426 +[2024-12-10 16:33:37 root] (utils.py 283): INFO Epoch: [9] [1720/2502] eta: 0:09:56 lr: 0.000017 loss_cls: 3.9829 (3.9340) grad_norm: 2.3541 (2.3375) time: 0.7800 data: 0.0002 max mem: 8426 +[2024-12-10 16:33:45 root] (utils.py 283): INFO Epoch: [9] [1730/2502] eta: 0:09:49 lr: 0.000017 loss_cls: 4.1769 (3.9349) grad_norm: 2.3436 (2.3378) time: 0.7827 data: 0.0002 max mem: 8426 +[2024-12-10 16:33:52 root] (utils.py 283): INFO Epoch: [9] [1740/2502] eta: 0:09:41 lr: 0.000017 loss_cls: 3.7909 (3.9338) grad_norm: 2.3436 (2.3382) time: 0.7830 data: 0.0002 max mem: 8426 +[2024-12-10 16:34:00 root] (utils.py 283): INFO Epoch: [9] [1750/2502] eta: 0:09:34 lr: 0.000017 loss_cls: 4.0761 (3.9350) grad_norm: 2.3869 (2.3384) time: 0.7780 data: 0.0002 max mem: 8426 +[2024-12-10 16:34:08 root] (utils.py 283): INFO Epoch: [9] [1760/2502] eta: 0:09:26 lr: 0.000017 loss_cls: 4.1583 (3.9361) grad_norm: 2.3869 (2.3385) time: 0.7797 data: 0.0002 max mem: 8426 +[2024-12-10 16:34:16 root] (utils.py 283): INFO Epoch: [9] [1770/2502] eta: 0:09:18 lr: 0.000017 loss_cls: 3.9164 (3.9345) grad_norm: 2.3370 (2.3386) time: 0.7830 data: 0.0002 max mem: 8426 +[2024-12-10 16:34:24 root] (utils.py 283): INFO Epoch: [9] [1780/2502] eta: 0:09:11 lr: 0.000017 loss_cls: 3.7472 (3.9341) grad_norm: 2.3252 (2.3387) time: 0.7831 data: 0.0002 max mem: 8426 +[2024-12-10 16:34:31 root] (utils.py 283): INFO Epoch: [9] [1790/2502] eta: 0:09:03 lr: 0.000017 loss_cls: 4.1788 (3.9368) grad_norm: 2.2865 (2.3384) time: 0.7740 data: 0.0002 max mem: 8426 +[2024-12-10 16:34:39 root] (utils.py 283): INFO Epoch: [9] [1800/2502] eta: 0:08:56 lr: 0.000017 loss_cls: 4.3090 (3.9375) grad_norm: 2.2722 (2.3384) time: 0.7671 data: 0.0002 max mem: 8426 +[2024-12-10 16:34:47 root] (utils.py 283): INFO Epoch: [9] [1810/2502] eta: 0:08:48 lr: 0.000017 loss_cls: 3.8184 (3.9369) grad_norm: 2.3493 (2.3388) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 16:34:54 root] (utils.py 283): INFO Epoch: [9] [1820/2502] eta: 0:08:40 lr: 0.000017 loss_cls: 3.7902 (3.9368) grad_norm: 2.3681 (2.3394) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 16:35:02 root] (utils.py 283): INFO Epoch: [9] [1830/2502] eta: 0:08:33 lr: 0.000017 loss_cls: 4.1100 (3.9368) grad_norm: 2.4370 (2.3397) time: 0.7615 data: 0.0002 max mem: 8426 +[2024-12-10 16:35:10 root] (utils.py 283): INFO Epoch: [9] [1840/2502] eta: 0:08:25 lr: 0.000017 loss_cls: 3.9913 (3.9361) grad_norm: 2.3480 (2.3395) time: 0.7613 data: 0.0002 max mem: 8426 +[2024-12-10 16:35:17 root] (utils.py 283): INFO Epoch: [9] [1850/2502] eta: 0:08:17 lr: 0.000017 loss_cls: 3.7505 (3.9352) grad_norm: 2.3027 (2.3396) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 16:35:25 root] (utils.py 283): INFO Epoch: [9] [1860/2502] eta: 0:08:10 lr: 0.000017 loss_cls: 3.7505 (3.9349) grad_norm: 2.3944 (2.3399) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 16:35:33 root] (utils.py 283): INFO Epoch: [9] [1870/2502] eta: 0:08:02 lr: 0.000017 loss_cls: 3.6556 (3.9337) grad_norm: 2.4041 (2.3401) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 16:35:40 root] (utils.py 283): INFO Epoch: [9] [1880/2502] eta: 0:07:55 lr: 0.000017 loss_cls: 3.7190 (3.9345) grad_norm: 2.3213 (2.3400) time: 0.7686 data: 0.0002 max mem: 8426 +[2024-12-10 16:35:48 root] (utils.py 283): INFO Epoch: [9] [1890/2502] eta: 0:07:47 lr: 0.000017 loss_cls: 4.2516 (3.9353) grad_norm: 2.2941 (2.3397) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 16:35:55 root] (utils.py 283): INFO Epoch: [9] [1900/2502] eta: 0:07:39 lr: 0.000017 loss_cls: 3.9904 (3.9347) grad_norm: 2.2525 (2.3392) time: 0.7603 data: 0.0002 max mem: 8426 +[2024-12-10 16:36:03 root] (utils.py 283): INFO Epoch: [9] [1910/2502] eta: 0:07:32 lr: 0.000017 loss_cls: 3.9353 (3.9332) grad_norm: 2.2549 (2.3391) time: 0.7685 data: 0.0002 max mem: 8426 +[2024-12-10 16:36:11 root] (utils.py 283): INFO Epoch: [9] [1920/2502] eta: 0:07:24 lr: 0.000017 loss_cls: 3.9353 (3.9334) grad_norm: 2.2919 (2.3390) time: 0.7676 data: 0.0002 max mem: 8426 +[2024-12-10 16:36:18 root] (utils.py 283): INFO Epoch: [9] [1930/2502] eta: 0:07:16 lr: 0.000017 loss_cls: 3.6994 (3.9304) grad_norm: 2.3305 (2.3389) time: 0.7604 data: 0.0002 max mem: 8426 +[2024-12-10 16:36:26 root] (utils.py 283): INFO Epoch: [9] [1940/2502] eta: 0:07:09 lr: 0.000017 loss_cls: 3.4100 (3.9304) grad_norm: 2.3005 (2.3387) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 16:36:34 root] (utils.py 283): INFO Epoch: [9] [1950/2502] eta: 0:07:01 lr: 0.000017 loss_cls: 4.0898 (3.9305) grad_norm: 2.3005 (2.3386) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-10 16:36:41 root] (utils.py 283): INFO Epoch: [9] [1960/2502] eta: 0:06:53 lr: 0.000017 loss_cls: 4.0898 (3.9317) grad_norm: 2.3192 (2.3385) time: 0.7625 data: 0.0002 max mem: 8426 +[2024-12-10 16:36:49 root] (utils.py 283): INFO Epoch: [9] [1970/2502] eta: 0:06:46 lr: 0.000017 loss_cls: 4.2263 (3.9321) grad_norm: 2.2849 (2.3381) time: 0.7630 data: 0.0002 max mem: 8426 +[2024-12-10 16:36:57 root] (utils.py 283): INFO Epoch: [9] [1980/2502] eta: 0:06:38 lr: 0.000017 loss_cls: 4.0669 (3.9319) grad_norm: 2.2608 (2.3380) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 16:37:04 root] (utils.py 283): INFO Epoch: [9] [1990/2502] eta: 0:06:31 lr: 0.000017 loss_cls: 4.0669 (3.9324) grad_norm: 2.2714 (2.3378) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 16:37:12 root] (utils.py 283): INFO Epoch: [9] [2000/2502] eta: 0:06:23 lr: 0.000017 loss_cls: 4.0671 (3.9324) grad_norm: 2.2736 (2.3375) time: 0.7686 data: 0.0002 max mem: 8426 +[2024-12-10 16:37:20 root] (utils.py 283): INFO Epoch: [9] [2010/2502] eta: 0:06:15 lr: 0.000017 loss_cls: 4.0430 (3.9322) grad_norm: 2.2858 (2.3374) time: 0.7675 data: 0.0003 max mem: 8426 +[2024-12-10 16:37:27 root] (utils.py 283): INFO Epoch: [9] [2020/2502] eta: 0:06:08 lr: 0.000017 loss_cls: 4.0909 (3.9337) grad_norm: 2.2846 (2.3373) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-10 16:37:35 root] (utils.py 283): INFO Epoch: [9] [2030/2502] eta: 0:06:00 lr: 0.000017 loss_cls: 4.3517 (3.9352) grad_norm: 2.3594 (2.3376) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 16:37:43 root] (utils.py 283): INFO Epoch: [9] [2040/2502] eta: 0:05:52 lr: 0.000017 loss_cls: 4.0040 (3.9346) grad_norm: 2.3759 (2.3378) time: 0.7665 data: 0.0003 max mem: 8426 +[2024-12-10 16:37:50 root] (utils.py 283): INFO Epoch: [9] [2050/2502] eta: 0:05:45 lr: 0.000017 loss_cls: 3.7204 (3.9342) grad_norm: 2.2972 (2.3376) time: 0.7684 data: 0.0003 max mem: 8426 +[2024-12-10 16:37:58 root] (utils.py 283): INFO Epoch: [9] [2060/2502] eta: 0:05:37 lr: 0.000017 loss_cls: 4.2088 (3.9354) grad_norm: 2.2409 (2.3371) time: 0.7762 data: 0.0002 max mem: 8426 +[2024-12-10 16:38:06 root] (utils.py 283): INFO Epoch: [9] [2070/2502] eta: 0:05:30 lr: 0.000017 loss_cls: 4.1850 (3.9353) grad_norm: 2.2488 (2.3371) time: 0.7738 data: 0.0003 max mem: 8426 +[2024-12-10 16:38:13 root] (utils.py 283): INFO Epoch: [9] [2080/2502] eta: 0:05:22 lr: 0.000017 loss_cls: 3.9452 (3.9352) grad_norm: 2.2552 (2.3369) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 16:38:21 root] (utils.py 283): INFO Epoch: [9] [2090/2502] eta: 0:05:14 lr: 0.000017 loss_cls: 3.8317 (3.9346) grad_norm: 2.2311 (2.3366) time: 0.7608 data: 0.0002 max mem: 8426 +[2024-12-10 16:38:29 root] (utils.py 283): INFO Epoch: [9] [2100/2502] eta: 0:05:07 lr: 0.000017 loss_cls: 3.7413 (3.9340) grad_norm: 2.2707 (2.3369) time: 0.7584 data: 0.0002 max mem: 8426 +[2024-12-10 16:38:36 root] (utils.py 283): INFO Epoch: [9] [2110/2502] eta: 0:04:59 lr: 0.000017 loss_cls: 4.0798 (3.9357) grad_norm: 2.3751 (2.3372) time: 0.7579 data: 0.0002 max mem: 8426 +[2024-12-10 16:38:44 root] (utils.py 283): INFO Epoch: [9] [2120/2502] eta: 0:04:51 lr: 0.000017 loss_cls: 4.0852 (3.9351) grad_norm: 2.3650 (2.3371) time: 0.7550 data: 0.0002 max mem: 8426 +[2024-12-10 16:38:51 root] (utils.py 283): INFO Epoch: [9] [2130/2502] eta: 0:04:44 lr: 0.000017 loss_cls: 4.0745 (3.9357) grad_norm: 2.3426 (2.3373) time: 0.7548 data: 0.0002 max mem: 8426 +[2024-12-10 16:38:59 root] (utils.py 283): INFO Epoch: [9] [2140/2502] eta: 0:04:36 lr: 0.000017 loss_cls: 4.4395 (3.9364) grad_norm: 2.3444 (2.3374) time: 0.7560 data: 0.0002 max mem: 8426 +[2024-12-10 16:39:06 root] (utils.py 283): INFO Epoch: [9] [2150/2502] eta: 0:04:28 lr: 0.000017 loss_cls: 4.2935 (3.9367) grad_norm: 2.2777 (2.3371) time: 0.7567 data: 0.0002 max mem: 8426 +[2024-12-10 16:39:14 root] (utils.py 283): INFO Epoch: [9] [2160/2502] eta: 0:04:21 lr: 0.000017 loss_cls: 4.1609 (3.9361) grad_norm: 2.2863 (2.3369) time: 0.7550 data: 0.0002 max mem: 8426 +[2024-12-10 16:39:21 root] (utils.py 283): INFO Epoch: [9] [2170/2502] eta: 0:04:13 lr: 0.000017 loss_cls: 3.7892 (3.9363) grad_norm: 2.2950 (2.3370) time: 0.7526 data: 0.0002 max mem: 8426 +[2024-12-10 16:39:29 root] (utils.py 283): INFO Epoch: [9] [2180/2502] eta: 0:04:05 lr: 0.000017 loss_cls: 4.0568 (3.9369) grad_norm: 2.3492 (2.3372) time: 0.7528 data: 0.0002 max mem: 8426 +[2024-12-10 16:39:37 root] (utils.py 283): INFO Epoch: [9] [2190/2502] eta: 0:03:58 lr: 0.000017 loss_cls: 4.0608 (3.9370) grad_norm: 2.3588 (2.3371) time: 0.7536 data: 0.0002 max mem: 8426 +[2024-12-10 16:39:44 root] (utils.py 283): INFO Epoch: [9] [2200/2502] eta: 0:03:50 lr: 0.000017 loss_cls: 4.1049 (3.9374) grad_norm: 2.3168 (2.3374) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-10 16:39:52 root] (utils.py 283): INFO Epoch: [9] [2210/2502] eta: 0:03:42 lr: 0.000017 loss_cls: 4.2401 (3.9377) grad_norm: 2.3193 (2.3375) time: 0.7665 data: 0.0002 max mem: 8426 +[2024-12-10 16:39:59 root] (utils.py 283): INFO Epoch: [9] [2220/2502] eta: 0:03:35 lr: 0.000017 loss_cls: 4.4194 (3.9402) grad_norm: 2.3128 (2.3373) time: 0.7583 data: 0.0002 max mem: 8426 +[2024-12-10 16:40:07 root] (utils.py 283): INFO Epoch: [9] [2230/2502] eta: 0:03:27 lr: 0.000017 loss_cls: 4.4194 (3.9418) grad_norm: 2.2933 (2.3371) time: 0.7532 data: 0.0002 max mem: 8426 +[2024-12-10 16:40:14 root] (utils.py 283): INFO Epoch: [9] [2240/2502] eta: 0:03:20 lr: 0.000017 loss_cls: 4.1714 (3.9417) grad_norm: 2.3088 (2.3370) time: 0.7549 data: 0.0002 max mem: 8426 +[2024-12-10 16:40:22 root] (utils.py 283): INFO Epoch: [9] [2250/2502] eta: 0:03:12 lr: 0.000017 loss_cls: 4.0701 (3.9419) grad_norm: 2.3010 (2.3366) time: 0.7574 data: 0.0002 max mem: 8426 +[2024-12-10 16:40:30 root] (utils.py 283): INFO Epoch: [9] [2260/2502] eta: 0:03:04 lr: 0.000017 loss_cls: 4.1756 (3.9433) grad_norm: 2.2872 (2.3368) time: 0.7611 data: 0.0002 max mem: 8426 +[2024-12-10 16:40:37 root] (utils.py 283): INFO Epoch: [9] [2270/2502] eta: 0:02:57 lr: 0.000017 loss_cls: 3.6881 (3.9401) grad_norm: 2.3872 (2.3371) time: 0.7602 data: 0.0002 max mem: 8426 +[2024-12-10 16:40:45 root] (utils.py 283): INFO Epoch: [9] [2280/2502] eta: 0:02:49 lr: 0.000017 loss_cls: 3.5006 (3.9402) grad_norm: 2.3946 (2.3377) time: 0.7550 data: 0.0002 max mem: 8426 +[2024-12-10 16:40:52 root] (utils.py 283): INFO Epoch: [9] [2290/2502] eta: 0:02:41 lr: 0.000017 loss_cls: 3.9863 (3.9394) grad_norm: 2.3805 (2.3379) time: 0.7545 data: 0.0003 max mem: 8426 +[2024-12-10 16:41:00 root] (utils.py 283): INFO Epoch: [9] [2300/2502] eta: 0:02:34 lr: 0.000017 loss_cls: 4.0270 (3.9400) grad_norm: 2.3643 (2.3379) time: 0.7568 data: 0.0002 max mem: 8426 +[2024-12-10 16:41:08 root] (utils.py 283): INFO Epoch: [9] [2310/2502] eta: 0:02:26 lr: 0.000017 loss_cls: 4.0819 (3.9396) grad_norm: 2.3749 (2.3382) time: 0.7605 data: 0.0002 max mem: 8426 +[2024-12-10 16:41:15 root] (utils.py 283): INFO Epoch: [9] [2320/2502] eta: 0:02:18 lr: 0.000017 loss_cls: 3.7190 (3.9394) grad_norm: 2.3726 (2.3381) time: 0.7578 data: 0.0002 max mem: 8426 +[2024-12-10 16:41:23 root] (utils.py 283): INFO Epoch: [9] [2330/2502] eta: 0:02:11 lr: 0.000017 loss_cls: 3.9661 (3.9392) grad_norm: 2.3359 (2.3381) time: 0.7552 data: 0.0002 max mem: 8426 +[2024-12-10 16:41:30 root] (utils.py 283): INFO Epoch: [9] [2340/2502] eta: 0:02:03 lr: 0.000017 loss_cls: 4.0810 (3.9398) grad_norm: 2.3535 (2.3381) time: 0.7578 data: 0.0002 max mem: 8426 +[2024-12-10 16:41:38 root] (utils.py 283): INFO Epoch: [9] [2350/2502] eta: 0:01:55 lr: 0.000017 loss_cls: 4.2287 (3.9404) grad_norm: 2.4202 (2.3386) time: 0.7572 data: 0.0002 max mem: 8426 +[2024-12-10 16:41:45 root] (utils.py 283): INFO Epoch: [9] [2360/2502] eta: 0:01:48 lr: 0.000017 loss_cls: 4.0515 (3.9402) grad_norm: 2.4493 (2.3390) time: 0.7601 data: 0.0002 max mem: 8426 +[2024-12-10 16:41:53 root] (utils.py 283): INFO Epoch: [9] [2370/2502] eta: 0:01:40 lr: 0.000017 loss_cls: 4.1839 (3.9412) grad_norm: 2.3399 (2.3388) time: 0.7703 data: 0.0002 max mem: 8426 +[2024-12-10 16:42:01 root] (utils.py 283): INFO Epoch: [9] [2380/2502] eta: 0:01:33 lr: 0.000017 loss_cls: 4.2349 (3.9402) grad_norm: 2.2945 (2.3387) time: 0.7690 data: 0.0003 max mem: 8426 +[2024-12-10 16:42:08 root] (utils.py 283): INFO Epoch: [9] [2390/2502] eta: 0:01:25 lr: 0.000017 loss_cls: 3.9012 (3.9396) grad_norm: 2.2704 (2.3384) time: 0.7623 data: 0.0002 max mem: 8426 +[2024-12-10 16:42:16 root] (utils.py 283): INFO Epoch: [9] [2400/2502] eta: 0:01:17 lr: 0.000017 loss_cls: 3.6969 (3.9388) grad_norm: 2.2694 (2.3384) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 16:42:24 root] (utils.py 283): INFO Epoch: [9] [2410/2502] eta: 0:01:10 lr: 0.000017 loss_cls: 3.8539 (3.9389) grad_norm: 2.2764 (2.3383) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 16:42:31 root] (utils.py 283): INFO Epoch: [9] [2420/2502] eta: 0:01:02 lr: 0.000017 loss_cls: 4.1315 (3.9385) grad_norm: 2.3201 (2.3383) time: 0.7623 data: 0.0002 max mem: 8426 +[2024-12-10 16:42:39 root] (utils.py 283): INFO Epoch: [9] [2430/2502] eta: 0:00:54 lr: 0.000017 loss_cls: 4.1430 (3.9390) grad_norm: 2.3240 (2.3382) time: 0.7609 data: 0.0002 max mem: 8426 +[2024-12-10 16:42:47 root] (utils.py 283): INFO Epoch: [9] [2440/2502] eta: 0:00:47 lr: 0.000017 loss_cls: 4.2726 (3.9406) grad_norm: 2.3361 (2.3382) time: 0.7594 data: 0.0002 max mem: 8426 +[2024-12-10 16:42:54 root] (utils.py 283): INFO Epoch: [9] [2450/2502] eta: 0:00:39 lr: 0.000017 loss_cls: 4.2468 (3.9405) grad_norm: 2.3114 (2.3383) time: 0.7586 data: 0.0002 max mem: 8426 +[2024-12-10 16:43:02 root] (utils.py 283): INFO Epoch: [9] [2460/2502] eta: 0:00:32 lr: 0.000017 loss_cls: 4.2468 (3.9424) grad_norm: 2.3114 (2.3380) time: 0.7584 data: 0.0002 max mem: 8426 +[2024-12-10 16:43:09 root] (utils.py 283): INFO Epoch: [9] [2470/2502] eta: 0:00:24 lr: 0.000017 loss_cls: 4.0958 (3.9415) grad_norm: 2.2637 (2.3381) time: 0.7595 data: 0.0002 max mem: 8426 +[2024-12-10 16:43:17 root] (utils.py 283): INFO Epoch: [9] [2480/2502] eta: 0:00:16 lr: 0.000017 loss_cls: 3.8093 (3.9413) grad_norm: 2.3270 (2.3383) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-10 16:43:25 root] (utils.py 283): INFO Epoch: [9] [2490/2502] eta: 0:00:09 lr: 0.000017 loss_cls: 4.0608 (3.9417) grad_norm: 2.3306 (2.3382) time: 0.7866 data: 0.0231 max mem: 8426 +[2024-12-10 16:43:33 root] (utils.py 283): INFO Epoch: [9] [2500/2502] eta: 0:00:01 lr: 0.000017 loss_cls: 4.1330 (3.9414) grad_norm: 2.3434 (2.3385) time: 0.7803 data: 0.0231 max mem: 8426 +[2024-12-10 16:43:33 root] (utils.py 283): INFO Epoch: [9] [2501/2502] eta: 0:00:00 lr: 0.000017 loss_cls: 4.1330 (3.9415) grad_norm: 2.3434 (2.3385) time: 0.7808 data: 0.0231 max mem: 8426 +[2024-12-10 16:43:33 root] (utils.py 297): INFO Epoch: [9] Total time: 0:31:49 (0.7634 s / it) +[2024-12-10 16:43:33 root] (engine.py 179): INFO Averaged stats:lr: 0.000017 loss_cls: 4.1330 (3.9434) grad_norm: 2.3434 (2.3385) +[2024-12-10 16:43:34 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.6401 (0.6401) acc1: 87.5000 (87.5000) acc3: 96.8750 (96.8750) acc5: 97.6562 (97.6562) time: 0.1276 data: 0.0003 max mem: 8426 +[2024-12-10 16:43:35 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7224 (0.8123) acc1: 85.1562 (81.5341) acc3: 95.3125 (93.3949) acc5: 96.8750 (96.1648) time: 0.1288 data: 0.0003 max mem: 8426 +[2024-12-10 16:43:36 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8385 (0.8625) acc1: 80.4688 (81.1012) acc3: 92.9688 (92.8199) acc5: 95.3125 (95.5357) time: 0.1285 data: 0.0003 max mem: 8426 +[2024-12-10 16:43:38 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9526 (0.8741) acc1: 80.4688 (80.2671) acc3: 92.9688 (92.8931) acc5: 95.3125 (95.6149) time: 0.1304 data: 0.0004 max mem: 8426 +[2024-12-10 16:43:39 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8233 (0.8617) acc1: 80.4688 (80.8117) acc3: 93.7500 (93.0450) acc5: 96.0938 (95.6745) time: 0.1306 data: 0.0004 max mem: 8426 +[2024-12-10 16:43:40 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0103 (0.9504) acc1: 74.2188 (78.6152) acc3: 89.0625 (91.6054) acc5: 92.1875 (94.5619) time: 0.1283 data: 0.0004 max mem: 8426 +[2024-12-10 16:43:42 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2347 (0.9958) acc1: 71.8750 (78.0225) acc3: 85.9375 (90.7659) acc5: 90.6250 (93.8012) time: 0.1370 data: 0.0087 max mem: 8426 +[2024-12-10 16:43:43 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1799 (1.0341) acc1: 75.0000 (77.1017) acc3: 87.5000 (90.2289) acc5: 90.6250 (93.3209) time: 0.1407 data: 0.0125 max mem: 8426 +[2024-12-10 16:43:45 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2115 (1.0689) acc1: 72.6562 (76.4371) acc3: 85.9375 (89.5930) acc5: 89.8438 (92.8434) time: 0.1414 data: 0.0129 max mem: 8426 +[2024-12-10 16:43:46 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2166 (1.0978) acc1: 70.3125 (75.6525) acc3: 85.9375 (89.2170) acc5: 89.8438 (92.5652) time: 0.1590 data: 0.0302 max mem: 8426 +[2024-12-10 16:43:47 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1700 (1.0856) acc1: 73.4375 (75.9040) acc3: 88.2812 (89.3760) acc5: 91.4062 (92.7760) time: 0.1517 data: 0.0242 max mem: 8426 +[2024-12-10 16:43:47 root] (utils.py 297): INFO Test: Total time: 0:00:13 (0.1380 s / it) +[2024-12-10 16:43:47 root] (engine.py 264): INFO * Acc@1 75.628 Acc@3 89.552 Acc@5 92.786 loss 1.088 flops 1.285 layer_flops 1.251 +[2024-12-10 16:43:47 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 75.6% +[2024-12-10 16:43:48 root] (main.py 576): INFO Max accuracy: 75.63% +[2024-12-10 16:43:48 root] (utils.py 283): INFO Epoch: [10] [ 0/2502] eta: 0:32:01 lr: 0.000016 loss_cls: 3.5026 (3.5026) grad_norm: 2.3268 (2.3268) time: 0.7679 data: 0.0005 max mem: 8426 +[2024-12-10 16:43:56 root] (utils.py 283): INFO Epoch: [10] [ 10/2502] eta: 0:31:30 lr: 0.000016 loss_cls: 3.9702 (3.9172) grad_norm: 2.3400 (2.3517) time: 0.7588 data: 0.0003 max mem: 8426 +[2024-12-10 16:44:04 root] (utils.py 283): INFO Epoch: [10] [ 20/2502] eta: 0:31:26 lr: 0.000016 loss_cls: 4.2038 (3.9494) grad_norm: 2.3400 (2.3627) time: 0.7598 data: 0.0002 max mem: 8426 +[2024-12-10 16:44:11 root] (utils.py 283): INFO Epoch: [10] [ 30/2502] eta: 0:31:21 lr: 0.000016 loss_cls: 4.0522 (3.9209) grad_norm: 2.3870 (2.3679) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 16:44:19 root] (utils.py 283): INFO Epoch: [10] [ 40/2502] eta: 0:31:13 lr: 0.000016 loss_cls: 3.6140 (3.8469) grad_norm: 2.4108 (2.3788) time: 0.7618 data: 0.0002 max mem: 8426 +[2024-12-10 16:44:26 root] (utils.py 283): INFO Epoch: [10] [ 50/2502] eta: 0:31:05 lr: 0.000016 loss_cls: 3.6140 (3.8362) grad_norm: 2.2935 (2.3499) time: 0.7607 data: 0.0002 max mem: 8426 +[2024-12-10 16:44:34 root] (utils.py 283): INFO Epoch: [10] [ 60/2502] eta: 0:31:00 lr: 0.000016 loss_cls: 3.9371 (3.8551) grad_norm: 2.2637 (2.3408) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 16:44:42 root] (utils.py 283): INFO Epoch: [10] [ 70/2502] eta: 0:30:51 lr: 0.000016 loss_cls: 4.1264 (3.8845) grad_norm: 2.2791 (2.3322) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 16:44:49 root] (utils.py 283): INFO Epoch: [10] [ 80/2502] eta: 0:30:44 lr: 0.000016 loss_cls: 4.0498 (3.8972) grad_norm: 2.2469 (2.3272) time: 0.7603 data: 0.0002 max mem: 8426 +[2024-12-10 16:44:57 root] (utils.py 283): INFO Epoch: [10] [ 90/2502] eta: 0:30:38 lr: 0.000016 loss_cls: 3.9470 (3.8803) grad_norm: 2.2336 (2.3183) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 16:45:05 root] (utils.py 283): INFO Epoch: [10] [ 100/2502] eta: 0:30:30 lr: 0.000016 loss_cls: 3.4305 (3.8473) grad_norm: 2.2871 (2.3207) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-10 16:45:12 root] (utils.py 283): INFO Epoch: [10] [ 110/2502] eta: 0:30:22 lr: 0.000016 loss_cls: 3.6399 (3.8542) grad_norm: 2.3430 (2.3250) time: 0.7609 data: 0.0002 max mem: 8426 +[2024-12-10 16:45:20 root] (utils.py 283): INFO Epoch: [10] [ 120/2502] eta: 0:30:15 lr: 0.000016 loss_cls: 3.9695 (3.8587) grad_norm: 2.3585 (2.3249) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 16:45:28 root] (utils.py 283): INFO Epoch: [10] [ 130/2502] eta: 0:30:08 lr: 0.000016 loss_cls: 3.9265 (3.8546) grad_norm: 2.3197 (2.3239) time: 0.7632 data: 0.0002 max mem: 8426 +[2024-12-10 16:45:35 root] (utils.py 283): INFO Epoch: [10] [ 140/2502] eta: 0:30:00 lr: 0.000016 loss_cls: 3.7914 (3.8543) grad_norm: 2.3087 (2.3216) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 16:45:43 root] (utils.py 283): INFO Epoch: [10] [ 150/2502] eta: 0:29:53 lr: 0.000016 loss_cls: 4.0936 (3.8710) grad_norm: 2.3087 (2.3248) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 16:45:50 root] (utils.py 283): INFO Epoch: [10] [ 160/2502] eta: 0:29:46 lr: 0.000016 loss_cls: 4.3123 (3.8833) grad_norm: 2.2897 (2.3203) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-10 16:45:58 root] (utils.py 283): INFO Epoch: [10] [ 170/2502] eta: 0:29:37 lr: 0.000016 loss_cls: 3.9934 (3.8687) grad_norm: 2.2329 (2.3185) time: 0.7630 data: 0.0002 max mem: 8426 +[2024-12-10 16:46:06 root] (utils.py 283): INFO Epoch: [10] [ 180/2502] eta: 0:29:30 lr: 0.000016 loss_cls: 3.9934 (3.8816) grad_norm: 2.2942 (2.3177) time: 0.7612 data: 0.0002 max mem: 8426 +[2024-12-10 16:46:13 root] (utils.py 283): INFO Epoch: [10] [ 190/2502] eta: 0:29:23 lr: 0.000016 loss_cls: 4.2468 (3.9048) grad_norm: 2.3300 (2.3202) time: 0.7666 data: 0.0002 max mem: 8426 +[2024-12-10 16:46:21 root] (utils.py 283): INFO Epoch: [10] [ 200/2502] eta: 0:29:16 lr: 0.000016 loss_cls: 4.3603 (3.9237) grad_norm: 2.3130 (2.3170) time: 0.7682 data: 0.0002 max mem: 8426 +[2024-12-10 16:46:29 root] (utils.py 283): INFO Epoch: [10] [ 210/2502] eta: 0:29:10 lr: 0.000016 loss_cls: 4.3348 (3.9355) grad_norm: 2.3266 (2.3172) time: 0.7699 data: 0.0002 max mem: 8426 +[2024-12-10 16:46:36 root] (utils.py 283): INFO Epoch: [10] [ 220/2502] eta: 0:29:02 lr: 0.000016 loss_cls: 4.2014 (3.9377) grad_norm: 2.3359 (2.3161) time: 0.7703 data: 0.0002 max mem: 8426 +[2024-12-10 16:46:44 root] (utils.py 283): INFO Epoch: [10] [ 230/2502] eta: 0:28:54 lr: 0.000016 loss_cls: 3.9924 (3.9431) grad_norm: 2.3418 (2.3189) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 16:46:52 root] (utils.py 283): INFO Epoch: [10] [ 240/2502] eta: 0:28:47 lr: 0.000016 loss_cls: 3.9924 (3.9364) grad_norm: 2.3333 (2.3203) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 16:47:00 root] (utils.py 283): INFO Epoch: [10] [ 250/2502] eta: 0:28:41 lr: 0.000016 loss_cls: 3.9579 (3.9336) grad_norm: 2.2884 (2.3200) time: 0.7729 data: 0.0002 max mem: 8426 +[2024-12-10 16:47:07 root] (utils.py 283): INFO Epoch: [10] [ 260/2502] eta: 0:28:35 lr: 0.000016 loss_cls: 3.9435 (3.9330) grad_norm: 2.3287 (2.3210) time: 0.7814 data: 0.0002 max mem: 8426 +[2024-12-10 16:47:15 root] (utils.py 283): INFO Epoch: [10] [ 270/2502] eta: 0:28:27 lr: 0.000016 loss_cls: 3.9435 (3.9331) grad_norm: 2.3469 (2.3206) time: 0.7712 data: 0.0003 max mem: 8426 +[2024-12-10 16:47:23 root] (utils.py 283): INFO Epoch: [10] [ 280/2502] eta: 0:28:19 lr: 0.000016 loss_cls: 4.1828 (3.9407) grad_norm: 2.3752 (2.3219) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 16:47:30 root] (utils.py 283): INFO Epoch: [10] [ 290/2502] eta: 0:28:11 lr: 0.000016 loss_cls: 4.0620 (3.9363) grad_norm: 2.3732 (2.3235) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 16:47:38 root] (utils.py 283): INFO Epoch: [10] [ 300/2502] eta: 0:28:04 lr: 0.000016 loss_cls: 3.9276 (3.9352) grad_norm: 2.3908 (2.3270) time: 0.7678 data: 0.0002 max mem: 8426 +[2024-12-10 16:47:46 root] (utils.py 283): INFO Epoch: [10] [ 310/2502] eta: 0:27:57 lr: 0.000016 loss_cls: 3.8315 (3.9349) grad_norm: 2.4043 (2.3272) time: 0.7696 data: 0.0002 max mem: 8426 +[2024-12-10 16:47:53 root] (utils.py 283): INFO Epoch: [10] [ 320/2502] eta: 0:27:49 lr: 0.000016 loss_cls: 4.1869 (3.9409) grad_norm: 2.3456 (2.3276) time: 0.7661 data: 0.0002 max mem: 8426 +[2024-12-10 16:48:01 root] (utils.py 283): INFO Epoch: [10] [ 330/2502] eta: 0:27:42 lr: 0.000016 loss_cls: 4.1869 (3.9397) grad_norm: 2.2863 (2.3266) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 16:48:09 root] (utils.py 283): INFO Epoch: [10] [ 340/2502] eta: 0:27:34 lr: 0.000016 loss_cls: 4.1339 (3.9449) grad_norm: 2.2988 (2.3267) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 16:48:16 root] (utils.py 283): INFO Epoch: [10] [ 350/2502] eta: 0:27:26 lr: 0.000016 loss_cls: 4.1339 (3.9437) grad_norm: 2.3140 (2.3262) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 16:48:24 root] (utils.py 283): INFO Epoch: [10] [ 360/2502] eta: 0:27:18 lr: 0.000016 loss_cls: 4.0016 (3.9364) grad_norm: 2.3176 (2.3266) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 16:48:32 root] (utils.py 283): INFO Epoch: [10] [ 370/2502] eta: 0:27:11 lr: 0.000016 loss_cls: 3.8833 (3.9365) grad_norm: 2.2958 (2.3267) time: 0.7632 data: 0.0003 max mem: 8426 +[2024-12-10 16:48:39 root] (utils.py 283): INFO Epoch: [10] [ 380/2502] eta: 0:27:03 lr: 0.000016 loss_cls: 4.0622 (3.9350) grad_norm: 2.3403 (2.3285) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 16:48:47 root] (utils.py 283): INFO Epoch: [10] [ 390/2502] eta: 0:26:55 lr: 0.000016 loss_cls: 4.0202 (3.9340) grad_norm: 2.4116 (2.3310) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 16:48:54 root] (utils.py 283): INFO Epoch: [10] [ 400/2502] eta: 0:26:47 lr: 0.000016 loss_cls: 4.0202 (3.9349) grad_norm: 2.3548 (2.3314) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 16:49:02 root] (utils.py 283): INFO Epoch: [10] [ 410/2502] eta: 0:26:40 lr: 0.000016 loss_cls: 3.8607 (3.9329) grad_norm: 2.3551 (2.3324) time: 0.7677 data: 0.0002 max mem: 8426 +[2024-12-10 16:49:10 root] (utils.py 283): INFO Epoch: [10] [ 420/2502] eta: 0:26:33 lr: 0.000016 loss_cls: 4.2448 (3.9435) grad_norm: 2.3619 (2.3331) time: 0.7688 data: 0.0002 max mem: 8426 +[2024-12-10 16:49:17 root] (utils.py 283): INFO Epoch: [10] [ 430/2502] eta: 0:26:25 lr: 0.000016 loss_cls: 4.3535 (3.9510) grad_norm: 2.2952 (2.3324) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-10 16:49:25 root] (utils.py 283): INFO Epoch: [10] [ 440/2502] eta: 0:26:17 lr: 0.000016 loss_cls: 4.0920 (3.9413) grad_norm: 2.2952 (2.3318) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-10 16:49:33 root] (utils.py 283): INFO Epoch: [10] [ 450/2502] eta: 0:26:10 lr: 0.000016 loss_cls: 3.3127 (3.9350) grad_norm: 2.3582 (2.3337) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-10 16:49:40 root] (utils.py 283): INFO Epoch: [10] [ 460/2502] eta: 0:26:02 lr: 0.000016 loss_cls: 4.0688 (3.9384) grad_norm: 2.2789 (2.3328) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 16:49:48 root] (utils.py 283): INFO Epoch: [10] [ 470/2502] eta: 0:25:54 lr: 0.000016 loss_cls: 4.2517 (3.9389) grad_norm: 2.2137 (2.3303) time: 0.7613 data: 0.0002 max mem: 8426 +[2024-12-10 16:49:56 root] (utils.py 283): INFO Epoch: [10] [ 480/2502] eta: 0:25:46 lr: 0.000016 loss_cls: 3.9375 (3.9331) grad_norm: 2.2378 (2.3293) time: 0.7586 data: 0.0002 max mem: 8426 +[2024-12-10 16:50:03 root] (utils.py 283): INFO Epoch: [10] [ 490/2502] eta: 0:25:38 lr: 0.000016 loss_cls: 3.5427 (3.9286) grad_norm: 2.2928 (2.3290) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 16:50:11 root] (utils.py 283): INFO Epoch: [10] [ 500/2502] eta: 0:25:31 lr: 0.000016 loss_cls: 3.9112 (3.9243) grad_norm: 2.2685 (2.3283) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 16:50:18 root] (utils.py 283): INFO Epoch: [10] [ 510/2502] eta: 0:25:23 lr: 0.000016 loss_cls: 3.6101 (3.9154) grad_norm: 2.2685 (2.3278) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 16:50:26 root] (utils.py 283): INFO Epoch: [10] [ 520/2502] eta: 0:25:15 lr: 0.000016 loss_cls: 3.3731 (3.9126) grad_norm: 2.3186 (2.3284) time: 0.7608 data: 0.0002 max mem: 8426 +[2024-12-10 16:50:34 root] (utils.py 283): INFO Epoch: [10] [ 530/2502] eta: 0:25:08 lr: 0.000016 loss_cls: 4.0381 (3.9144) grad_norm: 2.3161 (2.3283) time: 0.7654 data: 0.0002 max mem: 8426 +[2024-12-10 16:50:41 root] (utils.py 283): INFO Epoch: [10] [ 540/2502] eta: 0:25:00 lr: 0.000016 loss_cls: 4.0808 (3.9111) grad_norm: 2.3357 (2.3298) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-10 16:50:49 root] (utils.py 283): INFO Epoch: [10] [ 550/2502] eta: 0:24:52 lr: 0.000016 loss_cls: 3.8280 (3.9126) grad_norm: 2.4365 (2.3310) time: 0.7633 data: 0.0002 max mem: 8426 +[2024-12-10 16:50:57 root] (utils.py 283): INFO Epoch: [10] [ 560/2502] eta: 0:24:45 lr: 0.000016 loss_cls: 4.0500 (3.9129) grad_norm: 2.3515 (2.3314) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 16:51:04 root] (utils.py 283): INFO Epoch: [10] [ 570/2502] eta: 0:24:37 lr: 0.000016 loss_cls: 4.0938 (3.9175) grad_norm: 2.3102 (2.3312) time: 0.7603 data: 0.0002 max mem: 8426 +[2024-12-10 16:51:12 root] (utils.py 283): INFO Epoch: [10] [ 580/2502] eta: 0:24:29 lr: 0.000016 loss_cls: 3.9079 (3.9165) grad_norm: 2.3054 (2.3307) time: 0.7613 data: 0.0002 max mem: 8426 +[2024-12-10 16:51:19 root] (utils.py 283): INFO Epoch: [10] [ 590/2502] eta: 0:24:21 lr: 0.000016 loss_cls: 4.0478 (3.9182) grad_norm: 2.3422 (2.3307) time: 0.7603 data: 0.0002 max mem: 8426 +[2024-12-10 16:51:27 root] (utils.py 283): INFO Epoch: [10] [ 600/2502] eta: 0:24:13 lr: 0.000016 loss_cls: 4.2252 (3.9226) grad_norm: 2.3529 (2.3306) time: 0.7603 data: 0.0002 max mem: 8426 +[2024-12-10 16:51:35 root] (utils.py 283): INFO Epoch: [10] [ 610/2502] eta: 0:24:06 lr: 0.000016 loss_cls: 4.2366 (3.9232) grad_norm: 2.3300 (2.3310) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 16:51:42 root] (utils.py 283): INFO Epoch: [10] [ 620/2502] eta: 0:23:58 lr: 0.000016 loss_cls: 4.1083 (3.9222) grad_norm: 2.3502 (2.3320) time: 0.7633 data: 0.0002 max mem: 8426 +[2024-12-10 16:51:50 root] (utils.py 283): INFO Epoch: [10] [ 630/2502] eta: 0:23:50 lr: 0.000016 loss_cls: 3.9987 (3.9201) grad_norm: 2.3567 (2.3323) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 16:51:58 root] (utils.py 283): INFO Epoch: [10] [ 640/2502] eta: 0:23:43 lr: 0.000016 loss_cls: 4.0323 (3.9208) grad_norm: 2.3141 (2.3325) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 16:52:05 root] (utils.py 283): INFO Epoch: [10] [ 650/2502] eta: 0:23:35 lr: 0.000016 loss_cls: 4.0323 (3.9193) grad_norm: 2.3586 (2.3331) time: 0.7614 data: 0.0002 max mem: 8426 +[2024-12-10 16:52:13 root] (utils.py 283): INFO Epoch: [10] [ 660/2502] eta: 0:23:27 lr: 0.000016 loss_cls: 3.9596 (3.9163) grad_norm: 2.3654 (2.3334) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 16:52:21 root] (utils.py 283): INFO Epoch: [10] [ 670/2502] eta: 0:23:20 lr: 0.000016 loss_cls: 3.6379 (3.9132) grad_norm: 2.3670 (2.3344) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 16:52:28 root] (utils.py 283): INFO Epoch: [10] [ 680/2502] eta: 0:23:12 lr: 0.000016 loss_cls: 3.5282 (3.9093) grad_norm: 2.3670 (2.3344) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 16:52:36 root] (utils.py 283): INFO Epoch: [10] [ 690/2502] eta: 0:23:04 lr: 0.000016 loss_cls: 4.0425 (3.9126) grad_norm: 2.3020 (2.3346) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 16:52:43 root] (utils.py 283): INFO Epoch: [10] [ 700/2502] eta: 0:22:57 lr: 0.000016 loss_cls: 4.2892 (3.9171) grad_norm: 2.2673 (2.3345) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 16:52:51 root] (utils.py 283): INFO Epoch: [10] [ 710/2502] eta: 0:22:49 lr: 0.000016 loss_cls: 4.2481 (3.9166) grad_norm: 2.2673 (2.3343) time: 0.7675 data: 0.0002 max mem: 8426 +[2024-12-10 16:52:59 root] (utils.py 283): INFO Epoch: [10] [ 720/2502] eta: 0:22:42 lr: 0.000016 loss_cls: 4.0656 (3.9187) grad_norm: 2.2762 (2.3339) time: 0.7743 data: 0.0002 max mem: 8426 +[2024-12-10 16:53:07 root] (utils.py 283): INFO Epoch: [10] [ 730/2502] eta: 0:22:34 lr: 0.000016 loss_cls: 3.9842 (3.9168) grad_norm: 2.2707 (2.3338) time: 0.7762 data: 0.0002 max mem: 8426 +[2024-12-10 16:53:14 root] (utils.py 283): INFO Epoch: [10] [ 740/2502] eta: 0:22:27 lr: 0.000016 loss_cls: 4.1308 (3.9191) grad_norm: 2.2941 (2.3335) time: 0.7703 data: 0.0002 max mem: 8426 +[2024-12-10 16:53:22 root] (utils.py 283): INFO Epoch: [10] [ 750/2502] eta: 0:22:19 lr: 0.000016 loss_cls: 3.8636 (3.9137) grad_norm: 2.3564 (2.3339) time: 0.7685 data: 0.0002 max mem: 8426 +[2024-12-10 16:53:30 root] (utils.py 283): INFO Epoch: [10] [ 760/2502] eta: 0:22:12 lr: 0.000016 loss_cls: 3.5607 (3.9108) grad_norm: 2.3186 (2.3334) time: 0.7698 data: 0.0002 max mem: 8426 +[2024-12-10 16:53:38 root] (utils.py 283): INFO Epoch: [10] [ 770/2502] eta: 0:22:05 lr: 0.000016 loss_cls: 3.8025 (3.9114) grad_norm: 2.3186 (2.3342) time: 0.7768 data: 0.0002 max mem: 8426 +[2024-12-10 16:53:45 root] (utils.py 283): INFO Epoch: [10] [ 780/2502] eta: 0:21:57 lr: 0.000016 loss_cls: 3.9118 (3.9124) grad_norm: 2.3447 (2.3336) time: 0.7744 data: 0.0002 max mem: 8426 +[2024-12-10 16:53:53 root] (utils.py 283): INFO Epoch: [10] [ 790/2502] eta: 0:21:49 lr: 0.000016 loss_cls: 4.0583 (3.9143) grad_norm: 2.3008 (2.3337) time: 0.7697 data: 0.0002 max mem: 8426 +[2024-12-10 16:54:01 root] (utils.py 283): INFO Epoch: [10] [ 800/2502] eta: 0:21:42 lr: 0.000016 loss_cls: 3.9925 (3.9154) grad_norm: 2.3382 (2.3343) time: 0.7754 data: 0.0002 max mem: 8426 +[2024-12-10 16:54:09 root] (utils.py 283): INFO Epoch: [10] [ 810/2502] eta: 0:21:35 lr: 0.000016 loss_cls: 3.9925 (3.9158) grad_norm: 2.3382 (2.3346) time: 0.7821 data: 0.0002 max mem: 8426 +[2024-12-10 16:54:16 root] (utils.py 283): INFO Epoch: [10] [ 820/2502] eta: 0:21:28 lr: 0.000016 loss_cls: 3.8206 (3.9159) grad_norm: 2.3429 (2.3349) time: 0.7827 data: 0.0002 max mem: 8426 +[2024-12-10 16:54:24 root] (utils.py 283): INFO Epoch: [10] [ 830/2502] eta: 0:21:20 lr: 0.000016 loss_cls: 4.1240 (3.9201) grad_norm: 2.3273 (2.3349) time: 0.7755 data: 0.0002 max mem: 8426 +[2024-12-10 16:54:32 root] (utils.py 283): INFO Epoch: [10] [ 840/2502] eta: 0:21:12 lr: 0.000016 loss_cls: 4.2073 (3.9214) grad_norm: 2.2828 (2.3342) time: 0.7673 data: 0.0002 max mem: 8426 +[2024-12-10 16:54:39 root] (utils.py 283): INFO Epoch: [10] [ 850/2502] eta: 0:21:05 lr: 0.000016 loss_cls: 4.3343 (3.9237) grad_norm: 2.3126 (2.3346) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 16:54:47 root] (utils.py 283): INFO Epoch: [10] [ 860/2502] eta: 0:20:57 lr: 0.000016 loss_cls: 3.9419 (3.9208) grad_norm: 2.3739 (2.3347) time: 0.7668 data: 0.0002 max mem: 8426 +[2024-12-10 16:54:55 root] (utils.py 283): INFO Epoch: [10] [ 870/2502] eta: 0:20:50 lr: 0.000016 loss_cls: 3.4980 (3.9160) grad_norm: 2.3662 (2.3352) time: 0.7743 data: 0.0002 max mem: 8426 +[2024-12-10 16:55:03 root] (utils.py 283): INFO Epoch: [10] [ 880/2502] eta: 0:20:42 lr: 0.000016 loss_cls: 3.6211 (3.9159) grad_norm: 2.3118 (2.3345) time: 0.7701 data: 0.0003 max mem: 8426 +[2024-12-10 16:55:10 root] (utils.py 283): INFO Epoch: [10] [ 890/2502] eta: 0:20:34 lr: 0.000016 loss_cls: 3.7750 (3.9125) grad_norm: 2.3072 (2.3348) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 16:55:18 root] (utils.py 283): INFO Epoch: [10] [ 900/2502] eta: 0:20:26 lr: 0.000016 loss_cls: 3.8815 (3.9138) grad_norm: 2.3072 (2.3342) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 16:55:25 root] (utils.py 283): INFO Epoch: [10] [ 910/2502] eta: 0:20:19 lr: 0.000016 loss_cls: 4.0055 (3.9146) grad_norm: 2.3449 (2.3346) time: 0.7591 data: 0.0002 max mem: 8426 +[2024-12-10 16:55:33 root] (utils.py 283): INFO Epoch: [10] [ 920/2502] eta: 0:20:11 lr: 0.000016 loss_cls: 3.9673 (3.9142) grad_norm: 2.3509 (2.3346) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 16:55:41 root] (utils.py 283): INFO Epoch: [10] [ 930/2502] eta: 0:20:03 lr: 0.000016 loss_cls: 3.5154 (3.9115) grad_norm: 2.3509 (2.3346) time: 0.7695 data: 0.0002 max mem: 8426 +[2024-12-10 16:55:48 root] (utils.py 283): INFO Epoch: [10] [ 940/2502] eta: 0:19:56 lr: 0.000016 loss_cls: 3.6845 (3.9116) grad_norm: 2.3016 (2.3340) time: 0.7689 data: 0.0002 max mem: 8426 +[2024-12-10 16:55:56 root] (utils.py 283): INFO Epoch: [10] [ 950/2502] eta: 0:19:48 lr: 0.000016 loss_cls: 3.9945 (3.9121) grad_norm: 2.2911 (2.3345) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 16:56:04 root] (utils.py 283): INFO Epoch: [10] [ 960/2502] eta: 0:19:40 lr: 0.000016 loss_cls: 3.8230 (3.9102) grad_norm: 2.3449 (2.3343) time: 0.7601 data: 0.0002 max mem: 8426 +[2024-12-10 16:56:11 root] (utils.py 283): INFO Epoch: [10] [ 970/2502] eta: 0:19:32 lr: 0.000016 loss_cls: 3.8037 (3.9084) grad_norm: 2.3449 (2.3340) time: 0.7618 data: 0.0002 max mem: 8426 +[2024-12-10 16:56:19 root] (utils.py 283): INFO Epoch: [10] [ 980/2502] eta: 0:19:25 lr: 0.000016 loss_cls: 3.8524 (3.9066) grad_norm: 2.3447 (2.3335) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 16:56:27 root] (utils.py 283): INFO Epoch: [10] [ 990/2502] eta: 0:19:17 lr: 0.000016 loss_cls: 3.9211 (3.9078) grad_norm: 2.2692 (2.3332) time: 0.7699 data: 0.0002 max mem: 8426 +[2024-12-10 16:56:34 root] (utils.py 283): INFO Epoch: [10] [1000/2502] eta: 0:19:10 lr: 0.000016 loss_cls: 4.0157 (3.9072) grad_norm: 2.2763 (2.3332) time: 0.7663 data: 0.0003 max mem: 8426 +[2024-12-10 16:56:42 root] (utils.py 283): INFO Epoch: [10] [1010/2502] eta: 0:19:02 lr: 0.000016 loss_cls: 4.0157 (3.9073) grad_norm: 2.3188 (2.3331) time: 0.7606 data: 0.0003 max mem: 8426 +[2024-12-10 16:56:49 root] (utils.py 283): INFO Epoch: [10] [1020/2502] eta: 0:18:54 lr: 0.000016 loss_cls: 3.9526 (3.9071) grad_norm: 2.3028 (2.3329) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 16:56:57 root] (utils.py 283): INFO Epoch: [10] [1030/2502] eta: 0:18:46 lr: 0.000016 loss_cls: 3.8072 (3.9056) grad_norm: 2.2765 (2.3325) time: 0.7639 data: 0.0003 max mem: 8426 +[2024-12-10 16:57:05 root] (utils.py 283): INFO Epoch: [10] [1040/2502] eta: 0:18:39 lr: 0.000016 loss_cls: 3.8231 (3.9051) grad_norm: 2.2794 (2.3331) time: 0.7630 data: 0.0003 max mem: 8426 +[2024-12-10 16:57:12 root] (utils.py 283): INFO Epoch: [10] [1050/2502] eta: 0:18:31 lr: 0.000016 loss_cls: 3.8231 (3.9035) grad_norm: 2.3107 (2.3331) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 16:57:20 root] (utils.py 283): INFO Epoch: [10] [1060/2502] eta: 0:18:23 lr: 0.000016 loss_cls: 3.7458 (3.9020) grad_norm: 2.2918 (2.3326) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 16:57:28 root] (utils.py 283): INFO Epoch: [10] [1070/2502] eta: 0:18:16 lr: 0.000016 loss_cls: 3.9759 (3.9047) grad_norm: 2.2719 (2.3330) time: 0.7691 data: 0.0002 max mem: 8426 +[2024-12-10 16:57:36 root] (utils.py 283): INFO Epoch: [10] [1080/2502] eta: 0:18:08 lr: 0.000016 loss_cls: 4.0845 (3.9045) grad_norm: 2.3328 (2.3330) time: 0.7760 data: 0.0003 max mem: 8426 +[2024-12-10 16:57:43 root] (utils.py 283): INFO Epoch: [10] [1090/2502] eta: 0:18:01 lr: 0.000016 loss_cls: 3.9423 (3.9054) grad_norm: 2.3541 (2.3340) time: 0.7791 data: 0.0002 max mem: 8426 +[2024-12-10 16:57:51 root] (utils.py 283): INFO Epoch: [10] [1100/2502] eta: 0:17:53 lr: 0.000016 loss_cls: 3.9027 (3.9026) grad_norm: 2.3158 (2.3340) time: 0.7694 data: 0.0002 max mem: 8426 +[2024-12-10 16:57:59 root] (utils.py 283): INFO Epoch: [10] [1110/2502] eta: 0:17:46 lr: 0.000016 loss_cls: 3.7600 (3.9027) grad_norm: 2.3020 (2.3342) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 16:58:06 root] (utils.py 283): INFO Epoch: [10] [1120/2502] eta: 0:17:38 lr: 0.000016 loss_cls: 4.0520 (3.9039) grad_norm: 2.3387 (2.3345) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-10 16:58:14 root] (utils.py 283): INFO Epoch: [10] [1130/2502] eta: 0:17:30 lr: 0.000016 loss_cls: 3.7869 (3.9026) grad_norm: 2.3288 (2.3345) time: 0.7735 data: 0.0002 max mem: 8426 +[2024-12-10 16:58:22 root] (utils.py 283): INFO Epoch: [10] [1140/2502] eta: 0:17:23 lr: 0.000016 loss_cls: 3.7045 (3.9013) grad_norm: 2.3288 (2.3348) time: 0.7786 data: 0.0003 max mem: 8426 +[2024-12-10 16:58:30 root] (utils.py 283): INFO Epoch: [10] [1150/2502] eta: 0:17:15 lr: 0.000016 loss_cls: 4.0444 (3.9026) grad_norm: 2.3646 (2.3350) time: 0.7744 data: 0.0003 max mem: 8426 +[2024-12-10 16:58:37 root] (utils.py 283): INFO Epoch: [10] [1160/2502] eta: 0:17:08 lr: 0.000016 loss_cls: 4.1539 (3.9042) grad_norm: 2.2834 (2.3347) time: 0.7746 data: 0.0003 max mem: 8426 +[2024-12-10 16:58:45 root] (utils.py 283): INFO Epoch: [10] [1170/2502] eta: 0:17:00 lr: 0.000016 loss_cls: 3.9336 (3.9034) grad_norm: 2.2776 (2.3349) time: 0.7752 data: 0.0003 max mem: 8426 +[2024-12-10 16:58:53 root] (utils.py 283): INFO Epoch: [10] [1180/2502] eta: 0:16:52 lr: 0.000016 loss_cls: 4.1667 (3.9045) grad_norm: 2.2226 (2.3339) time: 0.7685 data: 0.0002 max mem: 8426 +[2024-12-10 16:59:00 root] (utils.py 283): INFO Epoch: [10] [1190/2502] eta: 0:16:45 lr: 0.000016 loss_cls: 3.8988 (3.9028) grad_norm: 2.2583 (2.3345) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 16:59:08 root] (utils.py 283): INFO Epoch: [10] [1200/2502] eta: 0:16:37 lr: 0.000016 loss_cls: 3.8688 (3.9047) grad_norm: 2.3191 (2.3342) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 16:59:16 root] (utils.py 283): INFO Epoch: [10] [1210/2502] eta: 0:16:30 lr: 0.000016 loss_cls: 4.1751 (3.9038) grad_norm: 2.2680 (2.3336) time: 0.7674 data: 0.0002 max mem: 8426 +[2024-12-10 16:59:23 root] (utils.py 283): INFO Epoch: [10] [1220/2502] eta: 0:16:22 lr: 0.000016 loss_cls: 4.1640 (3.9039) grad_norm: 2.2675 (2.3331) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-10 16:59:31 root] (utils.py 283): INFO Epoch: [10] [1230/2502] eta: 0:16:14 lr: 0.000016 loss_cls: 4.1640 (3.9051) grad_norm: 2.2854 (2.3333) time: 0.7615 data: 0.0002 max mem: 8426 +[2024-12-10 16:59:39 root] (utils.py 283): INFO Epoch: [10] [1240/2502] eta: 0:16:06 lr: 0.000016 loss_cls: 4.2126 (3.9073) grad_norm: 2.3062 (2.3329) time: 0.7624 data: 0.0003 max mem: 8426 +[2024-12-10 16:59:46 root] (utils.py 283): INFO Epoch: [10] [1250/2502] eta: 0:15:59 lr: 0.000016 loss_cls: 4.1768 (3.9082) grad_norm: 2.2945 (2.3337) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-10 16:59:54 root] (utils.py 283): INFO Epoch: [10] [1260/2502] eta: 0:15:51 lr: 0.000016 loss_cls: 4.1413 (3.9100) grad_norm: 2.3635 (2.3341) time: 0.7690 data: 0.0002 max mem: 8426 +[2024-12-10 17:00:02 root] (utils.py 283): INFO Epoch: [10] [1270/2502] eta: 0:15:43 lr: 0.000016 loss_cls: 4.1714 (3.9117) grad_norm: 2.2882 (2.3338) time: 0.7688 data: 0.0002 max mem: 8426 +[2024-12-10 17:00:09 root] (utils.py 283): INFO Epoch: [10] [1280/2502] eta: 0:15:36 lr: 0.000016 loss_cls: 4.1376 (3.9118) grad_norm: 2.3401 (2.3346) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-10 17:00:17 root] (utils.py 283): INFO Epoch: [10] [1290/2502] eta: 0:15:28 lr: 0.000016 loss_cls: 4.1080 (3.9110) grad_norm: 2.3535 (2.3346) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 17:00:25 root] (utils.py 283): INFO Epoch: [10] [1300/2502] eta: 0:15:20 lr: 0.000016 loss_cls: 3.8478 (3.9112) grad_norm: 2.3253 (2.3345) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 17:00:32 root] (utils.py 283): INFO Epoch: [10] [1310/2502] eta: 0:15:13 lr: 0.000016 loss_cls: 4.1281 (3.9116) grad_norm: 2.3488 (2.3354) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-10 17:00:40 root] (utils.py 283): INFO Epoch: [10] [1320/2502] eta: 0:15:05 lr: 0.000016 loss_cls: 4.1462 (3.9127) grad_norm: 2.3502 (2.3355) time: 0.7630 data: 0.0002 max mem: 8426 +[2024-12-10 17:00:47 root] (utils.py 283): INFO Epoch: [10] [1330/2502] eta: 0:14:57 lr: 0.000016 loss_cls: 3.8907 (3.9119) grad_norm: 2.3303 (2.3357) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 17:00:55 root] (utils.py 283): INFO Epoch: [10] [1340/2502] eta: 0:14:50 lr: 0.000016 loss_cls: 3.8963 (3.9132) grad_norm: 2.3837 (2.3358) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 17:01:03 root] (utils.py 283): INFO Epoch: [10] [1350/2502] eta: 0:14:42 lr: 0.000016 loss_cls: 4.0546 (3.9136) grad_norm: 2.2875 (2.3357) time: 0.7632 data: 0.0002 max mem: 8426 +[2024-12-10 17:01:10 root] (utils.py 283): INFO Epoch: [10] [1360/2502] eta: 0:14:34 lr: 0.000016 loss_cls: 4.2011 (3.9153) grad_norm: 2.3141 (2.3360) time: 0.7671 data: 0.0002 max mem: 8426 +[2024-12-10 17:01:18 root] (utils.py 283): INFO Epoch: [10] [1370/2502] eta: 0:14:27 lr: 0.000016 loss_cls: 4.1595 (3.9166) grad_norm: 2.3332 (2.3362) time: 0.7700 data: 0.0002 max mem: 8426 +[2024-12-10 17:01:26 root] (utils.py 283): INFO Epoch: [10] [1380/2502] eta: 0:14:19 lr: 0.000016 loss_cls: 3.9702 (3.9179) grad_norm: 2.3198 (2.3360) time: 0.7689 data: 0.0002 max mem: 8426 +[2024-12-10 17:01:33 root] (utils.py 283): INFO Epoch: [10] [1390/2502] eta: 0:14:11 lr: 0.000016 loss_cls: 3.9378 (3.9168) grad_norm: 2.3267 (2.3363) time: 0.7654 data: 0.0002 max mem: 8426 +[2024-12-10 17:01:41 root] (utils.py 283): INFO Epoch: [10] [1400/2502] eta: 0:14:04 lr: 0.000016 loss_cls: 3.9831 (3.9182) grad_norm: 2.3920 (2.3367) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 17:01:49 root] (utils.py 283): INFO Epoch: [10] [1410/2502] eta: 0:13:56 lr: 0.000016 loss_cls: 4.1205 (3.9181) grad_norm: 2.3754 (2.3369) time: 0.7610 data: 0.0002 max mem: 8426 +[2024-12-10 17:01:56 root] (utils.py 283): INFO Epoch: [10] [1420/2502] eta: 0:13:48 lr: 0.000016 loss_cls: 4.0263 (3.9186) grad_norm: 2.3313 (2.3364) time: 0.7681 data: 0.0002 max mem: 8426 +[2024-12-10 17:02:04 root] (utils.py 283): INFO Epoch: [10] [1430/2502] eta: 0:13:41 lr: 0.000016 loss_cls: 4.0263 (3.9190) grad_norm: 2.2868 (2.3365) time: 0.7745 data: 0.0002 max mem: 8426 +[2024-12-10 17:02:12 root] (utils.py 283): INFO Epoch: [10] [1440/2502] eta: 0:13:33 lr: 0.000016 loss_cls: 4.1313 (3.9196) grad_norm: 2.3429 (2.3367) time: 0.7691 data: 0.0002 max mem: 8426 +[2024-12-10 17:02:20 root] (utils.py 283): INFO Epoch: [10] [1450/2502] eta: 0:13:26 lr: 0.000016 loss_cls: 4.1313 (3.9198) grad_norm: 2.3069 (2.3362) time: 0.7686 data: 0.0002 max mem: 8426 +[2024-12-10 17:02:27 root] (utils.py 283): INFO Epoch: [10] [1460/2502] eta: 0:13:18 lr: 0.000016 loss_cls: 3.6116 (3.9175) grad_norm: 2.2766 (2.3361) time: 0.7682 data: 0.0002 max mem: 8426 +[2024-12-10 17:02:35 root] (utils.py 283): INFO Epoch: [10] [1470/2502] eta: 0:13:10 lr: 0.000016 loss_cls: 3.7223 (3.9172) grad_norm: 2.3037 (2.3362) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 17:02:42 root] (utils.py 283): INFO Epoch: [10] [1480/2502] eta: 0:13:02 lr: 0.000016 loss_cls: 3.9458 (3.9183) grad_norm: 2.3497 (2.3362) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 17:02:50 root] (utils.py 283): INFO Epoch: [10] [1490/2502] eta: 0:12:55 lr: 0.000016 loss_cls: 4.1974 (3.9186) grad_norm: 2.3291 (2.3361) time: 0.7674 data: 0.0002 max mem: 8426 +[2024-12-10 17:02:58 root] (utils.py 283): INFO Epoch: [10] [1500/2502] eta: 0:12:47 lr: 0.000016 loss_cls: 4.1609 (3.9185) grad_norm: 2.2969 (2.3358) time: 0.7775 data: 0.0002 max mem: 8426 +[2024-12-10 17:03:06 root] (utils.py 283): INFO Epoch: [10] [1510/2502] eta: 0:12:40 lr: 0.000016 loss_cls: 3.9255 (3.9190) grad_norm: 2.2391 (2.3351) time: 0.7678 data: 0.0002 max mem: 8426 +[2024-12-10 17:03:13 root] (utils.py 283): INFO Epoch: [10] [1520/2502] eta: 0:12:32 lr: 0.000016 loss_cls: 4.1685 (3.9220) grad_norm: 2.2590 (2.3352) time: 0.7574 data: 0.0002 max mem: 8426 +[2024-12-10 17:03:21 root] (utils.py 283): INFO Epoch: [10] [1530/2502] eta: 0:12:24 lr: 0.000016 loss_cls: 4.4220 (3.9232) grad_norm: 2.3690 (2.3355) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 17:03:28 root] (utils.py 283): INFO Epoch: [10] [1540/2502] eta: 0:12:17 lr: 0.000016 loss_cls: 4.0856 (3.9236) grad_norm: 2.3316 (2.3355) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 17:03:36 root] (utils.py 283): INFO Epoch: [10] [1550/2502] eta: 0:12:09 lr: 0.000016 loss_cls: 4.1605 (3.9245) grad_norm: 2.2594 (2.3348) time: 0.7627 data: 0.0003 max mem: 8426 +[2024-12-10 17:03:44 root] (utils.py 283): INFO Epoch: [10] [1560/2502] eta: 0:12:01 lr: 0.000016 loss_cls: 4.0667 (3.9250) grad_norm: 2.2812 (2.3350) time: 0.7597 data: 0.0002 max mem: 8426 +[2024-12-10 17:03:51 root] (utils.py 283): INFO Epoch: [10] [1570/2502] eta: 0:11:53 lr: 0.000016 loss_cls: 4.2303 (3.9259) grad_norm: 2.3053 (2.3349) time: 0.7594 data: 0.0002 max mem: 8426 +[2024-12-10 17:03:59 root] (utils.py 283): INFO Epoch: [10] [1580/2502] eta: 0:11:46 lr: 0.000016 loss_cls: 4.1779 (3.9253) grad_norm: 2.3436 (2.3353) time: 0.7614 data: 0.0002 max mem: 8426 +[2024-12-10 17:04:07 root] (utils.py 283): INFO Epoch: [10] [1590/2502] eta: 0:11:38 lr: 0.000016 loss_cls: 3.9604 (3.9252) grad_norm: 2.3436 (2.3353) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-10 17:04:14 root] (utils.py 283): INFO Epoch: [10] [1600/2502] eta: 0:11:30 lr: 0.000016 loss_cls: 3.8347 (3.9245) grad_norm: 2.3145 (2.3352) time: 0.7675 data: 0.0002 max mem: 8426 +[2024-12-10 17:04:22 root] (utils.py 283): INFO Epoch: [10] [1610/2502] eta: 0:11:23 lr: 0.000016 loss_cls: 3.7289 (3.9232) grad_norm: 2.3220 (2.3356) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 17:04:30 root] (utils.py 283): INFO Epoch: [10] [1620/2502] eta: 0:11:15 lr: 0.000016 loss_cls: 3.4948 (3.9210) grad_norm: 2.4305 (2.3360) time: 0.7663 data: 0.0003 max mem: 8426 +[2024-12-10 17:04:37 root] (utils.py 283): INFO Epoch: [10] [1630/2502] eta: 0:11:07 lr: 0.000016 loss_cls: 3.9360 (3.9219) grad_norm: 2.3332 (2.3359) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 17:04:45 root] (utils.py 283): INFO Epoch: [10] [1640/2502] eta: 0:11:00 lr: 0.000016 loss_cls: 4.2259 (3.9244) grad_norm: 2.2653 (2.3357) time: 0.7708 data: 0.0002 max mem: 8426 +[2024-12-10 17:04:53 root] (utils.py 283): INFO Epoch: [10] [1650/2502] eta: 0:10:52 lr: 0.000016 loss_cls: 4.3423 (3.9257) grad_norm: 2.3354 (2.3359) time: 0.7805 data: 0.0003 max mem: 8426 +[2024-12-10 17:05:01 root] (utils.py 283): INFO Epoch: [10] [1660/2502] eta: 0:10:45 lr: 0.000016 loss_cls: 4.1875 (3.9264) grad_norm: 2.3383 (2.3358) time: 0.7804 data: 0.0003 max mem: 8426 +[2024-12-10 17:05:08 root] (utils.py 283): INFO Epoch: [10] [1670/2502] eta: 0:10:37 lr: 0.000016 loss_cls: 3.9358 (3.9264) grad_norm: 2.3397 (2.3359) time: 0.7790 data: 0.0002 max mem: 8426 +[2024-12-10 17:05:16 root] (utils.py 283): INFO Epoch: [10] [1680/2502] eta: 0:10:29 lr: 0.000016 loss_cls: 3.9358 (3.9267) grad_norm: 2.3279 (2.3359) time: 0.7715 data: 0.0002 max mem: 8426 +[2024-12-10 17:05:24 root] (utils.py 283): INFO Epoch: [10] [1690/2502] eta: 0:10:22 lr: 0.000016 loss_cls: 3.9553 (3.9273) grad_norm: 2.3003 (2.3355) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 17:05:31 root] (utils.py 283): INFO Epoch: [10] [1700/2502] eta: 0:10:14 lr: 0.000016 loss_cls: 4.0475 (3.9278) grad_norm: 2.3026 (2.3352) time: 0.7597 data: 0.0002 max mem: 8426 +[2024-12-10 17:05:39 root] (utils.py 283): INFO Epoch: [10] [1710/2502] eta: 0:10:06 lr: 0.000016 loss_cls: 4.1295 (3.9289) grad_norm: 2.3178 (2.3351) time: 0.7599 data: 0.0003 max mem: 8426 +[2024-12-10 17:05:46 root] (utils.py 283): INFO Epoch: [10] [1720/2502] eta: 0:09:59 lr: 0.000016 loss_cls: 4.0217 (3.9287) grad_norm: 2.3342 (2.3353) time: 0.7638 data: 0.0003 max mem: 8426 +[2024-12-10 17:05:54 root] (utils.py 283): INFO Epoch: [10] [1730/2502] eta: 0:09:51 lr: 0.000016 loss_cls: 3.7144 (3.9289) grad_norm: 2.2994 (2.3353) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 17:06:02 root] (utils.py 283): INFO Epoch: [10] [1740/2502] eta: 0:09:43 lr: 0.000016 loss_cls: 3.7617 (3.9274) grad_norm: 2.2994 (2.3354) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 17:06:09 root] (utils.py 283): INFO Epoch: [10] [1750/2502] eta: 0:09:36 lr: 0.000016 loss_cls: 3.7617 (3.9278) grad_norm: 2.3129 (2.3352) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 17:06:17 root] (utils.py 283): INFO Epoch: [10] [1760/2502] eta: 0:09:28 lr: 0.000016 loss_cls: 3.9971 (3.9281) grad_norm: 2.2733 (2.3350) time: 0.7632 data: 0.0002 max mem: 8426 +[2024-12-10 17:06:25 root] (utils.py 283): INFO Epoch: [10] [1770/2502] eta: 0:09:20 lr: 0.000016 loss_cls: 4.1347 (3.9295) grad_norm: 2.2852 (2.3347) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 17:06:32 root] (utils.py 283): INFO Epoch: [10] [1780/2502] eta: 0:09:13 lr: 0.000016 loss_cls: 4.1347 (3.9290) grad_norm: 2.2938 (2.3346) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 17:06:40 root] (utils.py 283): INFO Epoch: [10] [1790/2502] eta: 0:09:05 lr: 0.000016 loss_cls: 4.1494 (3.9301) grad_norm: 2.3364 (2.3350) time: 0.7651 data: 0.0003 max mem: 8426 +[2024-12-10 17:06:48 root] (utils.py 283): INFO Epoch: [10] [1800/2502] eta: 0:08:57 lr: 0.000016 loss_cls: 4.1777 (3.9305) grad_norm: 2.3377 (2.3353) time: 0.7666 data: 0.0002 max mem: 8426 +[2024-12-10 17:06:55 root] (utils.py 283): INFO Epoch: [10] [1810/2502] eta: 0:08:50 lr: 0.000016 loss_cls: 4.1576 (3.9317) grad_norm: 2.3715 (2.3358) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-10 17:07:03 root] (utils.py 283): INFO Epoch: [10] [1820/2502] eta: 0:08:42 lr: 0.000016 loss_cls: 4.0919 (3.9298) grad_norm: 2.3773 (2.3362) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 17:07:10 root] (utils.py 283): INFO Epoch: [10] [1830/2502] eta: 0:08:34 lr: 0.000016 loss_cls: 3.8525 (3.9300) grad_norm: 2.3005 (2.3358) time: 0.7607 data: 0.0002 max mem: 8426 +[2024-12-10 17:07:18 root] (utils.py 283): INFO Epoch: [10] [1840/2502] eta: 0:08:27 lr: 0.000016 loss_cls: 4.1178 (3.9312) grad_norm: 2.2765 (2.3356) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 17:07:26 root] (utils.py 283): INFO Epoch: [10] [1850/2502] eta: 0:08:19 lr: 0.000016 loss_cls: 4.1966 (3.9315) grad_norm: 2.3614 (2.3358) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 17:07:33 root] (utils.py 283): INFO Epoch: [10] [1860/2502] eta: 0:08:11 lr: 0.000016 loss_cls: 4.1966 (3.9327) grad_norm: 2.3289 (2.3354) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 17:07:41 root] (utils.py 283): INFO Epoch: [10] [1870/2502] eta: 0:08:04 lr: 0.000016 loss_cls: 4.1216 (3.9315) grad_norm: 2.2900 (2.3353) time: 0.7625 data: 0.0002 max mem: 8426 +[2024-12-10 17:07:49 root] (utils.py 283): INFO Epoch: [10] [1880/2502] eta: 0:07:56 lr: 0.000016 loss_cls: 4.1216 (3.9325) grad_norm: 2.3001 (2.3349) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-10 17:07:56 root] (utils.py 283): INFO Epoch: [10] [1890/2502] eta: 0:07:48 lr: 0.000016 loss_cls: 3.9459 (3.9323) grad_norm: 2.3001 (2.3351) time: 0.7729 data: 0.0002 max mem: 8426 +[2024-12-10 17:08:04 root] (utils.py 283): INFO Epoch: [10] [1900/2502] eta: 0:07:41 lr: 0.000016 loss_cls: 3.9459 (3.9324) grad_norm: 2.3063 (2.3350) time: 0.7708 data: 0.0003 max mem: 8426 +[2024-12-10 17:08:12 root] (utils.py 283): INFO Epoch: [10] [1910/2502] eta: 0:07:33 lr: 0.000016 loss_cls: 3.9818 (3.9315) grad_norm: 2.3063 (2.3352) time: 0.7689 data: 0.0003 max mem: 8426 +[2024-12-10 17:08:20 root] (utils.py 283): INFO Epoch: [10] [1920/2502] eta: 0:07:25 lr: 0.000016 loss_cls: 3.9818 (3.9322) grad_norm: 2.3529 (2.3352) time: 0.7703 data: 0.0002 max mem: 8426 +[2024-12-10 17:08:27 root] (utils.py 283): INFO Epoch: [10] [1930/2502] eta: 0:07:18 lr: 0.000016 loss_cls: 4.2501 (3.9327) grad_norm: 2.3098 (2.3350) time: 0.7690 data: 0.0002 max mem: 8426 +[2024-12-10 17:08:35 root] (utils.py 283): INFO Epoch: [10] [1940/2502] eta: 0:07:10 lr: 0.000016 loss_cls: 4.2816 (3.9337) grad_norm: 2.3382 (2.3354) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 17:08:43 root] (utils.py 283): INFO Epoch: [10] [1950/2502] eta: 0:07:02 lr: 0.000016 loss_cls: 4.1412 (3.9341) grad_norm: 2.3801 (2.3352) time: 0.7711 data: 0.0002 max mem: 8426 +[2024-12-10 17:08:50 root] (utils.py 283): INFO Epoch: [10] [1960/2502] eta: 0:06:55 lr: 0.000016 loss_cls: 4.0507 (3.9335) grad_norm: 2.3832 (2.3357) time: 0.7689 data: 0.0002 max mem: 8426 +[2024-12-10 17:08:58 root] (utils.py 283): INFO Epoch: [10] [1970/2502] eta: 0:06:47 lr: 0.000016 loss_cls: 3.6628 (3.9327) grad_norm: 2.3832 (2.3357) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-10 17:09:06 root] (utils.py 283): INFO Epoch: [10] [1980/2502] eta: 0:06:39 lr: 0.000016 loss_cls: 4.0654 (3.9340) grad_norm: 2.3563 (2.3359) time: 0.7716 data: 0.0002 max mem: 8426 +[2024-12-10 17:09:13 root] (utils.py 283): INFO Epoch: [10] [1990/2502] eta: 0:06:32 lr: 0.000016 loss_cls: 3.8487 (3.9331) grad_norm: 2.3836 (2.3365) time: 0.7753 data: 0.0003 max mem: 8426 +[2024-12-10 17:09:21 root] (utils.py 283): INFO Epoch: [10] [2000/2502] eta: 0:06:24 lr: 0.000016 loss_cls: 3.7429 (3.9325) grad_norm: 2.3552 (2.3364) time: 0.7684 data: 0.0002 max mem: 8426 +[2024-12-10 17:09:29 root] (utils.py 283): INFO Epoch: [10] [2010/2502] eta: 0:06:16 lr: 0.000016 loss_cls: 4.0188 (3.9334) grad_norm: 2.3063 (2.3364) time: 0.7653 data: 0.0003 max mem: 8426 +[2024-12-10 17:09:36 root] (utils.py 283): INFO Epoch: [10] [2020/2502] eta: 0:06:09 lr: 0.000016 loss_cls: 3.9516 (3.9327) grad_norm: 2.3240 (2.3361) time: 0.7668 data: 0.0003 max mem: 8426 +[2024-12-10 17:09:44 root] (utils.py 283): INFO Epoch: [10] [2030/2502] eta: 0:06:01 lr: 0.000016 loss_cls: 4.0732 (3.9343) grad_norm: 2.2815 (2.3360) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 17:09:52 root] (utils.py 283): INFO Epoch: [10] [2040/2502] eta: 0:05:53 lr: 0.000016 loss_cls: 4.0435 (3.9336) grad_norm: 2.2777 (2.3358) time: 0.7665 data: 0.0002 max mem: 8426 +[2024-12-10 17:09:59 root] (utils.py 283): INFO Epoch: [10] [2050/2502] eta: 0:05:46 lr: 0.000016 loss_cls: 3.7463 (3.9334) grad_norm: 2.2322 (2.3353) time: 0.7698 data: 0.0002 max mem: 8426 +[2024-12-10 17:10:07 root] (utils.py 283): INFO Epoch: [10] [2060/2502] eta: 0:05:38 lr: 0.000016 loss_cls: 4.0458 (3.9326) grad_norm: 2.2698 (2.3354) time: 0.7792 data: 0.0002 max mem: 8426 +[2024-12-10 17:10:15 root] (utils.py 283): INFO Epoch: [10] [2070/2502] eta: 0:05:31 lr: 0.000016 loss_cls: 4.1610 (3.9351) grad_norm: 2.2806 (2.3352) time: 0.7703 data: 0.0002 max mem: 8426 +[2024-12-10 17:10:22 root] (utils.py 283): INFO Epoch: [10] [2080/2502] eta: 0:05:23 lr: 0.000016 loss_cls: 4.3048 (3.9360) grad_norm: 2.3040 (2.3357) time: 0.7561 data: 0.0002 max mem: 8426 +[2024-12-10 17:10:30 root] (utils.py 283): INFO Epoch: [10] [2090/2502] eta: 0:05:15 lr: 0.000016 loss_cls: 4.1155 (3.9370) grad_norm: 2.3531 (2.3357) time: 0.7602 data: 0.0002 max mem: 8426 +[2024-12-10 17:10:38 root] (utils.py 283): INFO Epoch: [10] [2100/2502] eta: 0:05:08 lr: 0.000016 loss_cls: 4.2304 (3.9379) grad_norm: 2.3446 (2.3356) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 17:10:45 root] (utils.py 283): INFO Epoch: [10] [2110/2502] eta: 0:05:00 lr: 0.000016 loss_cls: 4.2304 (3.9384) grad_norm: 2.3550 (2.3360) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 17:10:53 root] (utils.py 283): INFO Epoch: [10] [2120/2502] eta: 0:04:52 lr: 0.000016 loss_cls: 3.9559 (3.9383) grad_norm: 2.4045 (2.3362) time: 0.7634 data: 0.0003 max mem: 8426 +[2024-12-10 17:11:01 root] (utils.py 283): INFO Epoch: [10] [2130/2502] eta: 0:04:45 lr: 0.000016 loss_cls: 4.1863 (3.9391) grad_norm: 2.3798 (2.3364) time: 0.7614 data: 0.0003 max mem: 8426 +[2024-12-10 17:11:08 root] (utils.py 283): INFO Epoch: [10] [2140/2502] eta: 0:04:37 lr: 0.000016 loss_cls: 4.0708 (3.9403) grad_norm: 2.2960 (2.3362) time: 0.7608 data: 0.0002 max mem: 8426 +[2024-12-10 17:11:16 root] (utils.py 283): INFO Epoch: [10] [2150/2502] eta: 0:04:29 lr: 0.000016 loss_cls: 4.0456 (3.9399) grad_norm: 2.2608 (2.3359) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 17:11:23 root] (utils.py 283): INFO Epoch: [10] [2160/2502] eta: 0:04:22 lr: 0.000016 loss_cls: 3.8440 (3.9400) grad_norm: 2.2689 (2.3360) time: 0.7605 data: 0.0002 max mem: 8426 +[2024-12-10 17:11:31 root] (utils.py 283): INFO Epoch: [10] [2170/2502] eta: 0:04:14 lr: 0.000016 loss_cls: 3.8440 (3.9391) grad_norm: 2.3305 (2.3362) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 17:11:39 root] (utils.py 283): INFO Epoch: [10] [2180/2502] eta: 0:04:06 lr: 0.000016 loss_cls: 4.1454 (3.9399) grad_norm: 2.2940 (2.3360) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-10 17:11:46 root] (utils.py 283): INFO Epoch: [10] [2190/2502] eta: 0:03:59 lr: 0.000016 loss_cls: 4.0880 (3.9390) grad_norm: 2.3339 (2.3361) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 17:11:54 root] (utils.py 283): INFO Epoch: [10] [2200/2502] eta: 0:03:51 lr: 0.000016 loss_cls: 3.6967 (3.9386) grad_norm: 2.3511 (2.3361) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 17:12:02 root] (utils.py 283): INFO Epoch: [10] [2210/2502] eta: 0:03:43 lr: 0.000016 loss_cls: 3.5577 (3.9361) grad_norm: 2.3025 (2.3358) time: 0.7659 data: 0.0002 max mem: 8426 +[2024-12-10 17:12:09 root] (utils.py 283): INFO Epoch: [10] [2220/2502] eta: 0:03:36 lr: 0.000016 loss_cls: 3.5577 (3.9362) grad_norm: 2.3248 (2.3362) time: 0.7632 data: 0.0002 max mem: 8426 +[2024-12-10 17:12:17 root] (utils.py 283): INFO Epoch: [10] [2230/2502] eta: 0:03:28 lr: 0.000016 loss_cls: 3.9139 (3.9353) grad_norm: 2.3411 (2.3363) time: 0.7670 data: 0.0002 max mem: 8426 +[2024-12-10 17:12:25 root] (utils.py 283): INFO Epoch: [10] [2240/2502] eta: 0:03:20 lr: 0.000016 loss_cls: 4.1563 (3.9368) grad_norm: 2.2910 (2.3362) time: 0.7691 data: 0.0002 max mem: 8426 +[2024-12-10 17:12:32 root] (utils.py 283): INFO Epoch: [10] [2250/2502] eta: 0:03:13 lr: 0.000016 loss_cls: 4.1373 (3.9371) grad_norm: 2.2589 (2.3361) time: 0.7678 data: 0.0002 max mem: 8426 +[2024-12-10 17:12:40 root] (utils.py 283): INFO Epoch: [10] [2260/2502] eta: 0:03:05 lr: 0.000016 loss_cls: 3.9892 (3.9371) grad_norm: 2.2783 (2.3360) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 17:12:48 root] (utils.py 283): INFO Epoch: [10] [2270/2502] eta: 0:02:57 lr: 0.000016 loss_cls: 4.3618 (3.9378) grad_norm: 2.3006 (2.3359) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 17:12:55 root] (utils.py 283): INFO Epoch: [10] [2280/2502] eta: 0:02:50 lr: 0.000016 loss_cls: 3.9107 (3.9366) grad_norm: 2.3845 (2.3365) time: 0.7705 data: 0.0002 max mem: 8426 +[2024-12-10 17:13:03 root] (utils.py 283): INFO Epoch: [10] [2290/2502] eta: 0:02:42 lr: 0.000016 loss_cls: 3.8941 (3.9359) grad_norm: 2.4199 (2.3366) time: 0.7688 data: 0.0002 max mem: 8426 +[2024-12-10 17:13:11 root] (utils.py 283): INFO Epoch: [10] [2300/2502] eta: 0:02:34 lr: 0.000016 loss_cls: 4.0325 (3.9362) grad_norm: 2.2671 (2.3363) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 17:13:18 root] (utils.py 283): INFO Epoch: [10] [2310/2502] eta: 0:02:27 lr: 0.000016 loss_cls: 4.2458 (3.9373) grad_norm: 2.2816 (2.3363) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 17:13:26 root] (utils.py 283): INFO Epoch: [10] [2320/2502] eta: 0:02:19 lr: 0.000016 loss_cls: 4.1693 (3.9363) grad_norm: 2.2936 (2.3362) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-10 17:13:34 root] (utils.py 283): INFO Epoch: [10] [2330/2502] eta: 0:02:11 lr: 0.000016 loss_cls: 3.9576 (3.9366) grad_norm: 2.2914 (2.3361) time: 0.7634 data: 0.0003 max mem: 8426 +[2024-12-10 17:13:41 root] (utils.py 283): INFO Epoch: [10] [2340/2502] eta: 0:02:04 lr: 0.000016 loss_cls: 4.2363 (3.9361) grad_norm: 2.2914 (2.3361) time: 0.7616 data: 0.0003 max mem: 8426 +[2024-12-10 17:13:49 root] (utils.py 283): INFO Epoch: [10] [2350/2502] eta: 0:01:56 lr: 0.000016 loss_cls: 3.6828 (3.9346) grad_norm: 2.3164 (2.3361) time: 0.7613 data: 0.0003 max mem: 8426 +[2024-12-10 17:13:56 root] (utils.py 283): INFO Epoch: [10] [2360/2502] eta: 0:01:48 lr: 0.000016 loss_cls: 3.6828 (3.9353) grad_norm: 2.3382 (2.3368) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 17:14:04 root] (utils.py 283): INFO Epoch: [10] [2370/2502] eta: 0:01:41 lr: 0.000016 loss_cls: 4.2601 (3.9361) grad_norm: 2.3585 (2.3371) time: 0.7702 data: 0.0002 max mem: 8426 +[2024-12-10 17:14:12 root] (utils.py 283): INFO Epoch: [10] [2380/2502] eta: 0:01:33 lr: 0.000016 loss_cls: 4.1412 (3.9360) grad_norm: 2.2998 (2.3371) time: 0.7719 data: 0.0002 max mem: 8426 +[2024-12-10 17:14:20 root] (utils.py 283): INFO Epoch: [10] [2390/2502] eta: 0:01:25 lr: 0.000016 loss_cls: 4.0231 (3.9358) grad_norm: 2.3384 (2.3374) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 17:14:27 root] (utils.py 283): INFO Epoch: [10] [2400/2502] eta: 0:01:18 lr: 0.000016 loss_cls: 3.9869 (3.9355) grad_norm: 2.2571 (2.3371) time: 0.7666 data: 0.0002 max mem: 8426 +[2024-12-10 17:14:35 root] (utils.py 283): INFO Epoch: [10] [2410/2502] eta: 0:01:10 lr: 0.000016 loss_cls: 3.9747 (3.9346) grad_norm: 2.2403 (2.3368) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-10 17:14:43 root] (utils.py 283): INFO Epoch: [10] [2420/2502] eta: 0:01:02 lr: 0.000016 loss_cls: 3.9112 (3.9352) grad_norm: 2.3066 (2.3369) time: 0.7654 data: 0.0002 max mem: 8426 +[2024-12-10 17:14:50 root] (utils.py 283): INFO Epoch: [10] [2430/2502] eta: 0:00:55 lr: 0.000016 loss_cls: 4.0272 (3.9345) grad_norm: 2.3066 (2.3368) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 17:14:58 root] (utils.py 283): INFO Epoch: [10] [2440/2502] eta: 0:00:47 lr: 0.000016 loss_cls: 3.9567 (3.9343) grad_norm: 2.2781 (2.3366) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 17:15:05 root] (utils.py 283): INFO Epoch: [10] [2450/2502] eta: 0:00:39 lr: 0.000016 loss_cls: 3.8611 (3.9335) grad_norm: 2.2744 (2.3364) time: 0.7603 data: 0.0002 max mem: 8426 +[2024-12-10 17:15:13 root] (utils.py 283): INFO Epoch: [10] [2460/2502] eta: 0:00:32 lr: 0.000016 loss_cls: 3.9758 (3.9339) grad_norm: 2.3190 (2.3366) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 17:15:21 root] (utils.py 283): INFO Epoch: [10] [2470/2502] eta: 0:00:24 lr: 0.000016 loss_cls: 4.0297 (3.9339) grad_norm: 2.3417 (2.3365) time: 0.7659 data: 0.0002 max mem: 8426 +[2024-12-10 17:15:28 root] (utils.py 283): INFO Epoch: [10] [2480/2502] eta: 0:00:16 lr: 0.000016 loss_cls: 4.0574 (3.9343) grad_norm: 2.3345 (2.3365) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 17:15:36 root] (utils.py 283): INFO Epoch: [10] [2490/2502] eta: 0:00:09 lr: 0.000016 loss_cls: 4.0574 (3.9333) grad_norm: 2.3384 (2.3367) time: 0.7854 data: 0.0228 max mem: 8426 +[2024-12-10 17:15:44 root] (utils.py 283): INFO Epoch: [10] [2500/2502] eta: 0:00:01 lr: 0.000016 loss_cls: 3.7361 (3.9330) grad_norm: 2.3897 (2.3368) time: 0.7858 data: 0.0228 max mem: 8426 +[2024-12-10 17:15:45 root] (utils.py 283): INFO Epoch: [10] [2501/2502] eta: 0:00:00 lr: 0.000016 loss_cls: 3.7361 (3.9328) grad_norm: 2.3897 (2.3367) time: 0.7853 data: 0.0228 max mem: 8426 +[2024-12-10 17:15:45 root] (utils.py 297): INFO Epoch: [10] Total time: 0:31:57 (0.7663 s / it) +[2024-12-10 17:15:45 root] (engine.py 179): INFO Averaged stats:lr: 0.000016 loss_cls: 3.7361 (3.9384) grad_norm: 2.3897 (2.3367) +[2024-12-10 17:15:45 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.6318 (0.6318) acc1: 87.5000 (87.5000) acc3: 96.8750 (96.8750) acc5: 99.2188 (99.2188) time: 0.1274 data: 0.0003 max mem: 8426 +[2024-12-10 17:15:47 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7384 (0.8154) acc1: 85.9375 (82.1023) acc3: 96.0938 (94.2472) acc5: 96.8750 (96.5909) time: 0.1277 data: 0.0003 max mem: 8426 +[2024-12-10 17:15:48 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8827 (0.8694) acc1: 79.6875 (80.8408) acc3: 92.9688 (93.2664) acc5: 94.5312 (95.6101) time: 0.1279 data: 0.0004 max mem: 8426 +[2024-12-10 17:15:49 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9647 (0.8865) acc1: 78.9062 (79.9143) acc3: 92.9688 (93.2964) acc5: 95.3125 (95.7409) time: 0.1287 data: 0.0005 max mem: 8426 +[2024-12-10 17:15:51 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8277 (0.8771) acc1: 79.6875 (80.4116) acc3: 93.7500 (93.2355) acc5: 96.0938 (95.6936) time: 0.1408 data: 0.0126 max mem: 8426 +[2024-12-10 17:15:52 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0621 (0.9616) acc1: 75.0000 (78.3854) acc3: 88.2812 (91.6513) acc5: 92.9688 (94.6078) time: 0.1504 data: 0.0227 max mem: 8426 +[2024-12-10 17:15:54 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2509 (1.0060) acc1: 73.4375 (77.7152) acc3: 85.1562 (90.7147) acc5: 88.2812 (93.8012) time: 0.1565 data: 0.0287 max mem: 8426 +[2024-12-10 17:15:56 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:04 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2090 (1.0455) acc1: 75.0000 (76.8596) acc3: 86.7188 (90.2509) acc5: 90.6250 (93.3099) time: 0.1654 data: 0.0375 max mem: 8426 +[2024-12-10 17:15:57 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2090 (1.0780) acc1: 73.4375 (76.1574) acc3: 87.5000 (89.6701) acc5: 89.8438 (92.8337) time: 0.1577 data: 0.0291 max mem: 8426 +[2024-12-10 17:15:58 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2382 (1.1042) acc1: 71.8750 (75.4894) acc3: 85.9375 (89.3716) acc5: 89.8438 (92.6168) time: 0.1385 data: 0.0100 max mem: 8426 +[2024-12-10 17:15:59 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1529 (1.0910) acc1: 74.2188 (75.7600) acc3: 89.8438 (89.5760) acc5: 91.4062 (92.8240) time: 0.1358 data: 0.0100 max mem: 8426 +[2024-12-10 17:15:59 root] (utils.py 297): INFO Test: Total time: 0:00:13 (0.1423 s / it) +[2024-12-10 17:15:59 root] (engine.py 264): INFO * Acc@1 75.514 Acc@3 89.532 Acc@5 92.722 loss 1.096 flops 1.285 layer_flops 1.251 +[2024-12-10 17:15:59 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 75.5% +[2024-12-10 17:15:59 root] (main.py 576): INFO Max accuracy: 75.63% +[2024-12-10 17:16:00 root] (utils.py 283): INFO Epoch: [11] [ 0/2502] eta: 0:32:21 lr: 0.000015 loss_cls: 4.6338 (4.6338) grad_norm: 2.4345 (2.4345) time: 0.7760 data: 0.0004 max mem: 8426 +[2024-12-10 17:16:08 root] (utils.py 283): INFO Epoch: [11] [ 10/2502] eta: 0:32:11 lr: 0.000015 loss_cls: 4.4113 (4.1652) grad_norm: 2.3032 (2.3474) time: 0.7752 data: 0.0003 max mem: 8426 +[2024-12-10 17:16:16 root] (utils.py 283): INFO Epoch: [11] [ 20/2502] eta: 0:32:05 lr: 0.000015 loss_cls: 4.3579 (4.1951) grad_norm: 2.2937 (2.3408) time: 0.7758 data: 0.0002 max mem: 8426 +[2024-12-10 17:16:23 root] (utils.py 283): INFO Epoch: [11] [ 30/2502] eta: 0:31:46 lr: 0.000015 loss_cls: 4.3266 (4.1708) grad_norm: 2.3260 (2.3528) time: 0.7690 data: 0.0002 max mem: 8426 +[2024-12-10 17:16:31 root] (utils.py 283): INFO Epoch: [11] [ 40/2502] eta: 0:31:35 lr: 0.000015 loss_cls: 4.1867 (4.1268) grad_norm: 2.2951 (2.3286) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 17:16:38 root] (utils.py 283): INFO Epoch: [11] [ 50/2502] eta: 0:31:26 lr: 0.000015 loss_cls: 4.0057 (4.0582) grad_norm: 2.2956 (2.3323) time: 0.7661 data: 0.0002 max mem: 8426 +[2024-12-10 17:16:46 root] (utils.py 283): INFO Epoch: [11] [ 60/2502] eta: 0:31:18 lr: 0.000015 loss_cls: 3.8430 (4.0256) grad_norm: 2.3513 (2.3342) time: 0.7674 data: 0.0003 max mem: 8426 +[2024-12-10 17:16:54 root] (utils.py 283): INFO Epoch: [11] [ 70/2502] eta: 0:31:10 lr: 0.000015 loss_cls: 3.9065 (4.0115) grad_norm: 2.2840 (2.3341) time: 0.7682 data: 0.0003 max mem: 8426 +[2024-12-10 17:17:02 root] (utils.py 283): INFO Epoch: [11] [ 80/2502] eta: 0:31:01 lr: 0.000015 loss_cls: 3.9341 (4.0046) grad_norm: 2.2625 (2.3261) time: 0.7677 data: 0.0002 max mem: 8426 +[2024-12-10 17:17:09 root] (utils.py 283): INFO Epoch: [11] [ 90/2502] eta: 0:30:53 lr: 0.000015 loss_cls: 3.9341 (3.9931) grad_norm: 2.3348 (2.3305) time: 0.7668 data: 0.0002 max mem: 8426 +[2024-12-10 17:17:17 root] (utils.py 283): INFO Epoch: [11] [ 100/2502] eta: 0:30:46 lr: 0.000015 loss_cls: 3.6757 (3.9579) grad_norm: 2.3557 (2.3371) time: 0.7679 data: 0.0002 max mem: 8426 +[2024-12-10 17:17:24 root] (utils.py 283): INFO Epoch: [11] [ 110/2502] eta: 0:30:36 lr: 0.000015 loss_cls: 3.7798 (3.9700) grad_norm: 2.3629 (2.3414) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 17:17:32 root] (utils.py 283): INFO Epoch: [11] [ 120/2502] eta: 0:30:29 lr: 0.000015 loss_cls: 3.9882 (3.9585) grad_norm: 2.4422 (2.3506) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 17:17:40 root] (utils.py 283): INFO Epoch: [11] [ 130/2502] eta: 0:30:21 lr: 0.000015 loss_cls: 3.9089 (3.9500) grad_norm: 2.4181 (2.3545) time: 0.7678 data: 0.0002 max mem: 8426 +[2024-12-10 17:17:48 root] (utils.py 283): INFO Epoch: [11] [ 140/2502] eta: 0:30:13 lr: 0.000015 loss_cls: 4.1100 (3.9740) grad_norm: 2.4042 (2.3538) time: 0.7672 data: 0.0002 max mem: 8426 +[2024-12-10 17:17:55 root] (utils.py 283): INFO Epoch: [11] [ 150/2502] eta: 0:30:04 lr: 0.000015 loss_cls: 4.2441 (3.9761) grad_norm: 2.4042 (2.3602) time: 0.7630 data: 0.0002 max mem: 8426 +[2024-12-10 17:18:03 root] (utils.py 283): INFO Epoch: [11] [ 160/2502] eta: 0:29:55 lr: 0.000015 loss_cls: 4.0981 (3.9522) grad_norm: 2.3981 (2.3604) time: 0.7597 data: 0.0002 max mem: 8426 +[2024-12-10 17:18:10 root] (utils.py 283): INFO Epoch: [11] [ 170/2502] eta: 0:29:47 lr: 0.000015 loss_cls: 3.9539 (3.9492) grad_norm: 2.3150 (2.3572) time: 0.7613 data: 0.0002 max mem: 8426 +[2024-12-10 17:18:18 root] (utils.py 283): INFO Epoch: [11] [ 180/2502] eta: 0:29:39 lr: 0.000015 loss_cls: 4.0084 (3.9491) grad_norm: 2.2726 (2.3541) time: 0.7610 data: 0.0002 max mem: 8426 +[2024-12-10 17:18:26 root] (utils.py 283): INFO Epoch: [11] [ 190/2502] eta: 0:29:31 lr: 0.000015 loss_cls: 4.0084 (3.9402) grad_norm: 2.2541 (2.3500) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 17:18:33 root] (utils.py 283): INFO Epoch: [11] [ 200/2502] eta: 0:29:25 lr: 0.000015 loss_cls: 3.9591 (3.9449) grad_norm: 2.2630 (2.3495) time: 0.7733 data: 0.0002 max mem: 8426 +[2024-12-10 17:18:41 root] (utils.py 283): INFO Epoch: [11] [ 210/2502] eta: 0:29:18 lr: 0.000015 loss_cls: 4.0232 (3.9440) grad_norm: 2.2466 (2.3457) time: 0.7780 data: 0.0002 max mem: 8426 +[2024-12-10 17:18:49 root] (utils.py 283): INFO Epoch: [11] [ 220/2502] eta: 0:29:11 lr: 0.000015 loss_cls: 3.8409 (3.9315) grad_norm: 2.2608 (2.3474) time: 0.7724 data: 0.0002 max mem: 8426 +[2024-12-10 17:18:56 root] (utils.py 283): INFO Epoch: [11] [ 230/2502] eta: 0:29:02 lr: 0.000015 loss_cls: 3.8409 (3.9313) grad_norm: 2.3082 (2.3470) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 17:19:04 root] (utils.py 283): INFO Epoch: [11] [ 240/2502] eta: 0:28:54 lr: 0.000015 loss_cls: 4.0050 (3.9345) grad_norm: 2.3008 (2.3450) time: 0.7610 data: 0.0003 max mem: 8426 +[2024-12-10 17:19:12 root] (utils.py 283): INFO Epoch: [11] [ 250/2502] eta: 0:28:47 lr: 0.000015 loss_cls: 3.9903 (3.9358) grad_norm: 2.2977 (2.3442) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 17:19:19 root] (utils.py 283): INFO Epoch: [11] [ 260/2502] eta: 0:28:38 lr: 0.000015 loss_cls: 3.8893 (3.9326) grad_norm: 2.2979 (2.3456) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 17:19:27 root] (utils.py 283): INFO Epoch: [11] [ 270/2502] eta: 0:28:30 lr: 0.000015 loss_cls: 3.8150 (3.9285) grad_norm: 2.3079 (2.3444) time: 0.7596 data: 0.0002 max mem: 8426 +[2024-12-10 17:19:35 root] (utils.py 283): INFO Epoch: [11] [ 280/2502] eta: 0:28:22 lr: 0.000015 loss_cls: 3.9879 (3.9379) grad_norm: 2.3079 (2.3455) time: 0.7597 data: 0.0002 max mem: 8426 +[2024-12-10 17:19:42 root] (utils.py 283): INFO Epoch: [11] [ 290/2502] eta: 0:28:14 lr: 0.000015 loss_cls: 4.0989 (3.9409) grad_norm: 2.2804 (2.3432) time: 0.7609 data: 0.0002 max mem: 8426 +[2024-12-10 17:19:50 root] (utils.py 283): INFO Epoch: [11] [ 300/2502] eta: 0:28:06 lr: 0.000015 loss_cls: 4.2045 (3.9455) grad_norm: 2.2745 (2.3419) time: 0.7617 data: 0.0002 max mem: 8426 +[2024-12-10 17:19:57 root] (utils.py 283): INFO Epoch: [11] [ 310/2502] eta: 0:27:58 lr: 0.000015 loss_cls: 4.2045 (3.9417) grad_norm: 2.3136 (2.3422) time: 0.7605 data: 0.0002 max mem: 8426 +[2024-12-10 17:20:05 root] (utils.py 283): INFO Epoch: [11] [ 320/2502] eta: 0:27:50 lr: 0.000015 loss_cls: 4.2028 (3.9469) grad_norm: 2.3281 (2.3413) time: 0.7617 data: 0.0002 max mem: 8426 +[2024-12-10 17:20:13 root] (utils.py 283): INFO Epoch: [11] [ 330/2502] eta: 0:27:42 lr: 0.000015 loss_cls: 4.2056 (3.9559) grad_norm: 2.3033 (2.3408) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-10 17:20:20 root] (utils.py 283): INFO Epoch: [11] [ 340/2502] eta: 0:27:35 lr: 0.000015 loss_cls: 4.2198 (3.9569) grad_norm: 2.2989 (2.3417) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-10 17:20:28 root] (utils.py 283): INFO Epoch: [11] [ 350/2502] eta: 0:27:27 lr: 0.000015 loss_cls: 4.3214 (3.9637) grad_norm: 2.3442 (2.3424) time: 0.7654 data: 0.0002 max mem: 8426 +[2024-12-10 17:20:36 root] (utils.py 283): INFO Epoch: [11] [ 360/2502] eta: 0:27:21 lr: 0.000015 loss_cls: 4.2524 (3.9648) grad_norm: 2.3734 (2.3426) time: 0.7744 data: 0.0002 max mem: 8426 +[2024-12-10 17:20:44 root] (utils.py 283): INFO Epoch: [11] [ 370/2502] eta: 0:27:14 lr: 0.000015 loss_cls: 3.9079 (3.9602) grad_norm: 2.3218 (2.3418) time: 0.7829 data: 0.0002 max mem: 8426 +[2024-12-10 17:20:51 root] (utils.py 283): INFO Epoch: [11] [ 380/2502] eta: 0:27:07 lr: 0.000015 loss_cls: 3.8303 (3.9555) grad_norm: 2.3551 (2.3423) time: 0.7816 data: 0.0002 max mem: 8426 +[2024-12-10 17:20:59 root] (utils.py 283): INFO Epoch: [11] [ 390/2502] eta: 0:27:00 lr: 0.000015 loss_cls: 3.6228 (3.9496) grad_norm: 2.2987 (2.3407) time: 0.7794 data: 0.0002 max mem: 8426 +[2024-12-10 17:21:07 root] (utils.py 283): INFO Epoch: [11] [ 400/2502] eta: 0:26:53 lr: 0.000015 loss_cls: 3.8758 (3.9495) grad_norm: 2.2645 (2.3391) time: 0.7785 data: 0.0002 max mem: 8426 +[2024-12-10 17:21:15 root] (utils.py 283): INFO Epoch: [11] [ 410/2502] eta: 0:26:45 lr: 0.000015 loss_cls: 4.0449 (3.9508) grad_norm: 2.3133 (2.3382) time: 0.7764 data: 0.0002 max mem: 8426 +[2024-12-10 17:21:23 root] (utils.py 283): INFO Epoch: [11] [ 420/2502] eta: 0:26:38 lr: 0.000015 loss_cls: 3.9949 (3.9458) grad_norm: 2.3713 (2.3390) time: 0.7736 data: 0.0002 max mem: 8426 +[2024-12-10 17:21:30 root] (utils.py 283): INFO Epoch: [11] [ 430/2502] eta: 0:26:30 lr: 0.000015 loss_cls: 3.9956 (3.9486) grad_norm: 2.3816 (2.3390) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 17:21:38 root] (utils.py 283): INFO Epoch: [11] [ 440/2502] eta: 0:26:23 lr: 0.000015 loss_cls: 4.0090 (3.9475) grad_norm: 2.3120 (2.3381) time: 0.7675 data: 0.0002 max mem: 8426 +[2024-12-10 17:21:46 root] (utils.py 283): INFO Epoch: [11] [ 450/2502] eta: 0:26:15 lr: 0.000015 loss_cls: 4.1499 (3.9533) grad_norm: 2.3191 (2.3391) time: 0.7719 data: 0.0002 max mem: 8426 +[2024-12-10 17:21:53 root] (utils.py 283): INFO Epoch: [11] [ 460/2502] eta: 0:26:07 lr: 0.000015 loss_cls: 4.1499 (3.9492) grad_norm: 2.3399 (2.3394) time: 0.7680 data: 0.0002 max mem: 8426 +[2024-12-10 17:22:01 root] (utils.py 283): INFO Epoch: [11] [ 470/2502] eta: 0:26:00 lr: 0.000015 loss_cls: 3.7569 (3.9499) grad_norm: 2.3399 (2.3393) time: 0.7706 data: 0.0002 max mem: 8426 +[2024-12-10 17:22:09 root] (utils.py 283): INFO Epoch: [11] [ 480/2502] eta: 0:25:52 lr: 0.000015 loss_cls: 4.0988 (3.9525) grad_norm: 2.2827 (2.3388) time: 0.7690 data: 0.0003 max mem: 8426 +[2024-12-10 17:22:16 root] (utils.py 283): INFO Epoch: [11] [ 490/2502] eta: 0:25:44 lr: 0.000015 loss_cls: 3.9730 (3.9491) grad_norm: 2.3318 (2.3391) time: 0.7662 data: 0.0002 max mem: 8426 +[2024-12-10 17:22:24 root] (utils.py 283): INFO Epoch: [11] [ 500/2502] eta: 0:25:36 lr: 0.000015 loss_cls: 4.0085 (3.9483) grad_norm: 2.3318 (2.3391) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 17:22:32 root] (utils.py 283): INFO Epoch: [11] [ 510/2502] eta: 0:25:29 lr: 0.000015 loss_cls: 4.0085 (3.9447) grad_norm: 2.3041 (2.3383) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 17:22:39 root] (utils.py 283): INFO Epoch: [11] [ 520/2502] eta: 0:25:21 lr: 0.000015 loss_cls: 3.9473 (3.9457) grad_norm: 2.2822 (2.3360) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 17:22:47 root] (utils.py 283): INFO Epoch: [11] [ 530/2502] eta: 0:25:13 lr: 0.000015 loss_cls: 4.0464 (3.9453) grad_norm: 2.2688 (2.3367) time: 0.7632 data: 0.0003 max mem: 8426 +[2024-12-10 17:22:55 root] (utils.py 283): INFO Epoch: [11] [ 540/2502] eta: 0:25:05 lr: 0.000015 loss_cls: 3.8613 (3.9426) grad_norm: 2.3597 (2.3373) time: 0.7665 data: 0.0002 max mem: 8426 +[2024-12-10 17:23:02 root] (utils.py 283): INFO Epoch: [11] [ 550/2502] eta: 0:24:58 lr: 0.000015 loss_cls: 3.7989 (3.9384) grad_norm: 2.3308 (2.3376) time: 0.7706 data: 0.0002 max mem: 8426 +[2024-12-10 17:23:10 root] (utils.py 283): INFO Epoch: [11] [ 560/2502] eta: 0:24:50 lr: 0.000015 loss_cls: 4.1819 (3.9439) grad_norm: 2.3105 (2.3375) time: 0.7721 data: 0.0002 max mem: 8426 +[2024-12-10 17:23:18 root] (utils.py 283): INFO Epoch: [11] [ 570/2502] eta: 0:24:43 lr: 0.000015 loss_cls: 4.3439 (3.9501) grad_norm: 2.3234 (2.3376) time: 0.7749 data: 0.0002 max mem: 8426 +[2024-12-10 17:23:26 root] (utils.py 283): INFO Epoch: [11] [ 580/2502] eta: 0:24:36 lr: 0.000015 loss_cls: 3.8884 (3.9415) grad_norm: 2.2822 (2.3374) time: 0.7786 data: 0.0002 max mem: 8426 +[2024-12-10 17:23:33 root] (utils.py 283): INFO Epoch: [11] [ 590/2502] eta: 0:24:28 lr: 0.000015 loss_cls: 3.8227 (3.9457) grad_norm: 2.2793 (2.3369) time: 0.7703 data: 0.0002 max mem: 8426 +[2024-12-10 17:23:41 root] (utils.py 283): INFO Epoch: [11] [ 600/2502] eta: 0:24:20 lr: 0.000015 loss_cls: 3.9840 (3.9451) grad_norm: 2.3330 (2.3371) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 17:23:49 root] (utils.py 283): INFO Epoch: [11] [ 610/2502] eta: 0:24:13 lr: 0.000015 loss_cls: 3.7238 (3.9393) grad_norm: 2.3468 (2.3371) time: 0.7729 data: 0.0002 max mem: 8426 +[2024-12-10 17:23:56 root] (utils.py 283): INFO Epoch: [11] [ 620/2502] eta: 0:24:05 lr: 0.000015 loss_cls: 3.7162 (3.9369) grad_norm: 2.3385 (2.3374) time: 0.7779 data: 0.0002 max mem: 8426 +[2024-12-10 17:24:04 root] (utils.py 283): INFO Epoch: [11] [ 630/2502] eta: 0:23:58 lr: 0.000015 loss_cls: 4.1217 (3.9419) grad_norm: 2.3227 (2.3367) time: 0.7819 data: 0.0002 max mem: 8426 +[2024-12-10 17:24:12 root] (utils.py 283): INFO Epoch: [11] [ 640/2502] eta: 0:23:51 lr: 0.000015 loss_cls: 4.1919 (3.9394) grad_norm: 2.3656 (2.3378) time: 0.7805 data: 0.0002 max mem: 8426 +[2024-12-10 17:24:20 root] (utils.py 283): INFO Epoch: [11] [ 650/2502] eta: 0:23:43 lr: 0.000015 loss_cls: 4.0686 (3.9421) grad_norm: 2.3656 (2.3373) time: 0.7800 data: 0.0002 max mem: 8426 +[2024-12-10 17:24:28 root] (utils.py 283): INFO Epoch: [11] [ 660/2502] eta: 0:23:36 lr: 0.000015 loss_cls: 3.9088 (3.9401) grad_norm: 2.2435 (2.3358) time: 0.7832 data: 0.0002 max mem: 8426 +[2024-12-10 17:24:35 root] (utils.py 283): INFO Epoch: [11] [ 670/2502] eta: 0:23:29 lr: 0.000015 loss_cls: 3.7226 (3.9364) grad_norm: 2.2435 (2.3353) time: 0.7786 data: 0.0002 max mem: 8426 +[2024-12-10 17:24:43 root] (utils.py 283): INFO Epoch: [11] [ 680/2502] eta: 0:23:21 lr: 0.000015 loss_cls: 4.0553 (3.9386) grad_norm: 2.3052 (2.3358) time: 0.7673 data: 0.0003 max mem: 8426 +[2024-12-10 17:24:51 root] (utils.py 283): INFO Epoch: [11] [ 690/2502] eta: 0:23:13 lr: 0.000015 loss_cls: 3.9963 (3.9352) grad_norm: 2.3052 (2.3356) time: 0.7606 data: 0.0003 max mem: 8426 +[2024-12-10 17:24:58 root] (utils.py 283): INFO Epoch: [11] [ 700/2502] eta: 0:23:05 lr: 0.000015 loss_cls: 3.6761 (3.9355) grad_norm: 2.2981 (2.3357) time: 0.7613 data: 0.0003 max mem: 8426 +[2024-12-10 17:25:06 root] (utils.py 283): INFO Epoch: [11] [ 710/2502] eta: 0:22:57 lr: 0.000015 loss_cls: 4.2562 (3.9393) grad_norm: 2.3598 (2.3361) time: 0.7600 data: 0.0002 max mem: 8426 +[2024-12-10 17:25:13 root] (utils.py 283): INFO Epoch: [11] [ 720/2502] eta: 0:22:49 lr: 0.000015 loss_cls: 3.9806 (3.9321) grad_norm: 2.3305 (2.3360) time: 0.7580 data: 0.0002 max mem: 8426 +[2024-12-10 17:25:21 root] (utils.py 283): INFO Epoch: [11] [ 730/2502] eta: 0:22:41 lr: 0.000015 loss_cls: 3.9276 (3.9332) grad_norm: 2.3111 (2.3354) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 17:25:29 root] (utils.py 283): INFO Epoch: [11] [ 740/2502] eta: 0:22:34 lr: 0.000015 loss_cls: 4.0858 (3.9381) grad_norm: 2.3328 (2.3361) time: 0.7673 data: 0.0002 max mem: 8426 +[2024-12-10 17:25:36 root] (utils.py 283): INFO Epoch: [11] [ 750/2502] eta: 0:22:26 lr: 0.000015 loss_cls: 4.0926 (3.9376) grad_norm: 2.3224 (2.3351) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 17:25:44 root] (utils.py 283): INFO Epoch: [11] [ 760/2502] eta: 0:22:18 lr: 0.000015 loss_cls: 3.9076 (3.9379) grad_norm: 2.2448 (2.3345) time: 0.7633 data: 0.0003 max mem: 8426 +[2024-12-10 17:25:52 root] (utils.py 283): INFO Epoch: [11] [ 770/2502] eta: 0:22:10 lr: 0.000015 loss_cls: 3.9472 (3.9364) grad_norm: 2.2559 (2.3339) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 17:25:59 root] (utils.py 283): INFO Epoch: [11] [ 780/2502] eta: 0:22:02 lr: 0.000015 loss_cls: 3.8356 (3.9351) grad_norm: 2.3145 (2.3353) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 17:26:07 root] (utils.py 283): INFO Epoch: [11] [ 790/2502] eta: 0:21:54 lr: 0.000015 loss_cls: 4.0274 (3.9370) grad_norm: 2.3371 (2.3347) time: 0.7621 data: 0.0003 max mem: 8426 +[2024-12-10 17:26:14 root] (utils.py 283): INFO Epoch: [11] [ 800/2502] eta: 0:21:47 lr: 0.000015 loss_cls: 4.0274 (3.9353) grad_norm: 2.3440 (2.3355) time: 0.7603 data: 0.0002 max mem: 8426 +[2024-12-10 17:26:22 root] (utils.py 283): INFO Epoch: [11] [ 810/2502] eta: 0:21:39 lr: 0.000015 loss_cls: 3.8119 (3.9333) grad_norm: 2.3892 (2.3364) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 17:26:30 root] (utils.py 283): INFO Epoch: [11] [ 820/2502] eta: 0:21:31 lr: 0.000015 loss_cls: 3.7588 (3.9311) grad_norm: 2.3626 (2.3364) time: 0.7667 data: 0.0003 max mem: 8426 +[2024-12-10 17:26:38 root] (utils.py 283): INFO Epoch: [11] [ 830/2502] eta: 0:21:24 lr: 0.000015 loss_cls: 3.6466 (3.9275) grad_norm: 2.3031 (2.3353) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 17:26:45 root] (utils.py 283): INFO Epoch: [11] [ 840/2502] eta: 0:21:16 lr: 0.000015 loss_cls: 4.1079 (3.9307) grad_norm: 2.3146 (2.3357) time: 0.7688 data: 0.0002 max mem: 8426 +[2024-12-10 17:26:53 root] (utils.py 283): INFO Epoch: [11] [ 850/2502] eta: 0:21:08 lr: 0.000015 loss_cls: 4.2543 (3.9310) grad_norm: 2.3874 (2.3361) time: 0.7700 data: 0.0002 max mem: 8426 +[2024-12-10 17:27:01 root] (utils.py 283): INFO Epoch: [11] [ 860/2502] eta: 0:21:00 lr: 0.000015 loss_cls: 3.9779 (3.9305) grad_norm: 2.2788 (2.3351) time: 0.7671 data: 0.0003 max mem: 8426 +[2024-12-10 17:27:08 root] (utils.py 283): INFO Epoch: [11] [ 870/2502] eta: 0:20:53 lr: 0.000015 loss_cls: 3.8176 (3.9277) grad_norm: 2.2556 (2.3347) time: 0.7653 data: 0.0003 max mem: 8426 +[2024-12-10 17:27:16 root] (utils.py 283): INFO Epoch: [11] [ 880/2502] eta: 0:20:45 lr: 0.000015 loss_cls: 3.9165 (3.9280) grad_norm: 2.2971 (2.3349) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 17:27:24 root] (utils.py 283): INFO Epoch: [11] [ 890/2502] eta: 0:20:37 lr: 0.000015 loss_cls: 3.9165 (3.9272) grad_norm: 2.3119 (2.3343) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 17:27:31 root] (utils.py 283): INFO Epoch: [11] [ 900/2502] eta: 0:20:30 lr: 0.000015 loss_cls: 3.8951 (3.9267) grad_norm: 2.3134 (2.3344) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 17:27:39 root] (utils.py 283): INFO Epoch: [11] [ 910/2502] eta: 0:20:22 lr: 0.000015 loss_cls: 3.9166 (3.9260) grad_norm: 2.3520 (2.3349) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 17:27:46 root] (utils.py 283): INFO Epoch: [11] [ 920/2502] eta: 0:20:14 lr: 0.000015 loss_cls: 3.7925 (3.9249) grad_norm: 2.3164 (2.3344) time: 0.7630 data: 0.0003 max mem: 8426 +[2024-12-10 17:27:54 root] (utils.py 283): INFO Epoch: [11] [ 930/2502] eta: 0:20:06 lr: 0.000015 loss_cls: 3.7659 (3.9241) grad_norm: 2.2678 (2.3337) time: 0.7633 data: 0.0003 max mem: 8426 +[2024-12-10 17:28:02 root] (utils.py 283): INFO Epoch: [11] [ 940/2502] eta: 0:19:59 lr: 0.000015 loss_cls: 4.0418 (3.9246) grad_norm: 2.3088 (2.3338) time: 0.7651 data: 0.0003 max mem: 8426 +[2024-12-10 17:28:09 root] (utils.py 283): INFO Epoch: [11] [ 950/2502] eta: 0:19:51 lr: 0.000015 loss_cls: 4.2154 (3.9255) grad_norm: 2.3177 (2.3331) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 17:28:17 root] (utils.py 283): INFO Epoch: [11] [ 960/2502] eta: 0:19:43 lr: 0.000015 loss_cls: 4.0377 (3.9239) grad_norm: 2.2493 (2.3327) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 17:28:25 root] (utils.py 283): INFO Epoch: [11] [ 970/2502] eta: 0:19:35 lr: 0.000015 loss_cls: 4.0377 (3.9252) grad_norm: 2.3359 (2.3333) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 17:28:32 root] (utils.py 283): INFO Epoch: [11] [ 980/2502] eta: 0:19:28 lr: 0.000015 loss_cls: 4.1138 (3.9246) grad_norm: 2.3359 (2.3333) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 17:28:40 root] (utils.py 283): INFO Epoch: [11] [ 990/2502] eta: 0:19:20 lr: 0.000015 loss_cls: 4.1138 (3.9270) grad_norm: 2.3043 (2.3339) time: 0.7596 data: 0.0002 max mem: 8426 +[2024-12-10 17:28:48 root] (utils.py 283): INFO Epoch: [11] [1000/2502] eta: 0:19:12 lr: 0.000015 loss_cls: 4.0094 (3.9252) grad_norm: 2.2986 (2.3337) time: 0.7674 data: 0.0002 max mem: 8426 +[2024-12-10 17:28:55 root] (utils.py 283): INFO Epoch: [11] [1010/2502] eta: 0:19:05 lr: 0.000015 loss_cls: 3.8420 (3.9248) grad_norm: 2.2519 (2.3334) time: 0.7696 data: 0.0002 max mem: 8426 +[2024-12-10 17:29:03 root] (utils.py 283): INFO Epoch: [11] [1020/2502] eta: 0:18:57 lr: 0.000015 loss_cls: 4.0274 (3.9265) grad_norm: 2.2679 (2.3337) time: 0.7639 data: 0.0003 max mem: 8426 +[2024-12-10 17:29:11 root] (utils.py 283): INFO Epoch: [11] [1030/2502] eta: 0:18:49 lr: 0.000015 loss_cls: 4.0338 (3.9260) grad_norm: 2.3331 (2.3346) time: 0.7650 data: 0.0003 max mem: 8426 +[2024-12-10 17:29:18 root] (utils.py 283): INFO Epoch: [11] [1040/2502] eta: 0:18:41 lr: 0.000015 loss_cls: 4.2132 (3.9282) grad_norm: 2.3331 (2.3343) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 17:29:26 root] (utils.py 283): INFO Epoch: [11] [1050/2502] eta: 0:18:34 lr: 0.000015 loss_cls: 4.1675 (3.9257) grad_norm: 2.2880 (2.3346) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 17:29:33 root] (utils.py 283): INFO Epoch: [11] [1060/2502] eta: 0:18:26 lr: 0.000015 loss_cls: 4.0744 (3.9271) grad_norm: 2.2827 (2.3346) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 17:29:41 root] (utils.py 283): INFO Epoch: [11] [1070/2502] eta: 0:18:18 lr: 0.000015 loss_cls: 4.0858 (3.9278) grad_norm: 2.2687 (2.3345) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 17:29:49 root] (utils.py 283): INFO Epoch: [11] [1080/2502] eta: 0:18:10 lr: 0.000015 loss_cls: 4.0994 (3.9282) grad_norm: 2.2719 (2.3347) time: 0.7604 data: 0.0002 max mem: 8426 +[2024-12-10 17:29:56 root] (utils.py 283): INFO Epoch: [11] [1090/2502] eta: 0:18:03 lr: 0.000015 loss_cls: 4.0817 (3.9299) grad_norm: 2.2939 (2.3345) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 17:30:04 root] (utils.py 283): INFO Epoch: [11] [1100/2502] eta: 0:17:55 lr: 0.000015 loss_cls: 4.3287 (3.9333) grad_norm: 2.3155 (2.3343) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-10 17:30:12 root] (utils.py 283): INFO Epoch: [11] [1110/2502] eta: 0:17:47 lr: 0.000015 loss_cls: 4.2857 (3.9323) grad_norm: 2.3323 (2.3340) time: 0.7610 data: 0.0002 max mem: 8426 +[2024-12-10 17:30:19 root] (utils.py 283): INFO Epoch: [11] [1120/2502] eta: 0:17:39 lr: 0.000015 loss_cls: 4.0876 (3.9316) grad_norm: 2.3158 (2.3339) time: 0.7588 data: 0.0002 max mem: 8426 +[2024-12-10 17:30:27 root] (utils.py 283): INFO Epoch: [11] [1130/2502] eta: 0:17:32 lr: 0.000015 loss_cls: 4.0054 (3.9321) grad_norm: 2.3161 (2.3339) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 17:30:34 root] (utils.py 283): INFO Epoch: [11] [1140/2502] eta: 0:17:24 lr: 0.000015 loss_cls: 4.0054 (3.9333) grad_norm: 2.2655 (2.3331) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-10 17:30:42 root] (utils.py 283): INFO Epoch: [11] [1150/2502] eta: 0:17:16 lr: 0.000015 loss_cls: 3.8206 (3.9314) grad_norm: 2.2484 (2.3332) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 17:30:50 root] (utils.py 283): INFO Epoch: [11] [1160/2502] eta: 0:17:09 lr: 0.000015 loss_cls: 3.8017 (3.9322) grad_norm: 2.2903 (2.3328) time: 0.7630 data: 0.0002 max mem: 8426 +[2024-12-10 17:30:57 root] (utils.py 283): INFO Epoch: [11] [1170/2502] eta: 0:17:01 lr: 0.000015 loss_cls: 4.0471 (3.9333) grad_norm: 2.3051 (2.3327) time: 0.7604 data: 0.0002 max mem: 8426 +[2024-12-10 17:31:05 root] (utils.py 283): INFO Epoch: [11] [1180/2502] eta: 0:16:53 lr: 0.000015 loss_cls: 4.2550 (3.9352) grad_norm: 2.3455 (2.3330) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 17:31:13 root] (utils.py 283): INFO Epoch: [11] [1190/2502] eta: 0:16:46 lr: 0.000015 loss_cls: 4.2621 (3.9356) grad_norm: 2.3732 (2.3335) time: 0.7664 data: 0.0003 max mem: 8426 +[2024-12-10 17:31:20 root] (utils.py 283): INFO Epoch: [11] [1200/2502] eta: 0:16:38 lr: 0.000015 loss_cls: 3.9650 (3.9368) grad_norm: 2.3932 (2.3341) time: 0.7631 data: 0.0003 max mem: 8426 +[2024-12-10 17:31:28 root] (utils.py 283): INFO Epoch: [11] [1210/2502] eta: 0:16:30 lr: 0.000015 loss_cls: 4.2303 (3.9392) grad_norm: 2.3650 (2.3342) time: 0.7618 data: 0.0002 max mem: 8426 +[2024-12-10 17:31:36 root] (utils.py 283): INFO Epoch: [11] [1220/2502] eta: 0:16:22 lr: 0.000015 loss_cls: 4.0764 (3.9372) grad_norm: 2.3589 (2.3349) time: 0.7635 data: 0.0003 max mem: 8426 +[2024-12-10 17:31:43 root] (utils.py 283): INFO Epoch: [11] [1230/2502] eta: 0:16:15 lr: 0.000015 loss_cls: 4.0168 (3.9381) grad_norm: 2.4152 (2.3355) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 17:31:51 root] (utils.py 283): INFO Epoch: [11] [1240/2502] eta: 0:16:07 lr: 0.000015 loss_cls: 4.0619 (3.9371) grad_norm: 2.3402 (2.3352) time: 0.7677 data: 0.0002 max mem: 8426 +[2024-12-10 17:31:59 root] (utils.py 283): INFO Epoch: [11] [1250/2502] eta: 0:16:00 lr: 0.000015 loss_cls: 3.9572 (3.9347) grad_norm: 2.3397 (2.3351) time: 0.7737 data: 0.0003 max mem: 8426 +[2024-12-10 17:32:06 root] (utils.py 283): INFO Epoch: [11] [1260/2502] eta: 0:15:52 lr: 0.000015 loss_cls: 3.8989 (3.9335) grad_norm: 2.3780 (2.3360) time: 0.7766 data: 0.0003 max mem: 8426 +[2024-12-10 17:32:14 root] (utils.py 283): INFO Epoch: [11] [1270/2502] eta: 0:15:44 lr: 0.000015 loss_cls: 3.9584 (3.9351) grad_norm: 2.3849 (2.3360) time: 0.7774 data: 0.0003 max mem: 8426 +[2024-12-10 17:32:22 root] (utils.py 283): INFO Epoch: [11] [1280/2502] eta: 0:15:37 lr: 0.000015 loss_cls: 4.0635 (3.9350) grad_norm: 2.3148 (2.3357) time: 0.7766 data: 0.0003 max mem: 8426 +[2024-12-10 17:32:30 root] (utils.py 283): INFO Epoch: [11] [1290/2502] eta: 0:15:29 lr: 0.000015 loss_cls: 4.0541 (3.9342) grad_norm: 2.3386 (2.3360) time: 0.7762 data: 0.0003 max mem: 8426 +[2024-12-10 17:32:37 root] (utils.py 283): INFO Epoch: [11] [1300/2502] eta: 0:15:22 lr: 0.000015 loss_cls: 4.0829 (3.9340) grad_norm: 2.3453 (2.3361) time: 0.7751 data: 0.0003 max mem: 8426 +[2024-12-10 17:32:45 root] (utils.py 283): INFO Epoch: [11] [1310/2502] eta: 0:15:14 lr: 0.000015 loss_cls: 4.0829 (3.9347) grad_norm: 2.3453 (2.3368) time: 0.7686 data: 0.0002 max mem: 8426 +[2024-12-10 17:32:53 root] (utils.py 283): INFO Epoch: [11] [1320/2502] eta: 0:15:06 lr: 0.000015 loss_cls: 4.0823 (3.9332) grad_norm: 2.3895 (2.3369) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-10 17:33:00 root] (utils.py 283): INFO Epoch: [11] [1330/2502] eta: 0:14:58 lr: 0.000015 loss_cls: 3.8453 (3.9339) grad_norm: 2.2796 (2.3363) time: 0.7611 data: 0.0003 max mem: 8426 +[2024-12-10 17:33:08 root] (utils.py 283): INFO Epoch: [11] [1340/2502] eta: 0:14:51 lr: 0.000015 loss_cls: 4.0906 (3.9356) grad_norm: 2.2803 (2.3362) time: 0.7635 data: 0.0003 max mem: 8426 +[2024-12-10 17:33:16 root] (utils.py 283): INFO Epoch: [11] [1350/2502] eta: 0:14:43 lr: 0.000015 loss_cls: 4.2881 (3.9373) grad_norm: 2.3313 (2.3361) time: 0.7631 data: 0.0003 max mem: 8426 +[2024-12-10 17:33:23 root] (utils.py 283): INFO Epoch: [11] [1360/2502] eta: 0:14:35 lr: 0.000015 loss_cls: 4.0366 (3.9371) grad_norm: 2.3355 (2.3362) time: 0.7638 data: 0.0003 max mem: 8426 +[2024-12-10 17:33:31 root] (utils.py 283): INFO Epoch: [11] [1370/2502] eta: 0:14:28 lr: 0.000015 loss_cls: 3.8574 (3.9353) grad_norm: 2.3350 (2.3361) time: 0.7632 data: 0.0002 max mem: 8426 +[2024-12-10 17:33:39 root] (utils.py 283): INFO Epoch: [11] [1380/2502] eta: 0:14:20 lr: 0.000015 loss_cls: 3.8594 (3.9346) grad_norm: 2.3462 (2.3368) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-10 17:33:46 root] (utils.py 283): INFO Epoch: [11] [1390/2502] eta: 0:14:12 lr: 0.000015 loss_cls: 3.8679 (3.9330) grad_norm: 2.3664 (2.3370) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 17:33:54 root] (utils.py 283): INFO Epoch: [11] [1400/2502] eta: 0:14:05 lr: 0.000015 loss_cls: 3.9483 (3.9326) grad_norm: 2.3324 (2.3367) time: 0.7612 data: 0.0002 max mem: 8426 +[2024-12-10 17:34:01 root] (utils.py 283): INFO Epoch: [11] [1410/2502] eta: 0:13:57 lr: 0.000015 loss_cls: 3.8790 (3.9313) grad_norm: 2.2932 (2.3365) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 17:34:09 root] (utils.py 283): INFO Epoch: [11] [1420/2502] eta: 0:13:49 lr: 0.000015 loss_cls: 3.6804 (3.9299) grad_norm: 2.2951 (2.3365) time: 0.7647 data: 0.0003 max mem: 8426 +[2024-12-10 17:34:17 root] (utils.py 283): INFO Epoch: [11] [1430/2502] eta: 0:13:42 lr: 0.000015 loss_cls: 3.6608 (3.9282) grad_norm: 2.3380 (2.3369) time: 0.7610 data: 0.0003 max mem: 8426 +[2024-12-10 17:34:24 root] (utils.py 283): INFO Epoch: [11] [1440/2502] eta: 0:13:34 lr: 0.000015 loss_cls: 3.9920 (3.9296) grad_norm: 2.3253 (2.3369) time: 0.7631 data: 0.0003 max mem: 8426 +[2024-12-10 17:34:32 root] (utils.py 283): INFO Epoch: [11] [1450/2502] eta: 0:13:26 lr: 0.000015 loss_cls: 4.0870 (3.9301) grad_norm: 2.3113 (2.3366) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 17:34:40 root] (utils.py 283): INFO Epoch: [11] [1460/2502] eta: 0:13:19 lr: 0.000015 loss_cls: 4.0219 (3.9305) grad_norm: 2.3051 (2.3364) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 17:34:47 root] (utils.py 283): INFO Epoch: [11] [1470/2502] eta: 0:13:11 lr: 0.000015 loss_cls: 4.0140 (3.9303) grad_norm: 2.3051 (2.3367) time: 0.7668 data: 0.0002 max mem: 8426 +[2024-12-10 17:34:55 root] (utils.py 283): INFO Epoch: [11] [1480/2502] eta: 0:13:03 lr: 0.000015 loss_cls: 4.0144 (3.9302) grad_norm: 2.3127 (2.3366) time: 0.7670 data: 0.0002 max mem: 8426 +[2024-12-10 17:35:03 root] (utils.py 283): INFO Epoch: [11] [1490/2502] eta: 0:12:56 lr: 0.000015 loss_cls: 4.1661 (3.9306) grad_norm: 2.2964 (2.3363) time: 0.7674 data: 0.0002 max mem: 8426 +[2024-12-10 17:35:10 root] (utils.py 283): INFO Epoch: [11] [1500/2502] eta: 0:12:48 lr: 0.000015 loss_cls: 3.9554 (3.9301) grad_norm: 2.2797 (2.3359) time: 0.7682 data: 0.0002 max mem: 8426 +[2024-12-10 17:35:18 root] (utils.py 283): INFO Epoch: [11] [1510/2502] eta: 0:12:40 lr: 0.000015 loss_cls: 3.9554 (3.9312) grad_norm: 2.2753 (2.3357) time: 0.7665 data: 0.0002 max mem: 8426 +[2024-12-10 17:35:26 root] (utils.py 283): INFO Epoch: [11] [1520/2502] eta: 0:12:33 lr: 0.000015 loss_cls: 4.1047 (3.9302) grad_norm: 2.3082 (2.3358) time: 0.7693 data: 0.0002 max mem: 8426 +[2024-12-10 17:35:33 root] (utils.py 283): INFO Epoch: [11] [1530/2502] eta: 0:12:25 lr: 0.000015 loss_cls: 3.8578 (3.9301) grad_norm: 2.3563 (2.3359) time: 0.7698 data: 0.0002 max mem: 8426 +[2024-12-10 17:35:41 root] (utils.py 283): INFO Epoch: [11] [1540/2502] eta: 0:12:17 lr: 0.000015 loss_cls: 3.9867 (3.9305) grad_norm: 2.3451 (2.3358) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 17:35:49 root] (utils.py 283): INFO Epoch: [11] [1550/2502] eta: 0:12:10 lr: 0.000015 loss_cls: 3.9873 (3.9296) grad_norm: 2.3244 (2.3358) time: 0.7762 data: 0.0002 max mem: 8426 +[2024-12-10 17:35:57 root] (utils.py 283): INFO Epoch: [11] [1560/2502] eta: 0:12:02 lr: 0.000015 loss_cls: 3.9873 (3.9312) grad_norm: 2.3513 (2.3360) time: 0.7829 data: 0.0002 max mem: 8426 +[2024-12-10 17:36:04 root] (utils.py 283): INFO Epoch: [11] [1570/2502] eta: 0:11:54 lr: 0.000015 loss_cls: 3.9945 (3.9311) grad_norm: 2.3513 (2.3362) time: 0.7707 data: 0.0002 max mem: 8426 +[2024-12-10 17:36:12 root] (utils.py 283): INFO Epoch: [11] [1580/2502] eta: 0:11:47 lr: 0.000015 loss_cls: 4.0050 (3.9317) grad_norm: 2.3555 (2.3362) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 17:36:20 root] (utils.py 283): INFO Epoch: [11] [1590/2502] eta: 0:11:39 lr: 0.000015 loss_cls: 4.0050 (3.9312) grad_norm: 2.3394 (2.3361) time: 0.7668 data: 0.0002 max mem: 8426 +[2024-12-10 17:36:27 root] (utils.py 283): INFO Epoch: [11] [1600/2502] eta: 0:11:31 lr: 0.000015 loss_cls: 3.9495 (3.9306) grad_norm: 2.2961 (2.3361) time: 0.7613 data: 0.0002 max mem: 8426 +[2024-12-10 17:36:35 root] (utils.py 283): INFO Epoch: [11] [1610/2502] eta: 0:11:24 lr: 0.000015 loss_cls: 3.7472 (3.9280) grad_norm: 2.3100 (2.3361) time: 0.7601 data: 0.0002 max mem: 8426 +[2024-12-10 17:36:43 root] (utils.py 283): INFO Epoch: [11] [1620/2502] eta: 0:11:16 lr: 0.000015 loss_cls: 3.7764 (3.9292) grad_norm: 2.3630 (2.3364) time: 0.7613 data: 0.0002 max mem: 8426 +[2024-12-10 17:36:50 root] (utils.py 283): INFO Epoch: [11] [1630/2502] eta: 0:11:08 lr: 0.000015 loss_cls: 4.2040 (3.9299) grad_norm: 2.3454 (2.3363) time: 0.7693 data: 0.0002 max mem: 8426 +[2024-12-10 17:36:58 root] (utils.py 283): INFO Epoch: [11] [1640/2502] eta: 0:11:01 lr: 0.000015 loss_cls: 4.0871 (3.9299) grad_norm: 2.3335 (2.3364) time: 0.7709 data: 0.0002 max mem: 8426 +[2024-12-10 17:37:06 root] (utils.py 283): INFO Epoch: [11] [1650/2502] eta: 0:10:53 lr: 0.000015 loss_cls: 3.9579 (3.9290) grad_norm: 2.2916 (2.3359) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 17:37:13 root] (utils.py 283): INFO Epoch: [11] [1660/2502] eta: 0:10:45 lr: 0.000015 loss_cls: 3.7594 (3.9280) grad_norm: 2.2824 (2.3357) time: 0.7673 data: 0.0003 max mem: 8426 +[2024-12-10 17:37:21 root] (utils.py 283): INFO Epoch: [11] [1670/2502] eta: 0:10:38 lr: 0.000015 loss_cls: 3.7451 (3.9273) grad_norm: 2.3239 (2.3359) time: 0.7657 data: 0.0003 max mem: 8426 +[2024-12-10 17:37:29 root] (utils.py 283): INFO Epoch: [11] [1680/2502] eta: 0:10:30 lr: 0.000015 loss_cls: 3.8641 (3.9268) grad_norm: 2.3662 (2.3362) time: 0.7647 data: 0.0003 max mem: 8426 +[2024-12-10 17:37:36 root] (utils.py 283): INFO Epoch: [11] [1690/2502] eta: 0:10:22 lr: 0.000015 loss_cls: 4.1112 (3.9268) grad_norm: 2.3543 (2.3363) time: 0.7653 data: 0.0003 max mem: 8426 +[2024-12-10 17:37:44 root] (utils.py 283): INFO Epoch: [11] [1700/2502] eta: 0:10:15 lr: 0.000015 loss_cls: 4.1801 (3.9269) grad_norm: 2.3543 (2.3365) time: 0.7661 data: 0.0003 max mem: 8426 +[2024-12-10 17:37:52 root] (utils.py 283): INFO Epoch: [11] [1710/2502] eta: 0:10:07 lr: 0.000015 loss_cls: 4.0657 (3.9258) grad_norm: 2.3706 (2.3365) time: 0.7655 data: 0.0003 max mem: 8426 +[2024-12-10 17:37:59 root] (utils.py 283): INFO Epoch: [11] [1720/2502] eta: 0:09:59 lr: 0.000015 loss_cls: 3.9532 (3.9253) grad_norm: 2.3157 (2.3366) time: 0.7617 data: 0.0002 max mem: 8426 +[2024-12-10 17:38:07 root] (utils.py 283): INFO Epoch: [11] [1730/2502] eta: 0:09:51 lr: 0.000015 loss_cls: 4.1497 (3.9254) grad_norm: 2.3670 (2.3367) time: 0.7590 data: 0.0002 max mem: 8426 +[2024-12-10 17:38:14 root] (utils.py 283): INFO Epoch: [11] [1740/2502] eta: 0:09:44 lr: 0.000015 loss_cls: 4.0472 (3.9255) grad_norm: 2.3252 (2.3367) time: 0.7567 data: 0.0002 max mem: 8426 +[2024-12-10 17:38:22 root] (utils.py 283): INFO Epoch: [11] [1750/2502] eta: 0:09:36 lr: 0.000015 loss_cls: 3.8755 (3.9253) grad_norm: 2.3795 (2.3371) time: 0.7584 data: 0.0002 max mem: 8426 +[2024-12-10 17:38:29 root] (utils.py 283): INFO Epoch: [11] [1760/2502] eta: 0:09:28 lr: 0.000015 loss_cls: 4.1441 (3.9273) grad_norm: 2.3605 (2.3371) time: 0.7585 data: 0.0002 max mem: 8426 +[2024-12-10 17:38:37 root] (utils.py 283): INFO Epoch: [11] [1770/2502] eta: 0:09:21 lr: 0.000015 loss_cls: 4.2783 (3.9294) grad_norm: 2.2953 (2.3370) time: 0.7562 data: 0.0002 max mem: 8426 +[2024-12-10 17:38:45 root] (utils.py 283): INFO Epoch: [11] [1780/2502] eta: 0:09:13 lr: 0.000015 loss_cls: 4.1955 (3.9292) grad_norm: 2.3072 (2.3370) time: 0.7597 data: 0.0002 max mem: 8426 +[2024-12-10 17:38:52 root] (utils.py 283): INFO Epoch: [11] [1790/2502] eta: 0:09:05 lr: 0.000015 loss_cls: 3.9421 (3.9296) grad_norm: 2.3326 (2.3371) time: 0.7571 data: 0.0002 max mem: 8426 +[2024-12-10 17:39:00 root] (utils.py 283): INFO Epoch: [11] [1800/2502] eta: 0:08:58 lr: 0.000015 loss_cls: 3.9497 (3.9295) grad_norm: 2.3505 (2.3375) time: 0.7542 data: 0.0003 max mem: 8426 +[2024-12-10 17:39:07 root] (utils.py 283): INFO Epoch: [11] [1810/2502] eta: 0:08:50 lr: 0.000015 loss_cls: 4.0239 (3.9303) grad_norm: 2.3471 (2.3377) time: 0.7551 data: 0.0002 max mem: 8426 +[2024-12-10 17:39:15 root] (utils.py 283): INFO Epoch: [11] [1820/2502] eta: 0:08:42 lr: 0.000015 loss_cls: 4.1885 (3.9314) grad_norm: 2.3354 (2.3378) time: 0.7546 data: 0.0002 max mem: 8426 +[2024-12-10 17:39:22 root] (utils.py 283): INFO Epoch: [11] [1830/2502] eta: 0:08:34 lr: 0.000015 loss_cls: 4.2561 (3.9331) grad_norm: 2.3908 (2.3382) time: 0.7546 data: 0.0002 max mem: 8426 +[2024-12-10 17:39:30 root] (utils.py 283): INFO Epoch: [11] [1840/2502] eta: 0:08:27 lr: 0.000015 loss_cls: 3.9924 (3.9311) grad_norm: 2.3546 (2.3380) time: 0.7548 data: 0.0002 max mem: 8426 +[2024-12-10 17:39:37 root] (utils.py 283): INFO Epoch: [11] [1850/2502] eta: 0:08:19 lr: 0.000015 loss_cls: 3.7396 (3.9307) grad_norm: 2.2903 (2.3380) time: 0.7548 data: 0.0002 max mem: 8426 +[2024-12-10 17:39:45 root] (utils.py 283): INFO Epoch: [11] [1860/2502] eta: 0:08:11 lr: 0.000015 loss_cls: 3.7510 (3.9299) grad_norm: 2.3758 (2.3382) time: 0.7555 data: 0.0002 max mem: 8426 +[2024-12-10 17:39:53 root] (utils.py 283): INFO Epoch: [11] [1870/2502] eta: 0:08:04 lr: 0.000015 loss_cls: 3.9648 (3.9298) grad_norm: 2.3288 (2.3381) time: 0.7604 data: 0.0002 max mem: 8426 +[2024-12-10 17:40:00 root] (utils.py 283): INFO Epoch: [11] [1880/2502] eta: 0:07:56 lr: 0.000015 loss_cls: 4.1619 (3.9303) grad_norm: 2.2729 (2.3379) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 17:40:08 root] (utils.py 283): INFO Epoch: [11] [1890/2502] eta: 0:07:48 lr: 0.000015 loss_cls: 4.0572 (3.9301) grad_norm: 2.2792 (2.3377) time: 0.7685 data: 0.0002 max mem: 8426 +[2024-12-10 17:40:16 root] (utils.py 283): INFO Epoch: [11] [1900/2502] eta: 0:07:41 lr: 0.000015 loss_cls: 3.9479 (3.9300) grad_norm: 2.2986 (2.3378) time: 0.7713 data: 0.0002 max mem: 8426 +[2024-12-10 17:40:24 root] (utils.py 283): INFO Epoch: [11] [1910/2502] eta: 0:07:33 lr: 0.000015 loss_cls: 4.0879 (3.9296) grad_norm: 2.2985 (2.3376) time: 0.7724 data: 0.0002 max mem: 8426 +[2024-12-10 17:40:31 root] (utils.py 283): INFO Epoch: [11] [1920/2502] eta: 0:07:25 lr: 0.000015 loss_cls: 4.0879 (3.9298) grad_norm: 2.2910 (2.3377) time: 0.7798 data: 0.0002 max mem: 8426 +[2024-12-10 17:40:39 root] (utils.py 283): INFO Epoch: [11] [1930/2502] eta: 0:07:18 lr: 0.000015 loss_cls: 3.7948 (3.9281) grad_norm: 2.3615 (2.3382) time: 0.7808 data: 0.0002 max mem: 8426 +[2024-12-10 17:40:47 root] (utils.py 283): INFO Epoch: [11] [1940/2502] eta: 0:07:10 lr: 0.000015 loss_cls: 3.7948 (3.9283) grad_norm: 2.3665 (2.3385) time: 0.7763 data: 0.0002 max mem: 8426 +[2024-12-10 17:40:54 root] (utils.py 283): INFO Epoch: [11] [1950/2502] eta: 0:07:02 lr: 0.000015 loss_cls: 4.1580 (3.9282) grad_norm: 2.3451 (2.3384) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-10 17:41:02 root] (utils.py 283): INFO Epoch: [11] [1960/2502] eta: 0:06:55 lr: 0.000015 loss_cls: 4.1976 (3.9297) grad_norm: 2.3540 (2.3387) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-10 17:41:10 root] (utils.py 283): INFO Epoch: [11] [1970/2502] eta: 0:06:47 lr: 0.000015 loss_cls: 4.2182 (3.9296) grad_norm: 2.3722 (2.3389) time: 0.7605 data: 0.0002 max mem: 8426 +[2024-12-10 17:41:17 root] (utils.py 283): INFO Epoch: [11] [1980/2502] eta: 0:06:39 lr: 0.000015 loss_cls: 4.2679 (3.9307) grad_norm: 2.3186 (2.3387) time: 0.7609 data: 0.0002 max mem: 8426 +[2024-12-10 17:41:25 root] (utils.py 283): INFO Epoch: [11] [1990/2502] eta: 0:06:32 lr: 0.000015 loss_cls: 4.0658 (3.9318) grad_norm: 2.3615 (2.3389) time: 0.7610 data: 0.0002 max mem: 8426 +[2024-12-10 17:41:33 root] (utils.py 283): INFO Epoch: [11] [2000/2502] eta: 0:06:24 lr: 0.000015 loss_cls: 4.0371 (3.9303) grad_norm: 2.3734 (2.3391) time: 0.7612 data: 0.0003 max mem: 8426 +[2024-12-10 17:41:40 root] (utils.py 283): INFO Epoch: [11] [2010/2502] eta: 0:06:16 lr: 0.000015 loss_cls: 3.5397 (3.9290) grad_norm: 2.3500 (2.3394) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 17:41:48 root] (utils.py 283): INFO Epoch: [11] [2020/2502] eta: 0:06:09 lr: 0.000015 loss_cls: 3.8964 (3.9298) grad_norm: 2.3114 (2.3393) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 17:41:55 root] (utils.py 283): INFO Epoch: [11] [2030/2502] eta: 0:06:01 lr: 0.000015 loss_cls: 3.9562 (3.9289) grad_norm: 2.2706 (2.3389) time: 0.7628 data: 0.0003 max mem: 8426 +[2024-12-10 17:42:03 root] (utils.py 283): INFO Epoch: [11] [2040/2502] eta: 0:05:53 lr: 0.000015 loss_cls: 3.8876 (3.9298) grad_norm: 2.2596 (2.3386) time: 0.7690 data: 0.0003 max mem: 8426 +[2024-12-10 17:42:11 root] (utils.py 283): INFO Epoch: [11] [2050/2502] eta: 0:05:46 lr: 0.000015 loss_cls: 4.0025 (3.9301) grad_norm: 2.3333 (2.3389) time: 0.7702 data: 0.0002 max mem: 8426 +[2024-12-10 17:42:18 root] (utils.py 283): INFO Epoch: [11] [2060/2502] eta: 0:05:38 lr: 0.000015 loss_cls: 4.0539 (3.9307) grad_norm: 2.2978 (2.3387) time: 0.7617 data: 0.0002 max mem: 8426 +[2024-12-10 17:42:26 root] (utils.py 283): INFO Epoch: [11] [2070/2502] eta: 0:05:30 lr: 0.000015 loss_cls: 4.0295 (3.9296) grad_norm: 2.3114 (2.3388) time: 0.7601 data: 0.0002 max mem: 8426 +[2024-12-10 17:42:34 root] (utils.py 283): INFO Epoch: [11] [2080/2502] eta: 0:05:23 lr: 0.000015 loss_cls: 3.6726 (3.9286) grad_norm: 2.3575 (2.3389) time: 0.7614 data: 0.0002 max mem: 8426 +[2024-12-10 17:42:41 root] (utils.py 283): INFO Epoch: [11] [2090/2502] eta: 0:05:15 lr: 0.000015 loss_cls: 3.7410 (3.9280) grad_norm: 2.2582 (2.3383) time: 0.7681 data: 0.0002 max mem: 8426 +[2024-12-10 17:42:49 root] (utils.py 283): INFO Epoch: [11] [2100/2502] eta: 0:05:07 lr: 0.000015 loss_cls: 3.7410 (3.9276) grad_norm: 2.2582 (2.3382) time: 0.7700 data: 0.0002 max mem: 8426 +[2024-12-10 17:42:57 root] (utils.py 283): INFO Epoch: [11] [2110/2502] eta: 0:05:00 lr: 0.000015 loss_cls: 3.7871 (3.9269) grad_norm: 2.3677 (2.3385) time: 0.7633 data: 0.0003 max mem: 8426 +[2024-12-10 17:43:04 root] (utils.py 283): INFO Epoch: [11] [2120/2502] eta: 0:04:52 lr: 0.000015 loss_cls: 4.0888 (3.9281) grad_norm: 2.3677 (2.3386) time: 0.7632 data: 0.0003 max mem: 8426 +[2024-12-10 17:43:12 root] (utils.py 283): INFO Epoch: [11] [2130/2502] eta: 0:04:44 lr: 0.000015 loss_cls: 4.1146 (3.9288) grad_norm: 2.3208 (2.3386) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 17:43:20 root] (utils.py 283): INFO Epoch: [11] [2140/2502] eta: 0:04:37 lr: 0.000015 loss_cls: 4.0052 (3.9288) grad_norm: 2.3318 (2.3384) time: 0.7633 data: 0.0002 max mem: 8426 +[2024-12-10 17:43:27 root] (utils.py 283): INFO Epoch: [11] [2150/2502] eta: 0:04:29 lr: 0.000015 loss_cls: 4.0495 (3.9291) grad_norm: 2.2987 (2.3382) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 17:43:35 root] (utils.py 283): INFO Epoch: [11] [2160/2502] eta: 0:04:21 lr: 0.000015 loss_cls: 4.0715 (3.9305) grad_norm: 2.3476 (2.3386) time: 0.7658 data: 0.0003 max mem: 8426 +[2024-12-10 17:43:43 root] (utils.py 283): INFO Epoch: [11] [2170/2502] eta: 0:04:14 lr: 0.000015 loss_cls: 4.1880 (3.9310) grad_norm: 2.3830 (2.3387) time: 0.7643 data: 0.0003 max mem: 8426 +[2024-12-10 17:43:50 root] (utils.py 283): INFO Epoch: [11] [2180/2502] eta: 0:04:06 lr: 0.000015 loss_cls: 4.1880 (3.9315) grad_norm: 2.3370 (2.3388) time: 0.7630 data: 0.0002 max mem: 8426 +[2024-12-10 17:43:58 root] (utils.py 283): INFO Epoch: [11] [2190/2502] eta: 0:03:58 lr: 0.000015 loss_cls: 4.0233 (3.9310) grad_norm: 2.3412 (2.3389) time: 0.7637 data: 0.0003 max mem: 8426 +[2024-12-10 17:44:05 root] (utils.py 283): INFO Epoch: [11] [2200/2502] eta: 0:03:51 lr: 0.000015 loss_cls: 4.0803 (3.9318) grad_norm: 2.4026 (2.3392) time: 0.7634 data: 0.0003 max mem: 8426 +[2024-12-10 17:44:13 root] (utils.py 283): INFO Epoch: [11] [2210/2502] eta: 0:03:43 lr: 0.000015 loss_cls: 4.0803 (3.9315) grad_norm: 2.3375 (2.3390) time: 0.7664 data: 0.0003 max mem: 8426 +[2024-12-10 17:44:21 root] (utils.py 283): INFO Epoch: [11] [2220/2502] eta: 0:03:36 lr: 0.000015 loss_cls: 3.7621 (3.9306) grad_norm: 2.2789 (2.3387) time: 0.7670 data: 0.0003 max mem: 8426 +[2024-12-10 17:44:28 root] (utils.py 283): INFO Epoch: [11] [2230/2502] eta: 0:03:28 lr: 0.000015 loss_cls: 3.7723 (3.9307) grad_norm: 2.2674 (2.3388) time: 0.7628 data: 0.0003 max mem: 8426 +[2024-12-10 17:44:36 root] (utils.py 283): INFO Epoch: [11] [2240/2502] eta: 0:03:20 lr: 0.000015 loss_cls: 3.8551 (3.9298) grad_norm: 2.2827 (2.3385) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 17:44:44 root] (utils.py 283): INFO Epoch: [11] [2250/2502] eta: 0:03:13 lr: 0.000015 loss_cls: 3.9341 (3.9298) grad_norm: 2.2677 (2.3381) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 17:44:51 root] (utils.py 283): INFO Epoch: [11] [2260/2502] eta: 0:03:05 lr: 0.000015 loss_cls: 3.9120 (3.9285) grad_norm: 2.3084 (2.3382) time: 0.7659 data: 0.0003 max mem: 8426 +[2024-12-10 17:44:59 root] (utils.py 283): INFO Epoch: [11] [2270/2502] eta: 0:02:57 lr: 0.000015 loss_cls: 3.7858 (3.9287) grad_norm: 2.3421 (2.3383) time: 0.7698 data: 0.0003 max mem: 8426 +[2024-12-10 17:45:07 root] (utils.py 283): INFO Epoch: [11] [2280/2502] eta: 0:02:50 lr: 0.000015 loss_cls: 4.0301 (3.9295) grad_norm: 2.2539 (2.3378) time: 0.7657 data: 0.0003 max mem: 8426 +[2024-12-10 17:45:14 root] (utils.py 283): INFO Epoch: [11] [2290/2502] eta: 0:02:42 lr: 0.000015 loss_cls: 4.1595 (3.9300) grad_norm: 2.2759 (2.3381) time: 0.7658 data: 0.0003 max mem: 8426 +[2024-12-10 17:45:22 root] (utils.py 283): INFO Epoch: [11] [2300/2502] eta: 0:02:34 lr: 0.000015 loss_cls: 4.1157 (3.9305) grad_norm: 2.3744 (2.3384) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 17:45:30 root] (utils.py 283): INFO Epoch: [11] [2310/2502] eta: 0:02:27 lr: 0.000015 loss_cls: 4.1112 (3.9307) grad_norm: 2.3726 (2.3380) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 17:45:37 root] (utils.py 283): INFO Epoch: [11] [2320/2502] eta: 0:02:19 lr: 0.000015 loss_cls: 4.0368 (3.9310) grad_norm: 2.2729 (2.3380) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 17:45:45 root] (utils.py 283): INFO Epoch: [11] [2330/2502] eta: 0:02:11 lr: 0.000015 loss_cls: 4.1834 (3.9315) grad_norm: 2.3138 (2.3382) time: 0.7674 data: 0.0002 max mem: 8426 +[2024-12-10 17:45:53 root] (utils.py 283): INFO Epoch: [11] [2340/2502] eta: 0:02:04 lr: 0.000015 loss_cls: 4.0351 (3.9314) grad_norm: 2.3650 (2.3383) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 17:46:00 root] (utils.py 283): INFO Epoch: [11] [2350/2502] eta: 0:01:56 lr: 0.000015 loss_cls: 4.0351 (3.9315) grad_norm: 2.2987 (2.3382) time: 0.7595 data: 0.0002 max mem: 8426 +[2024-12-10 17:46:08 root] (utils.py 283): INFO Epoch: [11] [2360/2502] eta: 0:01:48 lr: 0.000015 loss_cls: 3.9402 (3.9311) grad_norm: 2.3388 (2.3383) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-10 17:46:16 root] (utils.py 283): INFO Epoch: [11] [2370/2502] eta: 0:01:41 lr: 0.000015 loss_cls: 3.6037 (3.9302) grad_norm: 2.3550 (2.3384) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 17:46:23 root] (utils.py 283): INFO Epoch: [11] [2380/2502] eta: 0:01:33 lr: 0.000015 loss_cls: 3.7070 (3.9294) grad_norm: 2.2575 (2.3381) time: 0.7625 data: 0.0002 max mem: 8426 +[2024-12-10 17:46:31 root] (utils.py 283): INFO Epoch: [11] [2390/2502] eta: 0:01:25 lr: 0.000015 loss_cls: 3.9515 (3.9293) grad_norm: 2.3177 (2.3383) time: 0.7617 data: 0.0002 max mem: 8426 +[2024-12-10 17:46:38 root] (utils.py 283): INFO Epoch: [11] [2400/2502] eta: 0:01:18 lr: 0.000015 loss_cls: 3.9515 (3.9286) grad_norm: 2.3310 (2.3382) time: 0.7608 data: 0.0002 max mem: 8426 +[2024-12-10 17:46:46 root] (utils.py 283): INFO Epoch: [11] [2410/2502] eta: 0:01:10 lr: 0.000015 loss_cls: 3.8323 (3.9282) grad_norm: 2.2510 (2.3378) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 17:46:54 root] (utils.py 283): INFO Epoch: [11] [2420/2502] eta: 0:01:02 lr: 0.000015 loss_cls: 3.9012 (3.9288) grad_norm: 2.2626 (2.3376) time: 0.7716 data: 0.0002 max mem: 8426 +[2024-12-10 17:47:02 root] (utils.py 283): INFO Epoch: [11] [2430/2502] eta: 0:00:55 lr: 0.000015 loss_cls: 4.1817 (3.9296) grad_norm: 2.2544 (2.3373) time: 0.7760 data: 0.0002 max mem: 8426 +[2024-12-10 17:47:09 root] (utils.py 283): INFO Epoch: [11] [2440/2502] eta: 0:00:47 lr: 0.000015 loss_cls: 4.2275 (3.9299) grad_norm: 2.3106 (2.3373) time: 0.7759 data: 0.0002 max mem: 8426 +[2024-12-10 17:47:17 root] (utils.py 283): INFO Epoch: [11] [2450/2502] eta: 0:00:39 lr: 0.000015 loss_cls: 4.3006 (3.9309) grad_norm: 2.3518 (2.3374) time: 0.7754 data: 0.0002 max mem: 8426 +[2024-12-10 17:47:25 root] (utils.py 283): INFO Epoch: [11] [2460/2502] eta: 0:00:32 lr: 0.000015 loss_cls: 4.1845 (3.9297) grad_norm: 2.3290 (2.3374) time: 0.7768 data: 0.0002 max mem: 8426 +[2024-12-10 17:47:33 root] (utils.py 283): INFO Epoch: [11] [2470/2502] eta: 0:00:24 lr: 0.000015 loss_cls: 4.1017 (3.9303) grad_norm: 2.2958 (2.3372) time: 0.7779 data: 0.0003 max mem: 8426 +[2024-12-10 17:47:40 root] (utils.py 283): INFO Epoch: [11] [2480/2502] eta: 0:00:16 lr: 0.000015 loss_cls: 4.0546 (3.9306) grad_norm: 2.3162 (2.3372) time: 0.7670 data: 0.0002 max mem: 8426 +[2024-12-10 17:47:48 root] (utils.py 283): INFO Epoch: [11] [2490/2502] eta: 0:00:09 lr: 0.000015 loss_cls: 3.8647 (3.9307) grad_norm: 2.3364 (2.3372) time: 0.7850 data: 0.0242 max mem: 8426 +[2024-12-10 17:47:56 root] (utils.py 283): INFO Epoch: [11] [2500/2502] eta: 0:00:01 lr: 0.000015 loss_cls: 4.0972 (3.9306) grad_norm: 2.3754 (2.3375) time: 0.7865 data: 0.0242 max mem: 8426 +[2024-12-10 17:47:57 root] (utils.py 283): INFO Epoch: [11] [2501/2502] eta: 0:00:00 lr: 0.000015 loss_cls: 4.0198 (3.9304) grad_norm: 2.3827 (2.3376) time: 0.7855 data: 0.0242 max mem: 8426 +[2024-12-10 17:47:57 root] (utils.py 297): INFO Epoch: [11] Total time: 0:31:57 (0.7664 s / it) +[2024-12-10 17:47:57 root] (engine.py 179): INFO Averaged stats:lr: 0.000015 loss_cls: 4.0198 (3.9277) grad_norm: 2.3827 (2.3376) +[2024-12-10 17:47:57 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.6434 (0.6434) acc1: 87.5000 (87.5000) acc3: 96.0938 (96.0938) acc5: 98.4375 (98.4375) time: 0.1274 data: 0.0003 max mem: 8426 +[2024-12-10 17:47:58 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7437 (0.8087) acc1: 87.5000 (81.9602) acc3: 95.3125 (93.3239) acc5: 97.6562 (96.5909) time: 0.1277 data: 0.0004 max mem: 8426 +[2024-12-10 17:48:00 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8438 (0.8653) acc1: 78.9062 (80.4688) acc3: 91.4062 (92.7827) acc5: 96.0938 (95.7217) time: 0.1279 data: 0.0004 max mem: 8426 +[2024-12-10 17:48:01 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9591 (0.8798) acc1: 77.3438 (79.4355) acc3: 92.9688 (93.0948) acc5: 95.3125 (95.6905) time: 0.1285 data: 0.0005 max mem: 8426 +[2024-12-10 17:48:02 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8319 (0.8654) acc1: 79.6875 (80.3163) acc3: 93.7500 (93.2355) acc5: 96.0938 (95.7508) time: 0.1292 data: 0.0005 max mem: 8426 +[2024-12-10 17:48:04 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0512 (0.9510) acc1: 75.7812 (78.4773) acc3: 88.2812 (91.7433) acc5: 92.1875 (94.5925) time: 0.1453 data: 0.0157 max mem: 8426 +[2024-12-10 17:48:05 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2334 (0.9960) acc1: 71.8750 (77.9457) acc3: 85.1562 (90.8555) acc5: 89.0625 (93.7500) time: 0.1468 data: 0.0173 max mem: 8426 +[2024-12-10 17:48:07 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2510 (1.0362) acc1: 75.0000 (77.1017) acc3: 86.7188 (90.2949) acc5: 90.6250 (93.3209) time: 0.1303 data: 0.0021 max mem: 8426 +[2024-12-10 17:48:08 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2532 (1.0726) acc1: 71.8750 (76.2731) acc3: 86.7188 (89.6798) acc5: 89.8438 (92.7951) time: 0.1305 data: 0.0028 max mem: 8426 +[2024-12-10 17:48:10 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2569 (1.1007) acc1: 69.5312 (75.5065) acc3: 85.9375 (89.3286) acc5: 89.0625 (92.5652) time: 0.1486 data: 0.0207 max mem: 8426 +[2024-12-10 17:48:10 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1622 (1.0894) acc1: 72.6562 (75.7120) acc3: 88.2812 (89.4720) acc5: 91.4062 (92.7280) time: 0.1493 data: 0.0206 max mem: 8426 +[2024-12-10 17:48:10 root] (utils.py 297): INFO Test: Total time: 0:00:13 (0.1367 s / it) +[2024-12-10 17:48:11 root] (engine.py 264): INFO * Acc@1 75.470 Acc@3 89.544 Acc@5 92.800 loss 1.092 flops 1.285 layer_flops 1.251 +[2024-12-10 17:48:11 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 75.5% +[2024-12-10 17:48:11 root] (main.py 576): INFO Max accuracy: 75.63% +[2024-12-10 17:48:12 root] (utils.py 283): INFO Epoch: [12] [ 0/2502] eta: 0:33:45 lr: 0.000014 loss_cls: 4.5397 (4.5397) grad_norm: 2.5121 (2.5121) time: 0.8095 data: 0.0005 max mem: 8426 +[2024-12-10 17:48:19 root] (utils.py 283): INFO Epoch: [12] [ 10/2502] eta: 0:32:27 lr: 0.000014 loss_cls: 4.0751 (3.8840) grad_norm: 2.3774 (2.4051) time: 0.7814 data: 0.0003 max mem: 8426 +[2024-12-10 17:48:27 root] (utils.py 283): INFO Epoch: [12] [ 20/2502] eta: 0:31:53 lr: 0.000014 loss_cls: 3.8343 (3.8746) grad_norm: 2.3603 (2.3920) time: 0.7691 data: 0.0003 max mem: 8426 +[2024-12-10 17:48:35 root] (utils.py 283): INFO Epoch: [12] [ 30/2502] eta: 0:31:37 lr: 0.000014 loss_cls: 3.9472 (3.9324) grad_norm: 2.3603 (2.3730) time: 0.7602 data: 0.0003 max mem: 8426 +[2024-12-10 17:48:42 root] (utils.py 283): INFO Epoch: [12] [ 40/2502] eta: 0:31:32 lr: 0.000014 loss_cls: 4.0773 (3.9200) grad_norm: 2.3319 (2.3648) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-10 17:48:50 root] (utils.py 283): INFO Epoch: [12] [ 50/2502] eta: 0:31:19 lr: 0.000014 loss_cls: 4.1100 (3.9322) grad_norm: 2.3319 (2.3523) time: 0.7648 data: 0.0003 max mem: 8426 +[2024-12-10 17:48:58 root] (utils.py 283): INFO Epoch: [12] [ 60/2502] eta: 0:31:10 lr: 0.000014 loss_cls: 3.8168 (3.8713) grad_norm: 2.3174 (2.3507) time: 0.7600 data: 0.0003 max mem: 8426 +[2024-12-10 17:49:05 root] (utils.py 283): INFO Epoch: [12] [ 70/2502] eta: 0:31:02 lr: 0.000014 loss_cls: 3.8168 (3.8784) grad_norm: 2.3346 (2.3506) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 17:49:13 root] (utils.py 283): INFO Epoch: [12] [ 80/2502] eta: 0:30:55 lr: 0.000014 loss_cls: 4.2146 (3.9239) grad_norm: 2.3171 (2.3470) time: 0.7674 data: 0.0003 max mem: 8426 +[2024-12-10 17:49:21 root] (utils.py 283): INFO Epoch: [12] [ 90/2502] eta: 0:30:49 lr: 0.000014 loss_cls: 4.3438 (3.9419) grad_norm: 2.2862 (2.3422) time: 0.7705 data: 0.0003 max mem: 8426 +[2024-12-10 17:49:28 root] (utils.py 283): INFO Epoch: [12] [ 100/2502] eta: 0:30:41 lr: 0.000014 loss_cls: 4.2020 (3.9495) grad_norm: 2.2741 (2.3399) time: 0.7681 data: 0.0003 max mem: 8426 +[2024-12-10 17:49:36 root] (utils.py 283): INFO Epoch: [12] [ 110/2502] eta: 0:30:34 lr: 0.000014 loss_cls: 4.1474 (3.9741) grad_norm: 2.2741 (2.3421) time: 0.7679 data: 0.0003 max mem: 8426 +[2024-12-10 17:49:44 root] (utils.py 283): INFO Epoch: [12] [ 120/2502] eta: 0:30:25 lr: 0.000014 loss_cls: 4.0520 (3.9674) grad_norm: 2.2480 (2.3378) time: 0.7663 data: 0.0003 max mem: 8426 +[2024-12-10 17:49:51 root] (utils.py 283): INFO Epoch: [12] [ 130/2502] eta: 0:30:16 lr: 0.000014 loss_cls: 4.2155 (3.9842) grad_norm: 2.3644 (2.3441) time: 0.7593 data: 0.0002 max mem: 8426 +[2024-12-10 17:49:59 root] (utils.py 283): INFO Epoch: [12] [ 140/2502] eta: 0:30:08 lr: 0.000014 loss_cls: 4.0421 (3.9700) grad_norm: 2.3939 (2.3446) time: 0.7611 data: 0.0002 max mem: 8426 +[2024-12-10 17:50:06 root] (utils.py 283): INFO Epoch: [12] [ 150/2502] eta: 0:30:00 lr: 0.000014 loss_cls: 3.9814 (3.9769) grad_norm: 2.2843 (2.3411) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 17:50:14 root] (utils.py 283): INFO Epoch: [12] [ 160/2502] eta: 0:29:52 lr: 0.000014 loss_cls: 3.9814 (3.9637) grad_norm: 2.3069 (2.3442) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 17:50:22 root] (utils.py 283): INFO Epoch: [12] [ 170/2502] eta: 0:29:43 lr: 0.000014 loss_cls: 3.9617 (3.9759) grad_norm: 2.4320 (2.3509) time: 0.7578 data: 0.0002 max mem: 8426 +[2024-12-10 17:50:29 root] (utils.py 283): INFO Epoch: [12] [ 180/2502] eta: 0:29:34 lr: 0.000014 loss_cls: 4.0566 (3.9787) grad_norm: 2.4836 (2.3573) time: 0.7551 data: 0.0003 max mem: 8426 +[2024-12-10 17:50:37 root] (utils.py 283): INFO Epoch: [12] [ 190/2502] eta: 0:29:28 lr: 0.000014 loss_cls: 3.8334 (3.9761) grad_norm: 2.4424 (2.3615) time: 0.7659 data: 0.0003 max mem: 8426 +[2024-12-10 17:50:45 root] (utils.py 283): INFO Epoch: [12] [ 200/2502] eta: 0:29:21 lr: 0.000014 loss_cls: 3.5592 (3.9561) grad_norm: 2.4075 (2.3598) time: 0.7739 data: 0.0002 max mem: 8426 +[2024-12-10 17:50:52 root] (utils.py 283): INFO Epoch: [12] [ 210/2502] eta: 0:29:13 lr: 0.000014 loss_cls: 3.5592 (3.9409) grad_norm: 2.3176 (2.3578) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 17:51:00 root] (utils.py 283): INFO Epoch: [12] [ 220/2502] eta: 0:29:05 lr: 0.000014 loss_cls: 3.6718 (3.9409) grad_norm: 2.2839 (2.3544) time: 0.7600 data: 0.0003 max mem: 8426 +[2024-12-10 17:51:08 root] (utils.py 283): INFO Epoch: [12] [ 230/2502] eta: 0:28:59 lr: 0.000014 loss_cls: 4.1279 (3.9564) grad_norm: 2.2758 (2.3557) time: 0.7721 data: 0.0003 max mem: 8426 +[2024-12-10 17:51:15 root] (utils.py 283): INFO Epoch: [12] [ 240/2502] eta: 0:28:50 lr: 0.000014 loss_cls: 4.2164 (3.9614) grad_norm: 2.3603 (2.3578) time: 0.7673 data: 0.0003 max mem: 8426 +[2024-12-10 17:51:23 root] (utils.py 283): INFO Epoch: [12] [ 250/2502] eta: 0:28:41 lr: 0.000014 loss_cls: 4.0504 (3.9564) grad_norm: 2.3096 (2.3549) time: 0.7549 data: 0.0002 max mem: 8426 +[2024-12-10 17:51:31 root] (utils.py 283): INFO Epoch: [12] [ 260/2502] eta: 0:28:34 lr: 0.000014 loss_cls: 3.9350 (3.9484) grad_norm: 2.3096 (2.3559) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 17:51:38 root] (utils.py 283): INFO Epoch: [12] [ 270/2502] eta: 0:28:26 lr: 0.000014 loss_cls: 3.8703 (3.9447) grad_norm: 2.3211 (2.3531) time: 0.7646 data: 0.0003 max mem: 8426 +[2024-12-10 17:51:46 root] (utils.py 283): INFO Epoch: [12] [ 280/2502] eta: 0:28:19 lr: 0.000014 loss_cls: 4.0061 (3.9488) grad_norm: 2.2592 (2.3522) time: 0.7628 data: 0.0003 max mem: 8426 +[2024-12-10 17:51:53 root] (utils.py 283): INFO Epoch: [12] [ 290/2502] eta: 0:28:11 lr: 0.000014 loss_cls: 4.1313 (3.9495) grad_norm: 2.2592 (2.3501) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 17:52:01 root] (utils.py 283): INFO Epoch: [12] [ 300/2502] eta: 0:28:02 lr: 0.000014 loss_cls: 3.9379 (3.9490) grad_norm: 2.3150 (2.3514) time: 0.7573 data: 0.0002 max mem: 8426 +[2024-12-10 17:52:08 root] (utils.py 283): INFO Epoch: [12] [ 310/2502] eta: 0:27:54 lr: 0.000014 loss_cls: 3.9379 (3.9473) grad_norm: 2.3601 (2.3517) time: 0.7563 data: 0.0002 max mem: 8426 +[2024-12-10 17:52:16 root] (utils.py 283): INFO Epoch: [12] [ 320/2502] eta: 0:27:46 lr: 0.000014 loss_cls: 4.0846 (3.9495) grad_norm: 2.2999 (2.3503) time: 0.7548 data: 0.0003 max mem: 8426 +[2024-12-10 17:52:24 root] (utils.py 283): INFO Epoch: [12] [ 330/2502] eta: 0:27:38 lr: 0.000014 loss_cls: 3.9998 (3.9427) grad_norm: 2.3548 (2.3514) time: 0.7593 data: 0.0003 max mem: 8426 +[2024-12-10 17:52:31 root] (utils.py 283): INFO Epoch: [12] [ 340/2502] eta: 0:27:30 lr: 0.000014 loss_cls: 3.8469 (3.9505) grad_norm: 2.3589 (2.3546) time: 0.7588 data: 0.0003 max mem: 8426 +[2024-12-10 17:52:39 root] (utils.py 283): INFO Epoch: [12] [ 350/2502] eta: 0:27:23 lr: 0.000014 loss_cls: 3.8827 (3.9458) grad_norm: 2.3082 (2.3520) time: 0.7670 data: 0.0002 max mem: 8426 +[2024-12-10 17:52:47 root] (utils.py 283): INFO Epoch: [12] [ 360/2502] eta: 0:27:17 lr: 0.000014 loss_cls: 3.8827 (3.9454) grad_norm: 2.3198 (2.3514) time: 0.7846 data: 0.0003 max mem: 8426 +[2024-12-10 17:52:55 root] (utils.py 283): INFO Epoch: [12] [ 370/2502] eta: 0:27:10 lr: 0.000014 loss_cls: 4.0124 (3.9476) grad_norm: 2.3253 (2.3516) time: 0.7818 data: 0.0003 max mem: 8426 +[2024-12-10 17:53:02 root] (utils.py 283): INFO Epoch: [12] [ 380/2502] eta: 0:27:02 lr: 0.000014 loss_cls: 4.1155 (3.9393) grad_norm: 2.3140 (2.3511) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 17:53:10 root] (utils.py 283): INFO Epoch: [12] [ 390/2502] eta: 0:26:54 lr: 0.000014 loss_cls: 3.3872 (3.9307) grad_norm: 2.3265 (2.3523) time: 0.7562 data: 0.0002 max mem: 8426 +[2024-12-10 17:53:17 root] (utils.py 283): INFO Epoch: [12] [ 400/2502] eta: 0:26:47 lr: 0.000014 loss_cls: 3.8215 (3.9348) grad_norm: 2.3458 (2.3516) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 17:53:25 root] (utils.py 283): INFO Epoch: [12] [ 410/2502] eta: 0:26:38 lr: 0.000014 loss_cls: 4.1582 (3.9366) grad_norm: 2.3343 (2.3509) time: 0.7599 data: 0.0002 max mem: 8426 +[2024-12-10 17:53:33 root] (utils.py 283): INFO Epoch: [12] [ 420/2502] eta: 0:26:30 lr: 0.000014 loss_cls: 4.1582 (3.9419) grad_norm: 2.2774 (2.3506) time: 0.7547 data: 0.0002 max mem: 8426 +[2024-12-10 17:53:40 root] (utils.py 283): INFO Epoch: [12] [ 430/2502] eta: 0:26:22 lr: 0.000014 loss_cls: 4.1529 (3.9383) grad_norm: 2.3052 (2.3511) time: 0.7567 data: 0.0002 max mem: 8426 +[2024-12-10 17:53:48 root] (utils.py 283): INFO Epoch: [12] [ 440/2502] eta: 0:26:14 lr: 0.000014 loss_cls: 4.1529 (3.9413) grad_norm: 2.3052 (2.3496) time: 0.7561 data: 0.0002 max mem: 8426 +[2024-12-10 17:53:55 root] (utils.py 283): INFO Epoch: [12] [ 450/2502] eta: 0:26:06 lr: 0.000014 loss_cls: 4.1867 (3.9421) grad_norm: 2.2976 (2.3495) time: 0.7588 data: 0.0002 max mem: 8426 +[2024-12-10 17:54:03 root] (utils.py 283): INFO Epoch: [12] [ 460/2502] eta: 0:25:59 lr: 0.000014 loss_cls: 4.1867 (3.9479) grad_norm: 2.3039 (2.3492) time: 0.7599 data: 0.0003 max mem: 8426 +[2024-12-10 17:54:11 root] (utils.py 283): INFO Epoch: [12] [ 470/2502] eta: 0:25:51 lr: 0.000014 loss_cls: 4.0705 (3.9478) grad_norm: 2.2945 (2.3479) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 17:54:18 root] (utils.py 283): INFO Epoch: [12] [ 480/2502] eta: 0:25:43 lr: 0.000014 loss_cls: 3.7903 (3.9454) grad_norm: 2.3077 (2.3475) time: 0.7651 data: 0.0003 max mem: 8426 +[2024-12-10 17:54:26 root] (utils.py 283): INFO Epoch: [12] [ 490/2502] eta: 0:25:36 lr: 0.000014 loss_cls: 3.6160 (3.9386) grad_norm: 2.3156 (2.3478) time: 0.7610 data: 0.0003 max mem: 8426 +[2024-12-10 17:54:33 root] (utils.py 283): INFO Epoch: [12] [ 500/2502] eta: 0:25:27 lr: 0.000014 loss_cls: 3.4986 (3.9345) grad_norm: 2.3465 (2.3480) time: 0.7545 data: 0.0003 max mem: 8426 +[2024-12-10 17:54:41 root] (utils.py 283): INFO Epoch: [12] [ 510/2502] eta: 0:25:19 lr: 0.000014 loss_cls: 3.4986 (3.9289) grad_norm: 2.3692 (2.3497) time: 0.7532 data: 0.0003 max mem: 8426 +[2024-12-10 17:54:48 root] (utils.py 283): INFO Epoch: [12] [ 520/2502] eta: 0:25:12 lr: 0.000014 loss_cls: 3.5795 (3.9303) grad_norm: 2.3986 (2.3505) time: 0.7545 data: 0.0003 max mem: 8426 +[2024-12-10 17:54:56 root] (utils.py 283): INFO Epoch: [12] [ 530/2502] eta: 0:25:04 lr: 0.000014 loss_cls: 3.9554 (3.9310) grad_norm: 2.3358 (2.3505) time: 0.7546 data: 0.0002 max mem: 8426 +[2024-12-10 17:55:03 root] (utils.py 283): INFO Epoch: [12] [ 540/2502] eta: 0:24:56 lr: 0.000014 loss_cls: 3.6798 (3.9246) grad_norm: 2.3120 (2.3498) time: 0.7541 data: 0.0002 max mem: 8426 +[2024-12-10 17:55:11 root] (utils.py 283): INFO Epoch: [12] [ 550/2502] eta: 0:24:48 lr: 0.000014 loss_cls: 3.6117 (3.9189) grad_norm: 2.2987 (2.3488) time: 0.7538 data: 0.0002 max mem: 8426 +[2024-12-10 17:55:19 root] (utils.py 283): INFO Epoch: [12] [ 560/2502] eta: 0:24:40 lr: 0.000014 loss_cls: 3.8388 (3.9199) grad_norm: 2.3279 (2.3484) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 17:55:26 root] (utils.py 283): INFO Epoch: [12] [ 570/2502] eta: 0:24:32 lr: 0.000014 loss_cls: 4.1753 (3.9215) grad_norm: 2.3114 (2.3479) time: 0.7600 data: 0.0002 max mem: 8426 +[2024-12-10 17:55:34 root] (utils.py 283): INFO Epoch: [12] [ 580/2502] eta: 0:24:24 lr: 0.000014 loss_cls: 4.2179 (3.9209) grad_norm: 2.3114 (2.3470) time: 0.7520 data: 0.0002 max mem: 8426 +[2024-12-10 17:55:41 root] (utils.py 283): INFO Epoch: [12] [ 590/2502] eta: 0:24:17 lr: 0.000014 loss_cls: 4.1014 (3.9224) grad_norm: 2.3175 (2.3463) time: 0.7552 data: 0.0002 max mem: 8426 +[2024-12-10 17:55:49 root] (utils.py 283): INFO Epoch: [12] [ 600/2502] eta: 0:24:09 lr: 0.000014 loss_cls: 4.0710 (3.9236) grad_norm: 2.2626 (2.3449) time: 0.7560 data: 0.0002 max mem: 8426 +[2024-12-10 17:55:57 root] (utils.py 283): INFO Epoch: [12] [ 610/2502] eta: 0:24:02 lr: 0.000014 loss_cls: 3.9706 (3.9231) grad_norm: 2.2534 (2.3450) time: 0.7666 data: 0.0002 max mem: 8426 +[2024-12-10 17:56:04 root] (utils.py 283): INFO Epoch: [12] [ 620/2502] eta: 0:23:54 lr: 0.000014 loss_cls: 4.1078 (3.9238) grad_norm: 2.3494 (2.3455) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-10 17:56:12 root] (utils.py 283): INFO Epoch: [12] [ 630/2502] eta: 0:23:46 lr: 0.000014 loss_cls: 4.1248 (3.9267) grad_norm: 2.2953 (2.3444) time: 0.7580 data: 0.0002 max mem: 8426 +[2024-12-10 17:56:19 root] (utils.py 283): INFO Epoch: [12] [ 640/2502] eta: 0:23:38 lr: 0.000014 loss_cls: 4.2409 (3.9308) grad_norm: 2.2960 (2.3450) time: 0.7575 data: 0.0002 max mem: 8426 +[2024-12-10 17:56:27 root] (utils.py 283): INFO Epoch: [12] [ 650/2502] eta: 0:23:30 lr: 0.000014 loss_cls: 4.0868 (3.9284) grad_norm: 2.3673 (2.3464) time: 0.7549 data: 0.0002 max mem: 8426 +[2024-12-10 17:56:35 root] (utils.py 283): INFO Epoch: [12] [ 660/2502] eta: 0:23:23 lr: 0.000014 loss_cls: 3.7245 (3.9236) grad_norm: 2.3658 (2.3470) time: 0.7596 data: 0.0002 max mem: 8426 +[2024-12-10 17:56:42 root] (utils.py 283): INFO Epoch: [12] [ 670/2502] eta: 0:23:15 lr: 0.000014 loss_cls: 3.5847 (3.9216) grad_norm: 2.3658 (2.3475) time: 0.7637 data: 0.0003 max mem: 8426 +[2024-12-10 17:56:50 root] (utils.py 283): INFO Epoch: [12] [ 680/2502] eta: 0:23:08 lr: 0.000014 loss_cls: 3.9647 (3.9220) grad_norm: 2.3726 (2.3475) time: 0.7640 data: 0.0003 max mem: 8426 +[2024-12-10 17:56:58 root] (utils.py 283): INFO Epoch: [12] [ 690/2502] eta: 0:23:00 lr: 0.000014 loss_cls: 4.0333 (3.9225) grad_norm: 2.3503 (2.3470) time: 0.7659 data: 0.0002 max mem: 8426 +[2024-12-10 17:57:05 root] (utils.py 283): INFO Epoch: [12] [ 700/2502] eta: 0:22:53 lr: 0.000014 loss_cls: 4.0333 (3.9228) grad_norm: 2.2935 (2.3460) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 17:57:13 root] (utils.py 283): INFO Epoch: [12] [ 710/2502] eta: 0:22:45 lr: 0.000014 loss_cls: 4.0165 (3.9240) grad_norm: 2.2844 (2.3460) time: 0.7589 data: 0.0003 max mem: 8426 +[2024-12-10 17:57:20 root] (utils.py 283): INFO Epoch: [12] [ 720/2502] eta: 0:22:37 lr: 0.000014 loss_cls: 3.9860 (3.9227) grad_norm: 2.2823 (2.3452) time: 0.7604 data: 0.0003 max mem: 8426 +[2024-12-10 17:57:28 root] (utils.py 283): INFO Epoch: [12] [ 730/2502] eta: 0:22:30 lr: 0.000014 loss_cls: 4.0365 (3.9250) grad_norm: 2.2823 (2.3456) time: 0.7615 data: 0.0003 max mem: 8426 +[2024-12-10 17:57:35 root] (utils.py 283): INFO Epoch: [12] [ 740/2502] eta: 0:22:22 lr: 0.000014 loss_cls: 4.1017 (3.9254) grad_norm: 2.2784 (2.3448) time: 0.7589 data: 0.0003 max mem: 8426 +[2024-12-10 17:57:43 root] (utils.py 283): INFO Epoch: [12] [ 750/2502] eta: 0:22:14 lr: 0.000014 loss_cls: 4.1676 (3.9290) grad_norm: 2.2595 (2.3443) time: 0.7559 data: 0.0003 max mem: 8426 +[2024-12-10 17:57:51 root] (utils.py 283): INFO Epoch: [12] [ 760/2502] eta: 0:22:07 lr: 0.000014 loss_cls: 4.1936 (3.9293) grad_norm: 2.2966 (2.3444) time: 0.7616 data: 0.0003 max mem: 8426 +[2024-12-10 17:57:58 root] (utils.py 283): INFO Epoch: [12] [ 770/2502] eta: 0:21:59 lr: 0.000014 loss_cls: 4.1221 (3.9310) grad_norm: 2.3076 (2.3451) time: 0.7680 data: 0.0003 max mem: 8426 +[2024-12-10 17:58:06 root] (utils.py 283): INFO Epoch: [12] [ 780/2502] eta: 0:21:52 lr: 0.000014 loss_cls: 4.0057 (3.9317) grad_norm: 2.3076 (2.3445) time: 0.7689 data: 0.0003 max mem: 8426 +[2024-12-10 17:58:14 root] (utils.py 283): INFO Epoch: [12] [ 790/2502] eta: 0:21:44 lr: 0.000014 loss_cls: 4.2099 (3.9383) grad_norm: 2.2682 (2.3438) time: 0.7661 data: 0.0003 max mem: 8426 +[2024-12-10 17:58:21 root] (utils.py 283): INFO Epoch: [12] [ 800/2502] eta: 0:21:37 lr: 0.000014 loss_cls: 4.3139 (3.9422) grad_norm: 2.2756 (2.3448) time: 0.7628 data: 0.0003 max mem: 8426 +[2024-12-10 17:58:29 root] (utils.py 283): INFO Epoch: [12] [ 810/2502] eta: 0:21:29 lr: 0.000014 loss_cls: 4.2167 (3.9415) grad_norm: 2.2647 (2.3443) time: 0.7653 data: 0.0003 max mem: 8426 +[2024-12-10 17:58:37 root] (utils.py 283): INFO Epoch: [12] [ 820/2502] eta: 0:21:21 lr: 0.000014 loss_cls: 4.0591 (3.9421) grad_norm: 2.2734 (2.3442) time: 0.7667 data: 0.0003 max mem: 8426 +[2024-12-10 17:58:44 root] (utils.py 283): INFO Epoch: [12] [ 830/2502] eta: 0:21:14 lr: 0.000014 loss_cls: 4.0931 (3.9429) grad_norm: 2.3440 (2.3441) time: 0.7673 data: 0.0002 max mem: 8426 +[2024-12-10 17:58:52 root] (utils.py 283): INFO Epoch: [12] [ 840/2502] eta: 0:21:06 lr: 0.000014 loss_cls: 4.1314 (3.9468) grad_norm: 2.2862 (2.3435) time: 0.7662 data: 0.0002 max mem: 8426 +[2024-12-10 17:59:00 root] (utils.py 283): INFO Epoch: [12] [ 850/2502] eta: 0:20:59 lr: 0.000014 loss_cls: 4.1314 (3.9486) grad_norm: 2.2670 (2.3427) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 17:59:07 root] (utils.py 283): INFO Epoch: [12] [ 860/2502] eta: 0:20:51 lr: 0.000014 loss_cls: 4.1153 (3.9480) grad_norm: 2.2533 (2.3422) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 17:59:15 root] (utils.py 283): INFO Epoch: [12] [ 870/2502] eta: 0:20:44 lr: 0.000014 loss_cls: 3.8989 (3.9457) grad_norm: 2.2916 (2.3420) time: 0.7633 data: 0.0003 max mem: 8426 +[2024-12-10 17:59:23 root] (utils.py 283): INFO Epoch: [12] [ 880/2502] eta: 0:20:36 lr: 0.000014 loss_cls: 3.8923 (3.9443) grad_norm: 2.2967 (2.3430) time: 0.7635 data: 0.0003 max mem: 8426 +[2024-12-10 17:59:30 root] (utils.py 283): INFO Epoch: [12] [ 890/2502] eta: 0:20:28 lr: 0.000014 loss_cls: 3.9809 (3.9459) grad_norm: 2.3264 (2.3422) time: 0.7626 data: 0.0003 max mem: 8426 +[2024-12-10 17:59:38 root] (utils.py 283): INFO Epoch: [12] [ 900/2502] eta: 0:20:21 lr: 0.000014 loss_cls: 4.0874 (3.9446) grad_norm: 2.3699 (2.3435) time: 0.7622 data: 0.0003 max mem: 8426 +[2024-12-10 17:59:45 root] (utils.py 283): INFO Epoch: [12] [ 910/2502] eta: 0:20:13 lr: 0.000014 loss_cls: 4.0852 (3.9476) grad_norm: 2.3367 (2.3429) time: 0.7611 data: 0.0003 max mem: 8426 +[2024-12-10 17:59:53 root] (utils.py 283): INFO Epoch: [12] [ 920/2502] eta: 0:20:05 lr: 0.000014 loss_cls: 4.0311 (3.9423) grad_norm: 2.2851 (2.3424) time: 0.7612 data: 0.0003 max mem: 8426 +[2024-12-10 18:00:01 root] (utils.py 283): INFO Epoch: [12] [ 930/2502] eta: 0:19:58 lr: 0.000014 loss_cls: 3.5778 (3.9393) grad_norm: 2.2729 (2.3423) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 18:00:08 root] (utils.py 283): INFO Epoch: [12] [ 940/2502] eta: 0:19:50 lr: 0.000014 loss_cls: 4.0460 (3.9400) grad_norm: 2.3640 (2.3429) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 18:00:16 root] (utils.py 283): INFO Epoch: [12] [ 950/2502] eta: 0:19:43 lr: 0.000014 loss_cls: 3.9938 (3.9386) grad_norm: 2.3295 (2.3427) time: 0.7671 data: 0.0002 max mem: 8426 +[2024-12-10 18:00:24 root] (utils.py 283): INFO Epoch: [12] [ 960/2502] eta: 0:19:35 lr: 0.000014 loss_cls: 3.9560 (3.9390) grad_norm: 2.3241 (2.3424) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 18:00:31 root] (utils.py 283): INFO Epoch: [12] [ 970/2502] eta: 0:19:28 lr: 0.000014 loss_cls: 4.0702 (3.9381) grad_norm: 2.3425 (2.3422) time: 0.7690 data: 0.0002 max mem: 8426 +[2024-12-10 18:00:39 root] (utils.py 283): INFO Epoch: [12] [ 980/2502] eta: 0:19:20 lr: 0.000014 loss_cls: 3.8594 (3.9367) grad_norm: 2.3220 (2.3426) time: 0.7741 data: 0.0002 max mem: 8426 +[2024-12-10 18:00:47 root] (utils.py 283): INFO Epoch: [12] [ 990/2502] eta: 0:19:13 lr: 0.000014 loss_cls: 4.0560 (3.9380) grad_norm: 2.3115 (2.3423) time: 0.7692 data: 0.0002 max mem: 8426 +[2024-12-10 18:00:54 root] (utils.py 283): INFO Epoch: [12] [1000/2502] eta: 0:19:05 lr: 0.000014 loss_cls: 3.9514 (3.9363) grad_norm: 2.2550 (2.3420) time: 0.7670 data: 0.0003 max mem: 8426 +[2024-12-10 18:01:02 root] (utils.py 283): INFO Epoch: [12] [1010/2502] eta: 0:18:58 lr: 0.000014 loss_cls: 3.8153 (3.9376) grad_norm: 2.2550 (2.3414) time: 0.7662 data: 0.0003 max mem: 8426 +[2024-12-10 18:01:10 root] (utils.py 283): INFO Epoch: [12] [1020/2502] eta: 0:18:50 lr: 0.000014 loss_cls: 4.2090 (3.9405) grad_norm: 2.2664 (2.3412) time: 0.7665 data: 0.0003 max mem: 8426 +[2024-12-10 18:01:18 root] (utils.py 283): INFO Epoch: [12] [1030/2502] eta: 0:18:43 lr: 0.000014 loss_cls: 4.2865 (3.9424) grad_norm: 2.3298 (2.3416) time: 0.7694 data: 0.0003 max mem: 8426 +[2024-12-10 18:01:25 root] (utils.py 283): INFO Epoch: [12] [1040/2502] eta: 0:18:35 lr: 0.000014 loss_cls: 4.4912 (3.9439) grad_norm: 2.3340 (2.3414) time: 0.7668 data: 0.0003 max mem: 8426 +[2024-12-10 18:01:33 root] (utils.py 283): INFO Epoch: [12] [1050/2502] eta: 0:18:27 lr: 0.000014 loss_cls: 4.0298 (3.9433) grad_norm: 2.3358 (2.3415) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-10 18:01:41 root] (utils.py 283): INFO Epoch: [12] [1060/2502] eta: 0:18:20 lr: 0.000014 loss_cls: 4.0298 (3.9455) grad_norm: 2.3358 (2.3415) time: 0.7692 data: 0.0002 max mem: 8426 +[2024-12-10 18:01:48 root] (utils.py 283): INFO Epoch: [12] [1070/2502] eta: 0:18:12 lr: 0.000014 loss_cls: 4.0691 (3.9475) grad_norm: 2.3466 (2.3414) time: 0.7740 data: 0.0003 max mem: 8426 +[2024-12-10 18:01:56 root] (utils.py 283): INFO Epoch: [12] [1080/2502] eta: 0:18:05 lr: 0.000014 loss_cls: 4.1348 (3.9494) grad_norm: 2.3679 (2.3416) time: 0.7785 data: 0.0003 max mem: 8426 +[2024-12-10 18:02:04 root] (utils.py 283): INFO Epoch: [12] [1090/2502] eta: 0:17:57 lr: 0.000014 loss_cls: 4.1348 (3.9502) grad_norm: 2.3211 (2.3415) time: 0.7739 data: 0.0002 max mem: 8426 +[2024-12-10 18:02:11 root] (utils.py 283): INFO Epoch: [12] [1100/2502] eta: 0:17:50 lr: 0.000014 loss_cls: 4.0732 (3.9502) grad_norm: 2.3211 (2.3417) time: 0.7672 data: 0.0002 max mem: 8426 +[2024-12-10 18:02:19 root] (utils.py 283): INFO Epoch: [12] [1110/2502] eta: 0:17:42 lr: 0.000014 loss_cls: 4.2385 (3.9537) grad_norm: 2.3345 (2.3423) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 18:02:27 root] (utils.py 283): INFO Epoch: [12] [1120/2502] eta: 0:17:35 lr: 0.000014 loss_cls: 4.2216 (3.9545) grad_norm: 2.3972 (2.3427) time: 0.7653 data: 0.0003 max mem: 8426 +[2024-12-10 18:02:34 root] (utils.py 283): INFO Epoch: [12] [1130/2502] eta: 0:17:27 lr: 0.000014 loss_cls: 4.1011 (3.9549) grad_norm: 2.3715 (2.3424) time: 0.7675 data: 0.0003 max mem: 8426 +[2024-12-10 18:02:42 root] (utils.py 283): INFO Epoch: [12] [1140/2502] eta: 0:17:19 lr: 0.000014 loss_cls: 4.0422 (3.9528) grad_norm: 2.3911 (2.3432) time: 0.7631 data: 0.0003 max mem: 8426 +[2024-12-10 18:02:50 root] (utils.py 283): INFO Epoch: [12] [1150/2502] eta: 0:17:12 lr: 0.000014 loss_cls: 4.0242 (3.9539) grad_norm: 2.3447 (2.3428) time: 0.7589 data: 0.0003 max mem: 8426 +[2024-12-10 18:02:57 root] (utils.py 283): INFO Epoch: [12] [1160/2502] eta: 0:17:04 lr: 0.000014 loss_cls: 4.0701 (3.9526) grad_norm: 2.2891 (2.3430) time: 0.7610 data: 0.0002 max mem: 8426 +[2024-12-10 18:03:05 root] (utils.py 283): INFO Epoch: [12] [1170/2502] eta: 0:16:56 lr: 0.000014 loss_cls: 3.8610 (3.9506) grad_norm: 2.3114 (2.3427) time: 0.7676 data: 0.0002 max mem: 8426 +[2024-12-10 18:03:13 root] (utils.py 283): INFO Epoch: [12] [1180/2502] eta: 0:16:49 lr: 0.000014 loss_cls: 3.5493 (3.9495) grad_norm: 2.2776 (2.3425) time: 0.7796 data: 0.0002 max mem: 8426 +[2024-12-10 18:03:21 root] (utils.py 283): INFO Epoch: [12] [1190/2502] eta: 0:16:42 lr: 0.000014 loss_cls: 3.9105 (3.9483) grad_norm: 2.2119 (2.3422) time: 0.7842 data: 0.0003 max mem: 8426 +[2024-12-10 18:03:29 root] (utils.py 283): INFO Epoch: [12] [1200/2502] eta: 0:16:34 lr: 0.000014 loss_cls: 3.8362 (3.9472) grad_norm: 2.3215 (2.3422) time: 0.7829 data: 0.0003 max mem: 8426 +[2024-12-10 18:03:36 root] (utils.py 283): INFO Epoch: [12] [1210/2502] eta: 0:16:27 lr: 0.000014 loss_cls: 4.0786 (3.9489) grad_norm: 2.3416 (2.3420) time: 0.7822 data: 0.0003 max mem: 8426 +[2024-12-10 18:03:44 root] (utils.py 283): INFO Epoch: [12] [1220/2502] eta: 0:16:19 lr: 0.000014 loss_cls: 4.2886 (3.9501) grad_norm: 2.3431 (2.3423) time: 0.7797 data: 0.0003 max mem: 8426 +[2024-12-10 18:03:52 root] (utils.py 283): INFO Epoch: [12] [1230/2502] eta: 0:16:12 lr: 0.000014 loss_cls: 4.1932 (3.9502) grad_norm: 2.3771 (2.3426) time: 0.7780 data: 0.0003 max mem: 8426 +[2024-12-10 18:04:00 root] (utils.py 283): INFO Epoch: [12] [1240/2502] eta: 0:16:04 lr: 0.000014 loss_cls: 4.2675 (3.9520) grad_norm: 2.3601 (2.3426) time: 0.7784 data: 0.0003 max mem: 8426 +[2024-12-10 18:04:07 root] (utils.py 283): INFO Epoch: [12] [1250/2502] eta: 0:15:57 lr: 0.000014 loss_cls: 4.0876 (3.9502) grad_norm: 2.3061 (2.3426) time: 0.7793 data: 0.0003 max mem: 8426 +[2024-12-10 18:04:15 root] (utils.py 283): INFO Epoch: [12] [1260/2502] eta: 0:15:49 lr: 0.000014 loss_cls: 3.9976 (3.9512) grad_norm: 2.3061 (2.3430) time: 0.7825 data: 0.0003 max mem: 8426 +[2024-12-10 18:04:23 root] (utils.py 283): INFO Epoch: [12] [1270/2502] eta: 0:15:42 lr: 0.000014 loss_cls: 3.9214 (3.9494) grad_norm: 2.3364 (2.3431) time: 0.7833 data: 0.0003 max mem: 8426 +[2024-12-10 18:04:31 root] (utils.py 283): INFO Epoch: [12] [1280/2502] eta: 0:15:34 lr: 0.000014 loss_cls: 3.7194 (3.9469) grad_norm: 2.3460 (2.3437) time: 0.7765 data: 0.0002 max mem: 8426 +[2024-12-10 18:04:39 root] (utils.py 283): INFO Epoch: [12] [1290/2502] eta: 0:15:27 lr: 0.000014 loss_cls: 3.9922 (3.9481) grad_norm: 2.4029 (2.3443) time: 0.7708 data: 0.0003 max mem: 8426 +[2024-12-10 18:04:46 root] (utils.py 283): INFO Epoch: [12] [1300/2502] eta: 0:15:19 lr: 0.000014 loss_cls: 3.9362 (3.9453) grad_norm: 2.4029 (2.3441) time: 0.7709 data: 0.0003 max mem: 8426 +[2024-12-10 18:04:54 root] (utils.py 283): INFO Epoch: [12] [1310/2502] eta: 0:15:11 lr: 0.000014 loss_cls: 3.9468 (3.9476) grad_norm: 2.3346 (2.3441) time: 0.7683 data: 0.0002 max mem: 8426 +[2024-12-10 18:05:02 root] (utils.py 283): INFO Epoch: [12] [1320/2502] eta: 0:15:04 lr: 0.000014 loss_cls: 4.1645 (3.9482) grad_norm: 2.3151 (2.3440) time: 0.7659 data: 0.0003 max mem: 8426 +[2024-12-10 18:05:09 root] (utils.py 283): INFO Epoch: [12] [1330/2502] eta: 0:14:56 lr: 0.000014 loss_cls: 4.0732 (3.9474) grad_norm: 2.2927 (2.3435) time: 0.7701 data: 0.0003 max mem: 8426 +[2024-12-10 18:05:17 root] (utils.py 283): INFO Epoch: [12] [1340/2502] eta: 0:14:49 lr: 0.000014 loss_cls: 4.0802 (3.9477) grad_norm: 2.2794 (2.3438) time: 0.7767 data: 0.0002 max mem: 8426 +[2024-12-10 18:05:25 root] (utils.py 283): INFO Epoch: [12] [1350/2502] eta: 0:14:41 lr: 0.000014 loss_cls: 4.0487 (3.9478) grad_norm: 2.2992 (2.3438) time: 0.7697 data: 0.0002 max mem: 8426 +[2024-12-10 18:05:32 root] (utils.py 283): INFO Epoch: [12] [1360/2502] eta: 0:14:33 lr: 0.000014 loss_cls: 3.9480 (3.9473) grad_norm: 2.2790 (2.3432) time: 0.7613 data: 0.0002 max mem: 8426 +[2024-12-10 18:05:40 root] (utils.py 283): INFO Epoch: [12] [1370/2502] eta: 0:14:26 lr: 0.000014 loss_cls: 4.0802 (3.9487) grad_norm: 2.2703 (2.3434) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 18:05:48 root] (utils.py 283): INFO Epoch: [12] [1380/2502] eta: 0:14:18 lr: 0.000014 loss_cls: 4.0272 (3.9468) grad_norm: 2.3063 (2.3429) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 18:05:55 root] (utils.py 283): INFO Epoch: [12] [1390/2502] eta: 0:14:10 lr: 0.000014 loss_cls: 3.7784 (3.9466) grad_norm: 2.2938 (2.3425) time: 0.7683 data: 0.0002 max mem: 8426 +[2024-12-10 18:06:03 root] (utils.py 283): INFO Epoch: [12] [1400/2502] eta: 0:14:03 lr: 0.000014 loss_cls: 4.0659 (3.9490) grad_norm: 2.3353 (2.3430) time: 0.7687 data: 0.0002 max mem: 8426 +[2024-12-10 18:06:11 root] (utils.py 283): INFO Epoch: [12] [1410/2502] eta: 0:13:55 lr: 0.000014 loss_cls: 4.4016 (3.9500) grad_norm: 2.4127 (2.3434) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 18:06:18 root] (utils.py 283): INFO Epoch: [12] [1420/2502] eta: 0:13:47 lr: 0.000014 loss_cls: 4.2653 (3.9497) grad_norm: 2.3430 (2.3433) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 18:06:26 root] (utils.py 283): INFO Epoch: [12] [1430/2502] eta: 0:13:40 lr: 0.000014 loss_cls: 3.9799 (3.9492) grad_norm: 2.2933 (2.3430) time: 0.7649 data: 0.0003 max mem: 8426 +[2024-12-10 18:06:34 root] (utils.py 283): INFO Epoch: [12] [1440/2502] eta: 0:13:32 lr: 0.000014 loss_cls: 4.0144 (3.9504) grad_norm: 2.3203 (2.3431) time: 0.7656 data: 0.0003 max mem: 8426 +[2024-12-10 18:06:41 root] (utils.py 283): INFO Epoch: [12] [1450/2502] eta: 0:13:24 lr: 0.000014 loss_cls: 4.1942 (3.9511) grad_norm: 2.3661 (2.3435) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 18:06:49 root] (utils.py 283): INFO Epoch: [12] [1460/2502] eta: 0:13:17 lr: 0.000014 loss_cls: 4.0641 (3.9508) grad_norm: 2.3352 (2.3438) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 18:06:56 root] (utils.py 283): INFO Epoch: [12] [1470/2502] eta: 0:13:09 lr: 0.000014 loss_cls: 4.0641 (3.9507) grad_norm: 2.3255 (2.3439) time: 0.7661 data: 0.0002 max mem: 8426 +[2024-12-10 18:07:04 root] (utils.py 283): INFO Epoch: [12] [1480/2502] eta: 0:13:01 lr: 0.000014 loss_cls: 4.1311 (3.9514) grad_norm: 2.2925 (2.3434) time: 0.7678 data: 0.0003 max mem: 8426 +[2024-12-10 18:07:12 root] (utils.py 283): INFO Epoch: [12] [1490/2502] eta: 0:12:54 lr: 0.000014 loss_cls: 4.0631 (3.9501) grad_norm: 2.3592 (2.3438) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 18:07:19 root] (utils.py 283): INFO Epoch: [12] [1500/2502] eta: 0:12:46 lr: 0.000014 loss_cls: 3.9516 (3.9513) grad_norm: 2.3545 (2.3438) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 18:07:27 root] (utils.py 283): INFO Epoch: [12] [1510/2502] eta: 0:12:39 lr: 0.000014 loss_cls: 4.0384 (3.9516) grad_norm: 2.3276 (2.3438) time: 0.7691 data: 0.0002 max mem: 8426 +[2024-12-10 18:07:35 root] (utils.py 283): INFO Epoch: [12] [1520/2502] eta: 0:12:31 lr: 0.000014 loss_cls: 4.0486 (3.9495) grad_norm: 2.3511 (2.3440) time: 0.7745 data: 0.0002 max mem: 8426 +[2024-12-10 18:07:43 root] (utils.py 283): INFO Epoch: [12] [1530/2502] eta: 0:12:23 lr: 0.000014 loss_cls: 3.4213 (3.9476) grad_norm: 2.3039 (2.3439) time: 0.7697 data: 0.0002 max mem: 8426 +[2024-12-10 18:07:50 root] (utils.py 283): INFO Epoch: [12] [1540/2502] eta: 0:12:16 lr: 0.000014 loss_cls: 3.9593 (3.9470) grad_norm: 2.2798 (2.3435) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 18:07:58 root] (utils.py 283): INFO Epoch: [12] [1550/2502] eta: 0:12:08 lr: 0.000014 loss_cls: 4.0807 (3.9482) grad_norm: 2.2560 (2.3439) time: 0.7630 data: 0.0003 max mem: 8426 +[2024-12-10 18:08:06 root] (utils.py 283): INFO Epoch: [12] [1560/2502] eta: 0:12:00 lr: 0.000014 loss_cls: 4.2170 (3.9488) grad_norm: 2.3636 (2.3438) time: 0.7673 data: 0.0003 max mem: 8426 +[2024-12-10 18:08:13 root] (utils.py 283): INFO Epoch: [12] [1570/2502] eta: 0:11:53 lr: 0.000014 loss_cls: 4.2170 (3.9502) grad_norm: 2.2773 (2.3435) time: 0.7676 data: 0.0003 max mem: 8426 +[2024-12-10 18:08:21 root] (utils.py 283): INFO Epoch: [12] [1580/2502] eta: 0:11:45 lr: 0.000014 loss_cls: 3.9075 (3.9480) grad_norm: 2.2993 (2.3436) time: 0.7641 data: 0.0003 max mem: 8426 +[2024-12-10 18:08:28 root] (utils.py 283): INFO Epoch: [12] [1590/2502] eta: 0:11:37 lr: 0.000014 loss_cls: 3.7001 (3.9478) grad_norm: 2.3515 (2.3436) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 18:08:36 root] (utils.py 283): INFO Epoch: [12] [1600/2502] eta: 0:11:30 lr: 0.000014 loss_cls: 3.7607 (3.9464) grad_norm: 2.3446 (2.3433) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 18:08:44 root] (utils.py 283): INFO Epoch: [12] [1610/2502] eta: 0:11:22 lr: 0.000014 loss_cls: 3.7607 (3.9460) grad_norm: 2.3028 (2.3430) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 18:08:51 root] (utils.py 283): INFO Epoch: [12] [1620/2502] eta: 0:11:14 lr: 0.000014 loss_cls: 3.9917 (3.9461) grad_norm: 2.3028 (2.3427) time: 0.7659 data: 0.0002 max mem: 8426 +[2024-12-10 18:08:59 root] (utils.py 283): INFO Epoch: [12] [1630/2502] eta: 0:11:07 lr: 0.000014 loss_cls: 4.1923 (3.9466) grad_norm: 2.3286 (2.3426) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 18:09:07 root] (utils.py 283): INFO Epoch: [12] [1640/2502] eta: 0:10:59 lr: 0.000014 loss_cls: 4.1923 (3.9473) grad_norm: 2.3322 (2.3428) time: 0.7678 data: 0.0002 max mem: 8426 +[2024-12-10 18:09:14 root] (utils.py 283): INFO Epoch: [12] [1650/2502] eta: 0:10:51 lr: 0.000014 loss_cls: 4.1467 (3.9479) grad_norm: 2.3758 (2.3427) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 18:09:22 root] (utils.py 283): INFO Epoch: [12] [1660/2502] eta: 0:10:44 lr: 0.000014 loss_cls: 4.2236 (3.9494) grad_norm: 2.2844 (2.3429) time: 0.7566 data: 0.0002 max mem: 8426 +[2024-12-10 18:09:30 root] (utils.py 283): INFO Epoch: [12] [1670/2502] eta: 0:10:36 lr: 0.000014 loss_cls: 4.0728 (3.9476) grad_norm: 2.3027 (2.3430) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 18:09:37 root] (utils.py 283): INFO Epoch: [12] [1680/2502] eta: 0:10:29 lr: 0.000014 loss_cls: 3.8949 (3.9492) grad_norm: 2.3347 (2.3429) time: 0.7706 data: 0.0002 max mem: 8426 +[2024-12-10 18:09:45 root] (utils.py 283): INFO Epoch: [12] [1690/2502] eta: 0:10:21 lr: 0.000014 loss_cls: 4.1761 (3.9494) grad_norm: 2.3217 (2.3429) time: 0.7679 data: 0.0002 max mem: 8426 +[2024-12-10 18:09:53 root] (utils.py 283): INFO Epoch: [12] [1700/2502] eta: 0:10:13 lr: 0.000014 loss_cls: 4.1437 (3.9498) grad_norm: 2.3693 (2.3436) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 18:10:00 root] (utils.py 283): INFO Epoch: [12] [1710/2502] eta: 0:10:06 lr: 0.000014 loss_cls: 3.9099 (3.9478) grad_norm: 2.3693 (2.3434) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 18:10:08 root] (utils.py 283): INFO Epoch: [12] [1720/2502] eta: 0:09:58 lr: 0.000014 loss_cls: 3.7811 (3.9475) grad_norm: 2.3317 (2.3434) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 18:10:16 root] (utils.py 283): INFO Epoch: [12] [1730/2502] eta: 0:09:50 lr: 0.000014 loss_cls: 3.9629 (3.9481) grad_norm: 2.3317 (2.3432) time: 0.7672 data: 0.0002 max mem: 8426 +[2024-12-10 18:10:23 root] (utils.py 283): INFO Epoch: [12] [1740/2502] eta: 0:09:43 lr: 0.000014 loss_cls: 4.1323 (3.9490) grad_norm: 2.3045 (2.3430) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-10 18:10:31 root] (utils.py 283): INFO Epoch: [12] [1750/2502] eta: 0:09:35 lr: 0.000014 loss_cls: 4.3149 (3.9503) grad_norm: 2.3140 (2.3429) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-10 18:10:39 root] (utils.py 283): INFO Epoch: [12] [1760/2502] eta: 0:09:27 lr: 0.000014 loss_cls: 4.3149 (3.9514) grad_norm: 2.3327 (2.3430) time: 0.7632 data: 0.0002 max mem: 8426 +[2024-12-10 18:10:46 root] (utils.py 283): INFO Epoch: [12] [1770/2502] eta: 0:09:20 lr: 0.000014 loss_cls: 4.2169 (3.9528) grad_norm: 2.3433 (2.3428) time: 0.7618 data: 0.0002 max mem: 8426 +[2024-12-10 18:10:54 root] (utils.py 283): INFO Epoch: [12] [1780/2502] eta: 0:09:12 lr: 0.000014 loss_cls: 4.0962 (3.9529) grad_norm: 2.3363 (2.3432) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 18:11:02 root] (utils.py 283): INFO Epoch: [12] [1790/2502] eta: 0:09:04 lr: 0.000014 loss_cls: 3.9617 (3.9524) grad_norm: 2.3711 (2.3430) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-10 18:11:09 root] (utils.py 283): INFO Epoch: [12] [1800/2502] eta: 0:08:57 lr: 0.000014 loss_cls: 3.9534 (3.9521) grad_norm: 2.3313 (2.3431) time: 0.7674 data: 0.0002 max mem: 8426 +[2024-12-10 18:11:17 root] (utils.py 283): INFO Epoch: [12] [1810/2502] eta: 0:08:49 lr: 0.000014 loss_cls: 3.9195 (3.9526) grad_norm: 2.2834 (2.3429) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-10 18:11:24 root] (utils.py 283): INFO Epoch: [12] [1820/2502] eta: 0:08:41 lr: 0.000014 loss_cls: 3.9195 (3.9519) grad_norm: 2.2987 (2.3427) time: 0.7625 data: 0.0003 max mem: 8426 +[2024-12-10 18:11:32 root] (utils.py 283): INFO Epoch: [12] [1830/2502] eta: 0:08:34 lr: 0.000014 loss_cls: 3.6669 (3.9500) grad_norm: 2.3692 (2.3430) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 18:11:40 root] (utils.py 283): INFO Epoch: [12] [1840/2502] eta: 0:08:26 lr: 0.000014 loss_cls: 3.7500 (3.9501) grad_norm: 2.3860 (2.3432) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 18:11:47 root] (utils.py 283): INFO Epoch: [12] [1850/2502] eta: 0:08:18 lr: 0.000014 loss_cls: 4.1490 (3.9499) grad_norm: 2.3732 (2.3433) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 18:11:55 root] (utils.py 283): INFO Epoch: [12] [1860/2502] eta: 0:08:11 lr: 0.000014 loss_cls: 3.8663 (3.9491) grad_norm: 2.3493 (2.3432) time: 0.7617 data: 0.0003 max mem: 8426 +[2024-12-10 18:12:03 root] (utils.py 283): INFO Epoch: [12] [1870/2502] eta: 0:08:03 lr: 0.000014 loss_cls: 3.6519 (3.9480) grad_norm: 2.3682 (2.3437) time: 0.7637 data: 0.0003 max mem: 8426 +[2024-12-10 18:12:10 root] (utils.py 283): INFO Epoch: [12] [1880/2502] eta: 0:07:55 lr: 0.000014 loss_cls: 3.5726 (3.9474) grad_norm: 2.4399 (2.3445) time: 0.7658 data: 0.0003 max mem: 8426 +[2024-12-10 18:12:18 root] (utils.py 283): INFO Epoch: [12] [1890/2502] eta: 0:07:48 lr: 0.000014 loss_cls: 4.1003 (3.9478) grad_norm: 2.4066 (2.3447) time: 0.7598 data: 0.0002 max mem: 8426 +[2024-12-10 18:12:26 root] (utils.py 283): INFO Epoch: [12] [1900/2502] eta: 0:07:40 lr: 0.000014 loss_cls: 4.1332 (3.9487) grad_norm: 2.3492 (2.3445) time: 0.7675 data: 0.0002 max mem: 8426 +[2024-12-10 18:12:33 root] (utils.py 283): INFO Epoch: [12] [1910/2502] eta: 0:07:32 lr: 0.000014 loss_cls: 4.1194 (3.9489) grad_norm: 2.3411 (2.3454) time: 0.7726 data: 0.0002 max mem: 8426 +[2024-12-10 18:12:41 root] (utils.py 283): INFO Epoch: [12] [1920/2502] eta: 0:07:25 lr: 0.000014 loss_cls: 4.1000 (3.9495) grad_norm: 2.3303 (2.3451) time: 0.7668 data: 0.0002 max mem: 8426 +[2024-12-10 18:12:49 root] (utils.py 283): INFO Epoch: [12] [1930/2502] eta: 0:07:17 lr: 0.000014 loss_cls: 4.1260 (3.9504) grad_norm: 2.3143 (2.3452) time: 0.7639 data: 0.0003 max mem: 8426 +[2024-12-10 18:12:56 root] (utils.py 283): INFO Epoch: [12] [1940/2502] eta: 0:07:10 lr: 0.000014 loss_cls: 4.2092 (3.9497) grad_norm: 2.3744 (2.3456) time: 0.7655 data: 0.0003 max mem: 8426 +[2024-12-10 18:13:04 root] (utils.py 283): INFO Epoch: [12] [1950/2502] eta: 0:07:02 lr: 0.000014 loss_cls: 3.8926 (3.9489) grad_norm: 2.4067 (2.3458) time: 0.7724 data: 0.0003 max mem: 8426 +[2024-12-10 18:13:12 root] (utils.py 283): INFO Epoch: [12] [1960/2502] eta: 0:06:54 lr: 0.000014 loss_cls: 3.6333 (3.9468) grad_norm: 2.3752 (2.3457) time: 0.7713 data: 0.0003 max mem: 8426 +[2024-12-10 18:13:19 root] (utils.py 283): INFO Epoch: [12] [1970/2502] eta: 0:06:47 lr: 0.000014 loss_cls: 3.6393 (3.9468) grad_norm: 2.3663 (2.3459) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 18:13:27 root] (utils.py 283): INFO Epoch: [12] [1980/2502] eta: 0:06:39 lr: 0.000014 loss_cls: 4.1200 (3.9471) grad_norm: 2.3824 (2.3458) time: 0.7673 data: 0.0002 max mem: 8426 +[2024-12-10 18:13:35 root] (utils.py 283): INFO Epoch: [12] [1990/2502] eta: 0:06:31 lr: 0.000014 loss_cls: 4.0493 (3.9459) grad_norm: 2.3001 (2.3455) time: 0.7668 data: 0.0002 max mem: 8426 +[2024-12-10 18:13:42 root] (utils.py 283): INFO Epoch: [12] [2000/2502] eta: 0:06:24 lr: 0.000014 loss_cls: 4.0493 (3.9467) grad_norm: 2.3406 (2.3458) time: 0.7660 data: 0.0003 max mem: 8426 +[2024-12-10 18:13:50 root] (utils.py 283): INFO Epoch: [12] [2010/2502] eta: 0:06:16 lr: 0.000014 loss_cls: 4.0156 (3.9463) grad_norm: 2.3783 (2.3459) time: 0.7644 data: 0.0003 max mem: 8426 +[2024-12-10 18:13:58 root] (utils.py 283): INFO Epoch: [12] [2020/2502] eta: 0:06:08 lr: 0.000014 loss_cls: 3.6574 (3.9449) grad_norm: 2.2916 (2.3456) time: 0.7631 data: 0.0003 max mem: 8426 +[2024-12-10 18:14:05 root] (utils.py 283): INFO Epoch: [12] [2030/2502] eta: 0:06:01 lr: 0.000014 loss_cls: 3.7848 (3.9446) grad_norm: 2.3609 (2.3460) time: 0.7636 data: 0.0003 max mem: 8426 +[2024-12-10 18:14:13 root] (utils.py 283): INFO Epoch: [12] [2040/2502] eta: 0:05:53 lr: 0.000014 loss_cls: 3.9274 (3.9451) grad_norm: 2.3844 (2.3458) time: 0.7677 data: 0.0003 max mem: 8426 +[2024-12-10 18:14:21 root] (utils.py 283): INFO Epoch: [12] [2050/2502] eta: 0:05:45 lr: 0.000014 loss_cls: 4.0506 (3.9456) grad_norm: 2.3425 (2.3459) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-10 18:14:28 root] (utils.py 283): INFO Epoch: [12] [2060/2502] eta: 0:05:38 lr: 0.000014 loss_cls: 4.0580 (3.9459) grad_norm: 2.3425 (2.3459) time: 0.7615 data: 0.0002 max mem: 8426 +[2024-12-10 18:14:36 root] (utils.py 283): INFO Epoch: [12] [2070/2502] eta: 0:05:30 lr: 0.000014 loss_cls: 4.1047 (3.9457) grad_norm: 2.3258 (2.3458) time: 0.7624 data: 0.0003 max mem: 8426 +[2024-12-10 18:14:43 root] (utils.py 283): INFO Epoch: [12] [2080/2502] eta: 0:05:22 lr: 0.000014 loss_cls: 4.1398 (3.9462) grad_norm: 2.2288 (2.3453) time: 0.7641 data: 0.0003 max mem: 8426 +[2024-12-10 18:14:51 root] (utils.py 283): INFO Epoch: [12] [2090/2502] eta: 0:05:15 lr: 0.000014 loss_cls: 4.1152 (3.9467) grad_norm: 2.2808 (2.3455) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-10 18:14:59 root] (utils.py 283): INFO Epoch: [12] [2100/2502] eta: 0:05:07 lr: 0.000014 loss_cls: 4.2460 (3.9484) grad_norm: 2.3179 (2.3453) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 18:15:06 root] (utils.py 283): INFO Epoch: [12] [2110/2502] eta: 0:04:59 lr: 0.000014 loss_cls: 4.1956 (3.9478) grad_norm: 2.3394 (2.3456) time: 0.7665 data: 0.0002 max mem: 8426 +[2024-12-10 18:15:14 root] (utils.py 283): INFO Epoch: [12] [2120/2502] eta: 0:04:52 lr: 0.000014 loss_cls: 4.0295 (3.9482) grad_norm: 2.3566 (2.3455) time: 0.7662 data: 0.0002 max mem: 8426 +[2024-12-10 18:15:22 root] (utils.py 283): INFO Epoch: [12] [2130/2502] eta: 0:04:44 lr: 0.000014 loss_cls: 4.2338 (3.9494) grad_norm: 2.3135 (2.3451) time: 0.7713 data: 0.0002 max mem: 8426 +[2024-12-10 18:15:30 root] (utils.py 283): INFO Epoch: [12] [2140/2502] eta: 0:04:37 lr: 0.000014 loss_cls: 4.1652 (3.9498) grad_norm: 2.3135 (2.3451) time: 0.7710 data: 0.0002 max mem: 8426 +[2024-12-10 18:15:37 root] (utils.py 283): INFO Epoch: [12] [2150/2502] eta: 0:04:29 lr: 0.000014 loss_cls: 4.1647 (3.9503) grad_norm: 2.3705 (2.3455) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 18:15:45 root] (utils.py 283): INFO Epoch: [12] [2160/2502] eta: 0:04:21 lr: 0.000014 loss_cls: 4.1774 (3.9507) grad_norm: 2.3816 (2.3457) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 18:15:52 root] (utils.py 283): INFO Epoch: [12] [2170/2502] eta: 0:04:14 lr: 0.000014 loss_cls: 4.1079 (3.9511) grad_norm: 2.3360 (2.3455) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 18:16:00 root] (utils.py 283): INFO Epoch: [12] [2180/2502] eta: 0:04:06 lr: 0.000014 loss_cls: 4.1079 (3.9510) grad_norm: 2.3674 (2.3458) time: 0.7671 data: 0.0002 max mem: 8426 +[2024-12-10 18:16:08 root] (utils.py 283): INFO Epoch: [12] [2190/2502] eta: 0:03:58 lr: 0.000014 loss_cls: 4.0241 (3.9513) grad_norm: 2.3860 (2.3458) time: 0.7664 data: 0.0003 max mem: 8426 +[2024-12-10 18:16:15 root] (utils.py 283): INFO Epoch: [12] [2200/2502] eta: 0:03:51 lr: 0.000014 loss_cls: 4.0241 (3.9513) grad_norm: 2.2884 (2.3455) time: 0.7638 data: 0.0003 max mem: 8426 +[2024-12-10 18:16:23 root] (utils.py 283): INFO Epoch: [12] [2210/2502] eta: 0:03:43 lr: 0.000014 loss_cls: 3.9743 (3.9503) grad_norm: 2.3215 (2.3459) time: 0.7660 data: 0.0003 max mem: 8426 +[2024-12-10 18:16:31 root] (utils.py 283): INFO Epoch: [12] [2220/2502] eta: 0:03:35 lr: 0.000014 loss_cls: 3.9580 (3.9513) grad_norm: 2.3388 (2.3457) time: 0.7659 data: 0.0003 max mem: 8426 +[2024-12-10 18:16:38 root] (utils.py 283): INFO Epoch: [12] [2230/2502] eta: 0:03:28 lr: 0.000014 loss_cls: 4.0120 (3.9508) grad_norm: 2.2892 (2.3455) time: 0.7641 data: 0.0003 max mem: 8426 +[2024-12-10 18:16:46 root] (utils.py 283): INFO Epoch: [12] [2240/2502] eta: 0:03:20 lr: 0.000014 loss_cls: 4.0503 (3.9505) grad_norm: 2.2892 (2.3456) time: 0.7640 data: 0.0003 max mem: 8426 +[2024-12-10 18:16:54 root] (utils.py 283): INFO Epoch: [12] [2250/2502] eta: 0:03:12 lr: 0.000014 loss_cls: 4.0368 (3.9506) grad_norm: 2.2927 (2.3456) time: 0.7642 data: 0.0003 max mem: 8426 +[2024-12-10 18:17:01 root] (utils.py 283): INFO Epoch: [12] [2260/2502] eta: 0:03:05 lr: 0.000014 loss_cls: 4.1763 (3.9510) grad_norm: 2.3256 (2.3457) time: 0.7691 data: 0.0002 max mem: 8426 +[2024-12-10 18:17:09 root] (utils.py 283): INFO Epoch: [12] [2270/2502] eta: 0:02:57 lr: 0.000014 loss_cls: 3.9457 (3.9500) grad_norm: 2.3035 (2.3455) time: 0.7710 data: 0.0002 max mem: 8426 +[2024-12-10 18:17:17 root] (utils.py 283): INFO Epoch: [12] [2280/2502] eta: 0:02:49 lr: 0.000014 loss_cls: 4.0051 (3.9510) grad_norm: 2.3026 (2.3455) time: 0.7682 data: 0.0003 max mem: 8426 +[2024-12-10 18:17:24 root] (utils.py 283): INFO Epoch: [12] [2290/2502] eta: 0:02:42 lr: 0.000014 loss_cls: 4.3335 (3.9517) grad_norm: 2.3175 (2.3453) time: 0.7633 data: 0.0003 max mem: 8426 +[2024-12-10 18:17:32 root] (utils.py 283): INFO Epoch: [12] [2300/2502] eta: 0:02:34 lr: 0.000014 loss_cls: 4.2359 (3.9524) grad_norm: 2.3335 (2.3453) time: 0.7631 data: 0.0003 max mem: 8426 +[2024-12-10 18:17:40 root] (utils.py 283): INFO Epoch: [12] [2310/2502] eta: 0:02:26 lr: 0.000014 loss_cls: 4.2222 (3.9531) grad_norm: 2.3670 (2.3455) time: 0.7649 data: 0.0003 max mem: 8426 +[2024-12-10 18:17:47 root] (utils.py 283): INFO Epoch: [12] [2320/2502] eta: 0:02:19 lr: 0.000014 loss_cls: 4.2222 (3.9546) grad_norm: 2.2900 (2.3452) time: 0.7630 data: 0.0003 max mem: 8426 +[2024-12-10 18:17:55 root] (utils.py 283): INFO Epoch: [12] [2330/2502] eta: 0:02:11 lr: 0.000014 loss_cls: 4.1755 (3.9539) grad_norm: 2.2985 (2.3454) time: 0.7633 data: 0.0002 max mem: 8426 +[2024-12-10 18:18:03 root] (utils.py 283): INFO Epoch: [12] [2340/2502] eta: 0:02:03 lr: 0.000014 loss_cls: 3.8510 (3.9531) grad_norm: 2.3080 (2.3452) time: 0.7689 data: 0.0003 max mem: 8426 +[2024-12-10 18:18:10 root] (utils.py 283): INFO Epoch: [12] [2350/2502] eta: 0:01:56 lr: 0.000014 loss_cls: 3.7191 (3.9523) grad_norm: 2.2954 (2.3453) time: 0.7699 data: 0.0003 max mem: 8426 +[2024-12-10 18:18:18 root] (utils.py 283): INFO Epoch: [12] [2360/2502] eta: 0:01:48 lr: 0.000014 loss_cls: 3.9549 (3.9524) grad_norm: 2.4134 (2.3456) time: 0.7696 data: 0.0002 max mem: 8426 +[2024-12-10 18:18:26 root] (utils.py 283): INFO Epoch: [12] [2370/2502] eta: 0:01:41 lr: 0.000014 loss_cls: 4.0154 (3.9523) grad_norm: 2.3516 (2.3454) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 18:18:33 root] (utils.py 283): INFO Epoch: [12] [2380/2502] eta: 0:01:33 lr: 0.000014 loss_cls: 4.0376 (3.9523) grad_norm: 2.2646 (2.3453) time: 0.7610 data: 0.0003 max mem: 8426 +[2024-12-10 18:18:41 root] (utils.py 283): INFO Epoch: [12] [2390/2502] eta: 0:01:25 lr: 0.000014 loss_cls: 4.0376 (3.9519) grad_norm: 2.3335 (2.3454) time: 0.7630 data: 0.0002 max mem: 8426 +[2024-12-10 18:18:49 root] (utils.py 283): INFO Epoch: [12] [2400/2502] eta: 0:01:18 lr: 0.000014 loss_cls: 4.0826 (3.9523) grad_norm: 2.3335 (2.3453) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 18:18:56 root] (utils.py 283): INFO Epoch: [12] [2410/2502] eta: 0:01:10 lr: 0.000014 loss_cls: 4.0760 (3.9520) grad_norm: 2.2865 (2.3452) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 18:19:04 root] (utils.py 283): INFO Epoch: [12] [2420/2502] eta: 0:01:02 lr: 0.000014 loss_cls: 4.0760 (3.9529) grad_norm: 2.3276 (2.3454) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 18:19:11 root] (utils.py 283): INFO Epoch: [12] [2430/2502] eta: 0:00:55 lr: 0.000014 loss_cls: 3.9457 (3.9517) grad_norm: 2.3276 (2.3454) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-10 18:19:19 root] (utils.py 283): INFO Epoch: [12] [2440/2502] eta: 0:00:47 lr: 0.000014 loss_cls: 3.7196 (3.9518) grad_norm: 2.2940 (2.3451) time: 0.7732 data: 0.0002 max mem: 8426 +[2024-12-10 18:19:27 root] (utils.py 283): INFO Epoch: [12] [2450/2502] eta: 0:00:39 lr: 0.000014 loss_cls: 4.2057 (3.9519) grad_norm: 2.2918 (2.3450) time: 0.7728 data: 0.0002 max mem: 8426 +[2024-12-10 18:19:35 root] (utils.py 283): INFO Epoch: [12] [2460/2502] eta: 0:00:32 lr: 0.000014 loss_cls: 4.1430 (3.9512) grad_norm: 2.3477 (2.3454) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 18:19:42 root] (utils.py 283): INFO Epoch: [12] [2470/2502] eta: 0:00:24 lr: 0.000014 loss_cls: 3.6893 (3.9509) grad_norm: 2.3569 (2.3456) time: 0.7676 data: 0.0002 max mem: 8426 +[2024-12-10 18:19:50 root] (utils.py 283): INFO Epoch: [12] [2480/2502] eta: 0:00:16 lr: 0.000014 loss_cls: 3.6893 (3.9503) grad_norm: 2.3300 (2.3455) time: 0.7727 data: 0.0003 max mem: 8426 +[2024-12-10 18:19:58 root] (utils.py 283): INFO Epoch: [12] [2490/2502] eta: 0:00:09 lr: 0.000014 loss_cls: 3.6368 (3.9492) grad_norm: 2.3309 (2.3456) time: 0.7975 data: 0.0241 max mem: 8426 +[2024-12-10 18:20:06 root] (utils.py 283): INFO Epoch: [12] [2500/2502] eta: 0:00:01 lr: 0.000014 loss_cls: 3.8229 (3.9492) grad_norm: 2.3309 (2.3453) time: 0.7897 data: 0.0241 max mem: 8426 +[2024-12-10 18:20:07 root] (utils.py 283): INFO Epoch: [12] [2501/2502] eta: 0:00:00 lr: 0.000014 loss_cls: 3.8229 (3.9494) grad_norm: 2.3344 (2.3453) time: 0.7892 data: 0.0241 max mem: 8426 +[2024-12-10 18:20:07 root] (utils.py 297): INFO Epoch: [12] Total time: 0:31:55 (0.7657 s / it) +[2024-12-10 18:20:07 root] (engine.py 179): INFO Averaged stats:lr: 0.000014 loss_cls: 3.8229 (3.9389) grad_norm: 2.3344 (2.3453) +[2024-12-10 18:20:07 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.5927 (0.5927) acc1: 85.9375 (85.9375) acc3: 98.4375 (98.4375) acc5: 99.2188 (99.2188) time: 0.1273 data: 0.0004 max mem: 8426 +[2024-12-10 18:20:08 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7306 (0.8142) acc1: 85.9375 (81.6761) acc3: 95.3125 (93.6080) acc5: 96.8750 (96.3068) time: 0.1278 data: 0.0004 max mem: 8426 +[2024-12-10 18:20:10 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8659 (0.8662) acc1: 79.6875 (80.3571) acc3: 92.9688 (92.9688) acc5: 95.3125 (95.6101) time: 0.1281 data: 0.0004 max mem: 8426 +[2024-12-10 18:20:11 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9297 (0.8823) acc1: 78.9062 (79.6371) acc3: 92.9688 (93.1956) acc5: 95.3125 (95.7409) time: 0.1282 data: 0.0004 max mem: 8426 +[2024-12-10 18:20:12 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8267 (0.8723) acc1: 79.6875 (80.1829) acc3: 93.7500 (93.2546) acc5: 96.8750 (95.8841) time: 0.1281 data: 0.0004 max mem: 8426 +[2024-12-10 18:20:14 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0875 (0.9587) acc1: 75.0000 (78.2322) acc3: 88.2812 (91.8199) acc5: 93.7500 (94.7610) time: 0.1284 data: 0.0004 max mem: 8426 +[2024-12-10 18:20:15 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2725 (1.0044) acc1: 71.0938 (77.5102) acc3: 85.1562 (90.8555) acc5: 89.8438 (93.9421) time: 0.1409 data: 0.0127 max mem: 8426 +[2024-12-10 18:20:17 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2037 (1.0444) acc1: 73.4375 (76.6285) acc3: 86.7188 (90.2729) acc5: 90.6250 (93.4639) time: 0.1794 data: 0.0429 max mem: 8426 +[2024-12-10 18:20:19 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2061 (1.0774) acc1: 73.4375 (75.8873) acc3: 85.9375 (89.6798) acc5: 90.6250 (93.0170) time: 0.1794 data: 0.0309 max mem: 8426 +[2024-12-10 18:20:20 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2084 (1.1035) acc1: 71.8750 (75.2146) acc3: 85.1562 (89.3115) acc5: 90.6250 (92.7713) time: 0.1406 data: 0.0007 max mem: 8426 +[2024-12-10 18:20:21 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1917 (1.0916) acc1: 73.4375 (75.4880) acc3: 88.2812 (89.5120) acc5: 91.4062 (92.9520) time: 0.1314 data: 0.0007 max mem: 8426 +[2024-12-10 18:20:21 root] (utils.py 297): INFO Test: Total time: 0:00:13 (0.1413 s / it) +[2024-12-10 18:20:21 root] (engine.py 264): INFO * Acc@1 75.482 Acc@3 89.562 Acc@5 92.854 loss 1.094 flops 1.285 layer_flops 1.251 +[2024-12-10 18:20:21 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 75.5% +[2024-12-10 18:20:21 root] (main.py 576): INFO Max accuracy: 75.63% +[2024-12-10 18:20:22 root] (utils.py 283): INFO Epoch: [13] [ 0/2502] eta: 0:34:50 lr: 0.000013 loss_cls: 3.8983 (3.8983) grad_norm: 2.3898 (2.3898) time: 0.8354 data: 0.0003 max mem: 8426 +[2024-12-10 18:20:29 root] (utils.py 283): INFO Epoch: [13] [ 10/2502] eta: 0:31:54 lr: 0.000013 loss_cls: 3.8983 (3.8408) grad_norm: 2.3601 (2.3530) time: 0.7682 data: 0.0003 max mem: 8426 +[2024-12-10 18:20:37 root] (utils.py 283): INFO Epoch: [13] [ 20/2502] eta: 0:31:45 lr: 0.000013 loss_cls: 4.0077 (3.8259) grad_norm: 2.3104 (2.3292) time: 0.7645 data: 0.0003 max mem: 8426 +[2024-12-10 18:20:45 root] (utils.py 283): INFO Epoch: [13] [ 30/2502] eta: 0:31:39 lr: 0.000013 loss_cls: 3.8879 (3.7940) grad_norm: 2.2871 (2.3383) time: 0.7685 data: 0.0003 max mem: 8426 +[2024-12-10 18:20:52 root] (utils.py 283): INFO Epoch: [13] [ 40/2502] eta: 0:31:34 lr: 0.000013 loss_cls: 4.1478 (3.9119) grad_norm: 2.3687 (2.3531) time: 0.7709 data: 0.0003 max mem: 8426 +[2024-12-10 18:21:00 root] (utils.py 283): INFO Epoch: [13] [ 50/2502] eta: 0:31:31 lr: 0.000013 loss_cls: 4.2361 (3.9209) grad_norm: 2.3723 (2.3552) time: 0.7761 data: 0.0002 max mem: 8426 +[2024-12-10 18:21:08 root] (utils.py 283): INFO Epoch: [13] [ 60/2502] eta: 0:31:26 lr: 0.000013 loss_cls: 3.9804 (3.9283) grad_norm: 2.3824 (2.3615) time: 0.7786 data: 0.0002 max mem: 8426 +[2024-12-10 18:21:16 root] (utils.py 283): INFO Epoch: [13] [ 70/2502] eta: 0:31:16 lr: 0.000013 loss_cls: 3.9398 (3.9159) grad_norm: 2.3824 (2.3537) time: 0.7724 data: 0.0002 max mem: 8426 +[2024-12-10 18:21:23 root] (utils.py 283): INFO Epoch: [13] [ 80/2502] eta: 0:31:06 lr: 0.000013 loss_cls: 3.8577 (3.9071) grad_norm: 2.3351 (2.3525) time: 0.7661 data: 0.0003 max mem: 8426 +[2024-12-10 18:21:31 root] (utils.py 283): INFO Epoch: [13] [ 90/2502] eta: 0:30:59 lr: 0.000013 loss_cls: 3.6892 (3.9011) grad_norm: 2.3606 (2.3532) time: 0.7685 data: 0.0003 max mem: 8426 +[2024-12-10 18:21:39 root] (utils.py 283): INFO Epoch: [13] [ 100/2502] eta: 0:30:53 lr: 0.000013 loss_cls: 3.6516 (3.8851) grad_norm: 2.3701 (2.3562) time: 0.7757 data: 0.0002 max mem: 8426 +[2024-12-10 18:21:47 root] (utils.py 283): INFO Epoch: [13] [ 110/2502] eta: 0:30:47 lr: 0.000013 loss_cls: 4.0513 (3.9004) grad_norm: 2.2841 (2.3442) time: 0.7792 data: 0.0002 max mem: 8426 +[2024-12-10 18:21:54 root] (utils.py 283): INFO Epoch: [13] [ 120/2502] eta: 0:30:39 lr: 0.000013 loss_cls: 4.2889 (3.9134) grad_norm: 2.3021 (2.3473) time: 0.7754 data: 0.0002 max mem: 8426 +[2024-12-10 18:22:02 root] (utils.py 283): INFO Epoch: [13] [ 130/2502] eta: 0:30:31 lr: 0.000013 loss_cls: 4.0943 (3.9097) grad_norm: 2.3534 (2.3510) time: 0.7710 data: 0.0002 max mem: 8426 +[2024-12-10 18:22:10 root] (utils.py 283): INFO Epoch: [13] [ 140/2502] eta: 0:30:25 lr: 0.000013 loss_cls: 4.0358 (3.9117) grad_norm: 2.3347 (2.3449) time: 0.7749 data: 0.0002 max mem: 8426 +[2024-12-10 18:22:18 root] (utils.py 283): INFO Epoch: [13] [ 150/2502] eta: 0:30:18 lr: 0.000013 loss_cls: 4.2849 (3.9225) grad_norm: 2.2833 (2.3463) time: 0.7780 data: 0.0002 max mem: 8426 +[2024-12-10 18:22:25 root] (utils.py 283): INFO Epoch: [13] [ 160/2502] eta: 0:30:11 lr: 0.000013 loss_cls: 4.2849 (3.9307) grad_norm: 2.2935 (2.3448) time: 0.7782 data: 0.0002 max mem: 8426 +[2024-12-10 18:22:33 root] (utils.py 283): INFO Epoch: [13] [ 170/2502] eta: 0:30:05 lr: 0.000013 loss_cls: 3.9228 (3.9230) grad_norm: 2.2762 (2.3428) time: 0.7833 data: 0.0002 max mem: 8426 +[2024-12-10 18:22:41 root] (utils.py 283): INFO Epoch: [13] [ 180/2502] eta: 0:29:56 lr: 0.000013 loss_cls: 3.7020 (3.9079) grad_norm: 2.2803 (2.3408) time: 0.7741 data: 0.0002 max mem: 8426 +[2024-12-10 18:22:49 root] (utils.py 283): INFO Epoch: [13] [ 190/2502] eta: 0:29:47 lr: 0.000013 loss_cls: 3.9840 (3.9204) grad_norm: 2.3493 (2.3457) time: 0.7659 data: 0.0003 max mem: 8426 +[2024-12-10 18:22:56 root] (utils.py 283): INFO Epoch: [13] [ 200/2502] eta: 0:29:39 lr: 0.000013 loss_cls: 4.0685 (3.9256) grad_norm: 2.3091 (2.3433) time: 0.7676 data: 0.0003 max mem: 8426 +[2024-12-10 18:23:04 root] (utils.py 283): INFO Epoch: [13] [ 210/2502] eta: 0:29:30 lr: 0.000013 loss_cls: 4.1301 (3.9379) grad_norm: 2.3174 (2.3456) time: 0.7647 data: 0.0003 max mem: 8426 +[2024-12-10 18:23:12 root] (utils.py 283): INFO Epoch: [13] [ 220/2502] eta: 0:29:23 lr: 0.000013 loss_cls: 4.0417 (3.9335) grad_norm: 2.3864 (2.3460) time: 0.7726 data: 0.0003 max mem: 8426 +[2024-12-10 18:23:19 root] (utils.py 283): INFO Epoch: [13] [ 230/2502] eta: 0:29:15 lr: 0.000013 loss_cls: 3.7601 (3.9272) grad_norm: 2.3476 (2.3459) time: 0.7728 data: 0.0003 max mem: 8426 +[2024-12-10 18:23:27 root] (utils.py 283): INFO Epoch: [13] [ 240/2502] eta: 0:29:06 lr: 0.000013 loss_cls: 3.8120 (3.9292) grad_norm: 2.2957 (2.3455) time: 0.7645 data: 0.0003 max mem: 8426 +[2024-12-10 18:23:35 root] (utils.py 283): INFO Epoch: [13] [ 250/2502] eta: 0:28:57 lr: 0.000013 loss_cls: 4.2114 (3.9351) grad_norm: 2.3491 (2.3456) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 18:23:42 root] (utils.py 283): INFO Epoch: [13] [ 260/2502] eta: 0:28:50 lr: 0.000013 loss_cls: 4.0274 (3.9351) grad_norm: 2.3580 (2.3489) time: 0.7671 data: 0.0003 max mem: 8426 +[2024-12-10 18:23:50 root] (utils.py 283): INFO Epoch: [13] [ 270/2502] eta: 0:28:42 lr: 0.000013 loss_cls: 4.0090 (3.9385) grad_norm: 2.3640 (2.3519) time: 0.7715 data: 0.0003 max mem: 8426 +[2024-12-10 18:23:58 root] (utils.py 283): INFO Epoch: [13] [ 280/2502] eta: 0:28:34 lr: 0.000013 loss_cls: 3.7813 (3.9271) grad_norm: 2.3640 (2.3516) time: 0.7689 data: 0.0003 max mem: 8426 +[2024-12-10 18:24:05 root] (utils.py 283): INFO Epoch: [13] [ 290/2502] eta: 0:28:26 lr: 0.000013 loss_cls: 3.9897 (3.9335) grad_norm: 2.3113 (2.3499) time: 0.7684 data: 0.0003 max mem: 8426 +[2024-12-10 18:24:13 root] (utils.py 283): INFO Epoch: [13] [ 300/2502] eta: 0:28:18 lr: 0.000013 loss_cls: 4.2733 (3.9432) grad_norm: 2.2676 (2.3481) time: 0.7666 data: 0.0003 max mem: 8426 +[2024-12-10 18:24:21 root] (utils.py 283): INFO Epoch: [13] [ 310/2502] eta: 0:28:10 lr: 0.000013 loss_cls: 4.1259 (3.9479) grad_norm: 2.2674 (2.3491) time: 0.7664 data: 0.0003 max mem: 8426 +[2024-12-10 18:24:29 root] (utils.py 283): INFO Epoch: [13] [ 320/2502] eta: 0:28:02 lr: 0.000013 loss_cls: 4.1088 (3.9478) grad_norm: 2.3092 (2.3473) time: 0.7692 data: 0.0003 max mem: 8426 +[2024-12-10 18:24:36 root] (utils.py 283): INFO Epoch: [13] [ 330/2502] eta: 0:27:54 lr: 0.000013 loss_cls: 4.2620 (3.9609) grad_norm: 2.3139 (2.3478) time: 0.7668 data: 0.0002 max mem: 8426 +[2024-12-10 18:24:44 root] (utils.py 283): INFO Epoch: [13] [ 340/2502] eta: 0:27:46 lr: 0.000013 loss_cls: 4.3295 (3.9637) grad_norm: 2.3206 (2.3486) time: 0.7632 data: 0.0002 max mem: 8426 +[2024-12-10 18:24:51 root] (utils.py 283): INFO Epoch: [13] [ 350/2502] eta: 0:27:38 lr: 0.000013 loss_cls: 4.0656 (3.9537) grad_norm: 2.3184 (2.3481) time: 0.7632 data: 0.0003 max mem: 8426 +[2024-12-10 18:24:59 root] (utils.py 283): INFO Epoch: [13] [ 360/2502] eta: 0:27:30 lr: 0.000013 loss_cls: 3.8968 (3.9502) grad_norm: 2.3449 (2.3498) time: 0.7642 data: 0.0003 max mem: 8426 +[2024-12-10 18:25:07 root] (utils.py 283): INFO Epoch: [13] [ 370/2502] eta: 0:27:21 lr: 0.000013 loss_cls: 3.8968 (3.9439) grad_norm: 2.3700 (2.3508) time: 0.7643 data: 0.0003 max mem: 8426 +[2024-12-10 18:25:14 root] (utils.py 283): INFO Epoch: [13] [ 380/2502] eta: 0:27:13 lr: 0.000013 loss_cls: 3.8288 (3.9423) grad_norm: 2.3024 (2.3506) time: 0.7625 data: 0.0003 max mem: 8426 +[2024-12-10 18:25:22 root] (utils.py 283): INFO Epoch: [13] [ 390/2502] eta: 0:27:05 lr: 0.000013 loss_cls: 4.0320 (3.9458) grad_norm: 2.3204 (2.3516) time: 0.7631 data: 0.0003 max mem: 8426 +[2024-12-10 18:25:30 root] (utils.py 283): INFO Epoch: [13] [ 400/2502] eta: 0:26:58 lr: 0.000013 loss_cls: 3.9865 (3.9415) grad_norm: 2.3204 (2.3494) time: 0.7673 data: 0.0003 max mem: 8426 +[2024-12-10 18:25:37 root] (utils.py 283): INFO Epoch: [13] [ 410/2502] eta: 0:26:49 lr: 0.000013 loss_cls: 4.0594 (3.9447) grad_norm: 2.3484 (2.3522) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 18:25:45 root] (utils.py 283): INFO Epoch: [13] [ 420/2502] eta: 0:26:41 lr: 0.000013 loss_cls: 4.1920 (3.9460) grad_norm: 2.4632 (2.3531) time: 0.7609 data: 0.0003 max mem: 8426 +[2024-12-10 18:25:52 root] (utils.py 283): INFO Epoch: [13] [ 430/2502] eta: 0:26:33 lr: 0.000013 loss_cls: 3.9536 (3.9411) grad_norm: 2.3139 (2.3535) time: 0.7603 data: 0.0003 max mem: 8426 +[2024-12-10 18:26:00 root] (utils.py 283): INFO Epoch: [13] [ 440/2502] eta: 0:26:25 lr: 0.000013 loss_cls: 3.7099 (3.9394) grad_norm: 2.3072 (2.3531) time: 0.7607 data: 0.0003 max mem: 8426 +[2024-12-10 18:26:08 root] (utils.py 283): INFO Epoch: [13] [ 450/2502] eta: 0:26:17 lr: 0.000013 loss_cls: 3.8337 (3.9383) grad_norm: 2.2995 (2.3524) time: 0.7625 data: 0.0003 max mem: 8426 +[2024-12-10 18:26:15 root] (utils.py 283): INFO Epoch: [13] [ 460/2502] eta: 0:26:09 lr: 0.000013 loss_cls: 3.7064 (3.9287) grad_norm: 2.2995 (2.3519) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-10 18:26:23 root] (utils.py 283): INFO Epoch: [13] [ 470/2502] eta: 0:26:02 lr: 0.000013 loss_cls: 3.7064 (3.9276) grad_norm: 2.3116 (2.3515) time: 0.7666 data: 0.0003 max mem: 8426 +[2024-12-10 18:26:31 root] (utils.py 283): INFO Epoch: [13] [ 480/2502] eta: 0:25:54 lr: 0.000013 loss_cls: 3.9854 (3.9287) grad_norm: 2.3049 (2.3502) time: 0.7641 data: 0.0003 max mem: 8426 +[2024-12-10 18:26:38 root] (utils.py 283): INFO Epoch: [13] [ 490/2502] eta: 0:25:46 lr: 0.000013 loss_cls: 3.8079 (3.9294) grad_norm: 2.3151 (2.3498) time: 0.7622 data: 0.0003 max mem: 8426 +[2024-12-10 18:26:46 root] (utils.py 283): INFO Epoch: [13] [ 500/2502] eta: 0:25:38 lr: 0.000013 loss_cls: 4.3018 (3.9359) grad_norm: 2.3145 (2.3499) time: 0.7636 data: 0.0003 max mem: 8426 +[2024-12-10 18:26:54 root] (utils.py 283): INFO Epoch: [13] [ 510/2502] eta: 0:25:30 lr: 0.000013 loss_cls: 4.3217 (3.9373) grad_norm: 2.3568 (2.3507) time: 0.7683 data: 0.0003 max mem: 8426 +[2024-12-10 18:27:01 root] (utils.py 283): INFO Epoch: [13] [ 520/2502] eta: 0:25:23 lr: 0.000013 loss_cls: 4.1842 (3.9412) grad_norm: 2.3482 (2.3500) time: 0.7733 data: 0.0003 max mem: 8426 +[2024-12-10 18:27:09 root] (utils.py 283): INFO Epoch: [13] [ 530/2502] eta: 0:25:15 lr: 0.000013 loss_cls: 4.0837 (3.9437) grad_norm: 2.2910 (2.3489) time: 0.7673 data: 0.0003 max mem: 8426 +[2024-12-10 18:27:17 root] (utils.py 283): INFO Epoch: [13] [ 540/2502] eta: 0:25:07 lr: 0.000013 loss_cls: 4.0837 (3.9465) grad_norm: 2.3007 (2.3479) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 18:27:24 root] (utils.py 283): INFO Epoch: [13] [ 550/2502] eta: 0:24:59 lr: 0.000013 loss_cls: 4.0489 (3.9442) grad_norm: 2.3061 (2.3482) time: 0.7626 data: 0.0003 max mem: 8426 +[2024-12-10 18:27:32 root] (utils.py 283): INFO Epoch: [13] [ 560/2502] eta: 0:24:51 lr: 0.000013 loss_cls: 4.0489 (3.9469) grad_norm: 2.3061 (2.3472) time: 0.7636 data: 0.0003 max mem: 8426 +[2024-12-10 18:27:40 root] (utils.py 283): INFO Epoch: [13] [ 570/2502] eta: 0:24:43 lr: 0.000013 loss_cls: 4.0539 (3.9470) grad_norm: 2.3405 (2.3482) time: 0.7646 data: 0.0003 max mem: 8426 +[2024-12-10 18:27:47 root] (utils.py 283): INFO Epoch: [13] [ 580/2502] eta: 0:24:35 lr: 0.000013 loss_cls: 4.1440 (3.9461) grad_norm: 2.3405 (2.3469) time: 0.7615 data: 0.0002 max mem: 8426 +[2024-12-10 18:27:55 root] (utils.py 283): INFO Epoch: [13] [ 590/2502] eta: 0:24:28 lr: 0.000013 loss_cls: 4.1768 (3.9498) grad_norm: 2.3214 (2.3466) time: 0.7610 data: 0.0003 max mem: 8426 +[2024-12-10 18:28:02 root] (utils.py 283): INFO Epoch: [13] [ 600/2502] eta: 0:24:20 lr: 0.000013 loss_cls: 4.2265 (3.9509) grad_norm: 2.3202 (2.3464) time: 0.7652 data: 0.0003 max mem: 8426 +[2024-12-10 18:28:10 root] (utils.py 283): INFO Epoch: [13] [ 610/2502] eta: 0:24:12 lr: 0.000013 loss_cls: 4.0287 (3.9497) grad_norm: 2.3081 (2.3464) time: 0.7662 data: 0.0003 max mem: 8426 +[2024-12-10 18:28:18 root] (utils.py 283): INFO Epoch: [13] [ 620/2502] eta: 0:24:04 lr: 0.000013 loss_cls: 3.7378 (3.9413) grad_norm: 2.3182 (2.3467) time: 0.7652 data: 0.0003 max mem: 8426 +[2024-12-10 18:28:25 root] (utils.py 283): INFO Epoch: [13] [ 630/2502] eta: 0:23:57 lr: 0.000013 loss_cls: 3.7380 (3.9405) grad_norm: 2.3263 (2.3472) time: 0.7650 data: 0.0003 max mem: 8426 +[2024-12-10 18:28:33 root] (utils.py 283): INFO Epoch: [13] [ 640/2502] eta: 0:23:49 lr: 0.000013 loss_cls: 3.9224 (3.9410) grad_norm: 2.3517 (2.3472) time: 0.7651 data: 0.0003 max mem: 8426 +[2024-12-10 18:28:41 root] (utils.py 283): INFO Epoch: [13] [ 650/2502] eta: 0:23:41 lr: 0.000013 loss_cls: 3.9750 (3.9398) grad_norm: 2.3470 (2.3476) time: 0.7633 data: 0.0002 max mem: 8426 +[2024-12-10 18:28:48 root] (utils.py 283): INFO Epoch: [13] [ 660/2502] eta: 0:23:33 lr: 0.000013 loss_cls: 4.0489 (3.9393) grad_norm: 2.3612 (2.3480) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 18:28:56 root] (utils.py 283): INFO Epoch: [13] [ 670/2502] eta: 0:23:26 lr: 0.000013 loss_cls: 4.0937 (3.9410) grad_norm: 2.3582 (2.3480) time: 0.7647 data: 0.0003 max mem: 8426 +[2024-12-10 18:29:04 root] (utils.py 283): INFO Epoch: [13] [ 680/2502] eta: 0:23:18 lr: 0.000013 loss_cls: 3.9619 (3.9417) grad_norm: 2.3154 (2.3481) time: 0.7686 data: 0.0002 max mem: 8426 +[2024-12-10 18:29:11 root] (utils.py 283): INFO Epoch: [13] [ 690/2502] eta: 0:23:10 lr: 0.000013 loss_cls: 3.9573 (3.9376) grad_norm: 2.3547 (2.3479) time: 0.7673 data: 0.0002 max mem: 8426 +[2024-12-10 18:29:19 root] (utils.py 283): INFO Epoch: [13] [ 700/2502] eta: 0:23:02 lr: 0.000013 loss_cls: 3.8975 (3.9361) grad_norm: 2.3479 (2.3484) time: 0.7635 data: 0.0003 max mem: 8426 +[2024-12-10 18:29:27 root] (utils.py 283): INFO Epoch: [13] [ 710/2502] eta: 0:22:55 lr: 0.000013 loss_cls: 3.9849 (3.9364) grad_norm: 2.3391 (2.3484) time: 0.7630 data: 0.0003 max mem: 8426 +[2024-12-10 18:29:34 root] (utils.py 283): INFO Epoch: [13] [ 720/2502] eta: 0:22:47 lr: 0.000013 loss_cls: 4.1887 (3.9398) grad_norm: 2.3391 (2.3480) time: 0.7657 data: 0.0003 max mem: 8426 +[2024-12-10 18:29:42 root] (utils.py 283): INFO Epoch: [13] [ 730/2502] eta: 0:22:39 lr: 0.000013 loss_cls: 4.0354 (3.9356) grad_norm: 2.3657 (2.3488) time: 0.7703 data: 0.0002 max mem: 8426 +[2024-12-10 18:29:50 root] (utils.py 283): INFO Epoch: [13] [ 740/2502] eta: 0:22:32 lr: 0.000013 loss_cls: 3.7313 (3.9383) grad_norm: 2.3934 (2.3492) time: 0.7766 data: 0.0002 max mem: 8426 +[2024-12-10 18:29:58 root] (utils.py 283): INFO Epoch: [13] [ 750/2502] eta: 0:22:25 lr: 0.000013 loss_cls: 4.1026 (3.9361) grad_norm: 2.3146 (2.3492) time: 0.7801 data: 0.0003 max mem: 8426 +[2024-12-10 18:30:05 root] (utils.py 283): INFO Epoch: [13] [ 760/2502] eta: 0:22:17 lr: 0.000013 loss_cls: 3.9572 (3.9385) grad_norm: 2.2979 (2.3478) time: 0.7758 data: 0.0003 max mem: 8426 +[2024-12-10 18:30:13 root] (utils.py 283): INFO Epoch: [13] [ 770/2502] eta: 0:22:10 lr: 0.000013 loss_cls: 3.9572 (3.9383) grad_norm: 2.2878 (2.3473) time: 0.7760 data: 0.0002 max mem: 8426 +[2024-12-10 18:30:21 root] (utils.py 283): INFO Epoch: [13] [ 780/2502] eta: 0:22:02 lr: 0.000013 loss_cls: 4.0691 (3.9402) grad_norm: 2.2934 (2.3467) time: 0.7749 data: 0.0003 max mem: 8426 +[2024-12-10 18:30:29 root] (utils.py 283): INFO Epoch: [13] [ 790/2502] eta: 0:21:55 lr: 0.000013 loss_cls: 4.0928 (3.9421) grad_norm: 2.3429 (2.3472) time: 0.7798 data: 0.0003 max mem: 8426 +[2024-12-10 18:30:37 root] (utils.py 283): INFO Epoch: [13] [ 800/2502] eta: 0:21:48 lr: 0.000013 loss_cls: 4.1486 (3.9434) grad_norm: 2.3429 (2.3470) time: 0.7885 data: 0.0002 max mem: 8426 +[2024-12-10 18:30:44 root] (utils.py 283): INFO Epoch: [13] [ 810/2502] eta: 0:21:40 lr: 0.000013 loss_cls: 4.0205 (3.9445) grad_norm: 2.3010 (2.3467) time: 0.7832 data: 0.0002 max mem: 8426 +[2024-12-10 18:30:52 root] (utils.py 283): INFO Epoch: [13] [ 820/2502] eta: 0:21:32 lr: 0.000013 loss_cls: 3.9036 (3.9447) grad_norm: 2.3428 (2.3465) time: 0.7720 data: 0.0002 max mem: 8426 +[2024-12-10 18:31:00 root] (utils.py 283): INFO Epoch: [13] [ 830/2502] eta: 0:21:25 lr: 0.000013 loss_cls: 3.8491 (3.9457) grad_norm: 2.3804 (2.3474) time: 0.7665 data: 0.0003 max mem: 8426 +[2024-12-10 18:31:07 root] (utils.py 283): INFO Epoch: [13] [ 840/2502] eta: 0:21:17 lr: 0.000013 loss_cls: 3.7971 (3.9417) grad_norm: 2.4218 (2.3482) time: 0.7675 data: 0.0003 max mem: 8426 +[2024-12-10 18:31:15 root] (utils.py 283): INFO Epoch: [13] [ 850/2502] eta: 0:21:09 lr: 0.000013 loss_cls: 3.8035 (3.9431) grad_norm: 2.3575 (2.3484) time: 0.7675 data: 0.0003 max mem: 8426 +[2024-12-10 18:31:23 root] (utils.py 283): INFO Epoch: [13] [ 860/2502] eta: 0:21:01 lr: 0.000013 loss_cls: 4.0886 (3.9449) grad_norm: 2.3228 (2.3481) time: 0.7652 data: 0.0003 max mem: 8426 +[2024-12-10 18:31:30 root] (utils.py 283): INFO Epoch: [13] [ 870/2502] eta: 0:20:54 lr: 0.000013 loss_cls: 3.9510 (3.9427) grad_norm: 2.2736 (2.3474) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 18:31:38 root] (utils.py 283): INFO Epoch: [13] [ 880/2502] eta: 0:20:46 lr: 0.000013 loss_cls: 3.7096 (3.9406) grad_norm: 2.2890 (2.3477) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 18:31:46 root] (utils.py 283): INFO Epoch: [13] [ 890/2502] eta: 0:20:38 lr: 0.000013 loss_cls: 3.7060 (3.9367) grad_norm: 2.3009 (2.3473) time: 0.7625 data: 0.0002 max mem: 8426 +[2024-12-10 18:31:53 root] (utils.py 283): INFO Epoch: [13] [ 900/2502] eta: 0:20:30 lr: 0.000013 loss_cls: 3.9723 (3.9364) grad_norm: 2.3465 (2.3477) time: 0.7633 data: 0.0003 max mem: 8426 +[2024-12-10 18:32:01 root] (utils.py 283): INFO Epoch: [13] [ 910/2502] eta: 0:20:23 lr: 0.000013 loss_cls: 3.7832 (3.9328) grad_norm: 2.4165 (2.3487) time: 0.7681 data: 0.0003 max mem: 8426 +[2024-12-10 18:32:09 root] (utils.py 283): INFO Epoch: [13] [ 920/2502] eta: 0:20:15 lr: 0.000013 loss_cls: 3.7476 (3.9330) grad_norm: 2.3563 (2.3489) time: 0.7678 data: 0.0003 max mem: 8426 +[2024-12-10 18:32:16 root] (utils.py 283): INFO Epoch: [13] [ 930/2502] eta: 0:20:07 lr: 0.000013 loss_cls: 3.7787 (3.9322) grad_norm: 2.3461 (2.3488) time: 0.7625 data: 0.0003 max mem: 8426 +[2024-12-10 18:32:24 root] (utils.py 283): INFO Epoch: [13] [ 940/2502] eta: 0:20:00 lr: 0.000013 loss_cls: 3.8942 (3.9338) grad_norm: 2.3052 (2.3488) time: 0.7723 data: 0.0002 max mem: 8426 +[2024-12-10 18:32:32 root] (utils.py 283): INFO Epoch: [13] [ 950/2502] eta: 0:19:52 lr: 0.000013 loss_cls: 4.1947 (3.9385) grad_norm: 2.3584 (2.3491) time: 0.7808 data: 0.0002 max mem: 8426 +[2024-12-10 18:32:39 root] (utils.py 283): INFO Epoch: [13] [ 960/2502] eta: 0:19:44 lr: 0.000013 loss_cls: 4.3418 (3.9399) grad_norm: 2.3535 (2.3485) time: 0.7699 data: 0.0002 max mem: 8426 +[2024-12-10 18:32:47 root] (utils.py 283): INFO Epoch: [13] [ 970/2502] eta: 0:19:37 lr: 0.000013 loss_cls: 4.1544 (3.9395) grad_norm: 2.2911 (2.3480) time: 0.7701 data: 0.0002 max mem: 8426 +[2024-12-10 18:32:55 root] (utils.py 283): INFO Epoch: [13] [ 980/2502] eta: 0:19:29 lr: 0.000013 loss_cls: 4.1544 (3.9407) grad_norm: 2.2389 (2.3475) time: 0.7759 data: 0.0003 max mem: 8426 +[2024-12-10 18:33:03 root] (utils.py 283): INFO Epoch: [13] [ 990/2502] eta: 0:19:21 lr: 0.000013 loss_cls: 4.1796 (3.9437) grad_norm: 2.2551 (2.3468) time: 0.7687 data: 0.0003 max mem: 8426 +[2024-12-10 18:33:10 root] (utils.py 283): INFO Epoch: [13] [1000/2502] eta: 0:19:14 lr: 0.000013 loss_cls: 4.1796 (3.9429) grad_norm: 2.3108 (2.3469) time: 0.7606 data: 0.0003 max mem: 8426 +[2024-12-10 18:33:18 root] (utils.py 283): INFO Epoch: [13] [1010/2502] eta: 0:19:06 lr: 0.000013 loss_cls: 4.0777 (3.9442) grad_norm: 2.3108 (2.3465) time: 0.7598 data: 0.0003 max mem: 8426 +[2024-12-10 18:33:26 root] (utils.py 283): INFO Epoch: [13] [1020/2502] eta: 0:18:58 lr: 0.000013 loss_cls: 4.1112 (3.9460) grad_norm: 2.3265 (2.3469) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 18:33:33 root] (utils.py 283): INFO Epoch: [13] [1030/2502] eta: 0:18:50 lr: 0.000013 loss_cls: 4.1093 (3.9455) grad_norm: 2.3265 (2.3468) time: 0.7664 data: 0.0003 max mem: 8426 +[2024-12-10 18:33:41 root] (utils.py 283): INFO Epoch: [13] [1040/2502] eta: 0:18:43 lr: 0.000013 loss_cls: 4.0881 (3.9458) grad_norm: 2.3029 (2.3465) time: 0.7674 data: 0.0003 max mem: 8426 +[2024-12-10 18:33:49 root] (utils.py 283): INFO Epoch: [13] [1050/2502] eta: 0:18:35 lr: 0.000013 loss_cls: 4.1312 (3.9466) grad_norm: 2.3654 (2.3470) time: 0.7680 data: 0.0003 max mem: 8426 +[2024-12-10 18:33:56 root] (utils.py 283): INFO Epoch: [13] [1060/2502] eta: 0:18:27 lr: 0.000013 loss_cls: 4.0280 (3.9465) grad_norm: 2.3556 (2.3472) time: 0.7667 data: 0.0003 max mem: 8426 +[2024-12-10 18:34:04 root] (utils.py 283): INFO Epoch: [13] [1070/2502] eta: 0:18:20 lr: 0.000013 loss_cls: 4.2120 (3.9486) grad_norm: 2.3378 (2.3472) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 18:34:11 root] (utils.py 283): INFO Epoch: [13] [1080/2502] eta: 0:18:12 lr: 0.000013 loss_cls: 4.2492 (3.9488) grad_norm: 2.3407 (2.3475) time: 0.7645 data: 0.0003 max mem: 8426 +[2024-12-10 18:34:19 root] (utils.py 283): INFO Epoch: [13] [1090/2502] eta: 0:18:04 lr: 0.000013 loss_cls: 4.2157 (3.9516) grad_norm: 2.3614 (2.3477) time: 0.7672 data: 0.0003 max mem: 8426 +[2024-12-10 18:34:27 root] (utils.py 283): INFO Epoch: [13] [1100/2502] eta: 0:17:57 lr: 0.000013 loss_cls: 4.2361 (3.9517) grad_norm: 2.3516 (2.3472) time: 0.7684 data: 0.0002 max mem: 8426 +[2024-12-10 18:34:35 root] (utils.py 283): INFO Epoch: [13] [1110/2502] eta: 0:17:49 lr: 0.000013 loss_cls: 4.0813 (3.9507) grad_norm: 2.3528 (2.3476) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-10 18:34:42 root] (utils.py 283): INFO Epoch: [13] [1120/2502] eta: 0:17:41 lr: 0.000013 loss_cls: 4.0077 (3.9476) grad_norm: 2.3220 (2.3470) time: 0.7635 data: 0.0003 max mem: 8426 +[2024-12-10 18:34:50 root] (utils.py 283): INFO Epoch: [13] [1130/2502] eta: 0:17:33 lr: 0.000013 loss_cls: 3.7984 (3.9460) grad_norm: 2.2812 (2.3465) time: 0.7634 data: 0.0003 max mem: 8426 +[2024-12-10 18:34:57 root] (utils.py 283): INFO Epoch: [13] [1140/2502] eta: 0:17:26 lr: 0.000013 loss_cls: 3.7488 (3.9446) grad_norm: 2.2878 (2.3470) time: 0.7650 data: 0.0003 max mem: 8426 +[2024-12-10 18:35:05 root] (utils.py 283): INFO Epoch: [13] [1150/2502] eta: 0:17:18 lr: 0.000013 loss_cls: 3.7488 (3.9439) grad_norm: 2.3900 (2.3479) time: 0.7676 data: 0.0003 max mem: 8426 +[2024-12-10 18:35:13 root] (utils.py 283): INFO Epoch: [13] [1160/2502] eta: 0:17:10 lr: 0.000013 loss_cls: 3.9102 (3.9426) grad_norm: 2.3387 (2.3479) time: 0.7770 data: 0.0003 max mem: 8426 +[2024-12-10 18:35:21 root] (utils.py 283): INFO Epoch: [13] [1170/2502] eta: 0:17:03 lr: 0.000013 loss_cls: 3.9223 (3.9425) grad_norm: 2.3210 (2.3482) time: 0.7850 data: 0.0003 max mem: 8426 +[2024-12-10 18:35:28 root] (utils.py 283): INFO Epoch: [13] [1180/2502] eta: 0:16:55 lr: 0.000013 loss_cls: 3.9903 (3.9423) grad_norm: 2.3861 (2.3490) time: 0.7739 data: 0.0003 max mem: 8426 +[2024-12-10 18:35:36 root] (utils.py 283): INFO Epoch: [13] [1190/2502] eta: 0:16:47 lr: 0.000013 loss_cls: 4.0637 (3.9420) grad_norm: 2.3861 (2.3493) time: 0.7633 data: 0.0003 max mem: 8426 +[2024-12-10 18:35:44 root] (utils.py 283): INFO Epoch: [13] [1200/2502] eta: 0:16:40 lr: 0.000013 loss_cls: 3.5933 (3.9388) grad_norm: 2.3078 (2.3491) time: 0.7644 data: 0.0003 max mem: 8426 +[2024-12-10 18:35:51 root] (utils.py 283): INFO Epoch: [13] [1210/2502] eta: 0:16:32 lr: 0.000013 loss_cls: 3.5933 (3.9384) grad_norm: 2.3857 (2.3500) time: 0.7657 data: 0.0003 max mem: 8426 +[2024-12-10 18:35:59 root] (utils.py 283): INFO Epoch: [13] [1220/2502] eta: 0:16:24 lr: 0.000013 loss_cls: 4.1299 (3.9411) grad_norm: 2.3502 (2.3499) time: 0.7682 data: 0.0002 max mem: 8426 +[2024-12-10 18:36:07 root] (utils.py 283): INFO Epoch: [13] [1230/2502] eta: 0:16:17 lr: 0.000013 loss_cls: 4.0960 (3.9389) grad_norm: 2.3276 (2.3498) time: 0.7673 data: 0.0003 max mem: 8426 +[2024-12-10 18:36:14 root] (utils.py 283): INFO Epoch: [13] [1240/2502] eta: 0:16:09 lr: 0.000013 loss_cls: 3.6712 (3.9367) grad_norm: 2.3173 (2.3496) time: 0.7648 data: 0.0003 max mem: 8426 +[2024-12-10 18:36:22 root] (utils.py 283): INFO Epoch: [13] [1250/2502] eta: 0:16:01 lr: 0.000013 loss_cls: 3.7319 (3.9361) grad_norm: 2.3190 (2.3501) time: 0.7645 data: 0.0003 max mem: 8426 +[2024-12-10 18:36:30 root] (utils.py 283): INFO Epoch: [13] [1260/2502] eta: 0:15:54 lr: 0.000013 loss_cls: 3.8300 (3.9337) grad_norm: 2.3674 (2.3505) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 18:36:37 root] (utils.py 283): INFO Epoch: [13] [1270/2502] eta: 0:15:46 lr: 0.000013 loss_cls: 3.8300 (3.9326) grad_norm: 2.3755 (2.3505) time: 0.7665 data: 0.0002 max mem: 8426 +[2024-12-10 18:36:45 root] (utils.py 283): INFO Epoch: [13] [1280/2502] eta: 0:15:38 lr: 0.000013 loss_cls: 4.2325 (3.9360) grad_norm: 2.3819 (2.3513) time: 0.7678 data: 0.0002 max mem: 8426 +[2024-12-10 18:36:53 root] (utils.py 283): INFO Epoch: [13] [1290/2502] eta: 0:15:31 lr: 0.000013 loss_cls: 4.2325 (3.9363) grad_norm: 2.4282 (2.3517) time: 0.7726 data: 0.0002 max mem: 8426 +[2024-12-10 18:37:01 root] (utils.py 283): INFO Epoch: [13] [1300/2502] eta: 0:15:23 lr: 0.000013 loss_cls: 4.1494 (3.9362) grad_norm: 2.3650 (2.3516) time: 0.7808 data: 0.0002 max mem: 8426 +[2024-12-10 18:37:08 root] (utils.py 283): INFO Epoch: [13] [1310/2502] eta: 0:15:15 lr: 0.000013 loss_cls: 4.2044 (3.9361) grad_norm: 2.3114 (2.3515) time: 0.7736 data: 0.0002 max mem: 8426 +[2024-12-10 18:37:16 root] (utils.py 283): INFO Epoch: [13] [1320/2502] eta: 0:15:08 lr: 0.000013 loss_cls: 4.2360 (3.9370) grad_norm: 2.3114 (2.3515) time: 0.7636 data: 0.0003 max mem: 8426 +[2024-12-10 18:37:24 root] (utils.py 283): INFO Epoch: [13] [1330/2502] eta: 0:15:00 lr: 0.000013 loss_cls: 4.2198 (3.9380) grad_norm: 2.3007 (2.3511) time: 0.7647 data: 0.0003 max mem: 8426 +[2024-12-10 18:37:31 root] (utils.py 283): INFO Epoch: [13] [1340/2502] eta: 0:14:52 lr: 0.000013 loss_cls: 4.2381 (3.9407) grad_norm: 2.3249 (2.3513) time: 0.7648 data: 0.0003 max mem: 8426 +[2024-12-10 18:37:39 root] (utils.py 283): INFO Epoch: [13] [1350/2502] eta: 0:14:44 lr: 0.000013 loss_cls: 4.2495 (3.9421) grad_norm: 2.3715 (2.3514) time: 0.7611 data: 0.0002 max mem: 8426 +[2024-12-10 18:37:47 root] (utils.py 283): INFO Epoch: [13] [1360/2502] eta: 0:14:37 lr: 0.000013 loss_cls: 4.1614 (3.9408) grad_norm: 2.3137 (2.3513) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-10 18:37:54 root] (utils.py 283): INFO Epoch: [13] [1370/2502] eta: 0:14:29 lr: 0.000013 loss_cls: 3.9443 (3.9414) grad_norm: 2.2876 (2.3511) time: 0.7693 data: 0.0002 max mem: 8426 +[2024-12-10 18:38:02 root] (utils.py 283): INFO Epoch: [13] [1380/2502] eta: 0:14:21 lr: 0.000013 loss_cls: 3.9276 (3.9395) grad_norm: 2.3043 (2.3512) time: 0.7627 data: 0.0003 max mem: 8426 +[2024-12-10 18:38:09 root] (utils.py 283): INFO Epoch: [13] [1390/2502] eta: 0:14:14 lr: 0.000013 loss_cls: 3.6546 (3.9372) grad_norm: 2.3084 (2.3512) time: 0.7622 data: 0.0003 max mem: 8426 +[2024-12-10 18:38:17 root] (utils.py 283): INFO Epoch: [13] [1400/2502] eta: 0:14:06 lr: 0.000013 loss_cls: 3.6546 (3.9367) grad_norm: 2.3047 (2.3510) time: 0.7617 data: 0.0003 max mem: 8426 +[2024-12-10 18:38:25 root] (utils.py 283): INFO Epoch: [13] [1410/2502] eta: 0:13:58 lr: 0.000013 loss_cls: 4.0528 (3.9374) grad_norm: 2.3217 (2.3509) time: 0.7600 data: 0.0002 max mem: 8426 +[2024-12-10 18:38:32 root] (utils.py 283): INFO Epoch: [13] [1420/2502] eta: 0:13:50 lr: 0.000013 loss_cls: 3.9015 (3.9354) grad_norm: 2.3217 (2.3506) time: 0.7623 data: 0.0002 max mem: 8426 +[2024-12-10 18:38:40 root] (utils.py 283): INFO Epoch: [13] [1430/2502] eta: 0:13:43 lr: 0.000013 loss_cls: 3.3585 (3.9311) grad_norm: 2.3598 (2.3509) time: 0.7665 data: 0.0003 max mem: 8426 +[2024-12-10 18:38:48 root] (utils.py 283): INFO Epoch: [13] [1440/2502] eta: 0:13:35 lr: 0.000013 loss_cls: 3.4889 (3.9319) grad_norm: 2.3737 (2.3509) time: 0.7669 data: 0.0003 max mem: 8426 +[2024-12-10 18:38:55 root] (utils.py 283): INFO Epoch: [13] [1450/2502] eta: 0:13:27 lr: 0.000013 loss_cls: 4.0293 (3.9325) grad_norm: 2.3880 (2.3511) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-10 18:39:03 root] (utils.py 283): INFO Epoch: [13] [1460/2502] eta: 0:13:20 lr: 0.000013 loss_cls: 3.9808 (3.9321) grad_norm: 2.3736 (2.3509) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 18:39:11 root] (utils.py 283): INFO Epoch: [13] [1470/2502] eta: 0:13:12 lr: 0.000013 loss_cls: 4.0911 (3.9311) grad_norm: 2.3488 (2.3511) time: 0.7661 data: 0.0003 max mem: 8426 +[2024-12-10 18:39:18 root] (utils.py 283): INFO Epoch: [13] [1480/2502] eta: 0:13:04 lr: 0.000013 loss_cls: 4.0911 (3.9322) grad_norm: 2.3436 (2.3508) time: 0.7745 data: 0.0002 max mem: 8426 +[2024-12-10 18:39:26 root] (utils.py 283): INFO Epoch: [13] [1490/2502] eta: 0:12:57 lr: 0.000013 loss_cls: 4.0703 (3.9320) grad_norm: 2.3116 (2.3507) time: 0.7806 data: 0.0002 max mem: 8426 +[2024-12-10 18:39:34 root] (utils.py 283): INFO Epoch: [13] [1500/2502] eta: 0:12:49 lr: 0.000013 loss_cls: 3.9196 (3.9316) grad_norm: 2.3675 (2.3509) time: 0.7796 data: 0.0002 max mem: 8426 +[2024-12-10 18:39:42 root] (utils.py 283): INFO Epoch: [13] [1510/2502] eta: 0:12:42 lr: 0.000013 loss_cls: 3.8037 (3.9310) grad_norm: 2.3491 (2.3504) time: 0.7787 data: 0.0002 max mem: 8426 +[2024-12-10 18:39:50 root] (utils.py 283): INFO Epoch: [13] [1520/2502] eta: 0:12:34 lr: 0.000013 loss_cls: 4.0646 (3.9311) grad_norm: 2.3191 (2.3503) time: 0.7831 data: 0.0003 max mem: 8426 +[2024-12-10 18:39:57 root] (utils.py 283): INFO Epoch: [13] [1530/2502] eta: 0:12:26 lr: 0.000013 loss_cls: 4.0251 (3.9298) grad_norm: 2.3191 (2.3502) time: 0.7803 data: 0.0003 max mem: 8426 +[2024-12-10 18:40:05 root] (utils.py 283): INFO Epoch: [13] [1540/2502] eta: 0:12:19 lr: 0.000013 loss_cls: 3.9373 (3.9311) grad_norm: 2.3246 (2.3502) time: 0.7688 data: 0.0003 max mem: 8426 +[2024-12-10 18:40:13 root] (utils.py 283): INFO Epoch: [13] [1550/2502] eta: 0:12:11 lr: 0.000013 loss_cls: 4.0858 (3.9316) grad_norm: 2.3246 (2.3501) time: 0.7700 data: 0.0003 max mem: 8426 +[2024-12-10 18:40:21 root] (utils.py 283): INFO Epoch: [13] [1560/2502] eta: 0:12:03 lr: 0.000013 loss_cls: 3.7184 (3.9292) grad_norm: 2.2978 (2.3501) time: 0.7736 data: 0.0002 max mem: 8426 +[2024-12-10 18:40:28 root] (utils.py 283): INFO Epoch: [13] [1570/2502] eta: 0:11:56 lr: 0.000013 loss_cls: 3.7612 (3.9300) grad_norm: 2.2934 (2.3502) time: 0.7699 data: 0.0002 max mem: 8426 +[2024-12-10 18:40:36 root] (utils.py 283): INFO Epoch: [13] [1580/2502] eta: 0:11:48 lr: 0.000013 loss_cls: 3.9986 (3.9294) grad_norm: 2.3135 (2.3503) time: 0.7665 data: 0.0002 max mem: 8426 +[2024-12-10 18:40:44 root] (utils.py 283): INFO Epoch: [13] [1590/2502] eta: 0:11:40 lr: 0.000013 loss_cls: 3.8584 (3.9282) grad_norm: 2.3397 (2.3504) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 18:40:51 root] (utils.py 283): INFO Epoch: [13] [1600/2502] eta: 0:11:33 lr: 0.000013 loss_cls: 4.0063 (3.9292) grad_norm: 2.3981 (2.3506) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 18:40:59 root] (utils.py 283): INFO Epoch: [13] [1610/2502] eta: 0:11:25 lr: 0.000013 loss_cls: 4.1072 (3.9299) grad_norm: 2.3306 (2.3507) time: 0.7662 data: 0.0003 max mem: 8426 +[2024-12-10 18:41:06 root] (utils.py 283): INFO Epoch: [13] [1620/2502] eta: 0:11:17 lr: 0.000013 loss_cls: 4.0119 (3.9297) grad_norm: 2.3237 (2.3505) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 18:41:14 root] (utils.py 283): INFO Epoch: [13] [1630/2502] eta: 0:11:09 lr: 0.000013 loss_cls: 3.9064 (3.9294) grad_norm: 2.3414 (2.3505) time: 0.7614 data: 0.0002 max mem: 8426 +[2024-12-10 18:41:22 root] (utils.py 283): INFO Epoch: [13] [1640/2502] eta: 0:11:02 lr: 0.000013 loss_cls: 4.1178 (3.9310) grad_norm: 2.3316 (2.3501) time: 0.7682 data: 0.0002 max mem: 8426 +[2024-12-10 18:41:29 root] (utils.py 283): INFO Epoch: [13] [1650/2502] eta: 0:10:54 lr: 0.000013 loss_cls: 3.8558 (3.9279) grad_norm: 2.3481 (2.3506) time: 0.7705 data: 0.0002 max mem: 8426 +[2024-12-10 18:41:37 root] (utils.py 283): INFO Epoch: [13] [1660/2502] eta: 0:10:46 lr: 0.000013 loss_cls: 4.0566 (3.9295) grad_norm: 2.3835 (2.3507) time: 0.7679 data: 0.0002 max mem: 8426 +[2024-12-10 18:41:45 root] (utils.py 283): INFO Epoch: [13] [1670/2502] eta: 0:10:39 lr: 0.000013 loss_cls: 4.0566 (3.9273) grad_norm: 2.3700 (2.3508) time: 0.7717 data: 0.0002 max mem: 8426 +[2024-12-10 18:41:53 root] (utils.py 283): INFO Epoch: [13] [1680/2502] eta: 0:10:31 lr: 0.000013 loss_cls: 3.5862 (3.9263) grad_norm: 2.3700 (2.3509) time: 0.7704 data: 0.0002 max mem: 8426 +[2024-12-10 18:42:00 root] (utils.py 283): INFO Epoch: [13] [1690/2502] eta: 0:10:23 lr: 0.000013 loss_cls: 3.6264 (3.9250) grad_norm: 2.2778 (2.3505) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 18:42:08 root] (utils.py 283): INFO Epoch: [13] [1700/2502] eta: 0:10:16 lr: 0.000013 loss_cls: 3.5596 (3.9227) grad_norm: 2.3100 (2.3504) time: 0.7665 data: 0.0002 max mem: 8426 +[2024-12-10 18:42:16 root] (utils.py 283): INFO Epoch: [13] [1710/2502] eta: 0:10:08 lr: 0.000013 loss_cls: 3.8747 (3.9233) grad_norm: 2.3454 (2.3505) time: 0.7659 data: 0.0003 max mem: 8426 +[2024-12-10 18:42:23 root] (utils.py 283): INFO Epoch: [13] [1720/2502] eta: 0:10:00 lr: 0.000013 loss_cls: 4.2484 (3.9247) grad_norm: 2.3019 (2.3506) time: 0.7630 data: 0.0002 max mem: 8426 +[2024-12-10 18:42:31 root] (utils.py 283): INFO Epoch: [13] [1730/2502] eta: 0:09:53 lr: 0.000013 loss_cls: 4.2072 (3.9241) grad_norm: 2.3019 (2.3504) time: 0.7659 data: 0.0002 max mem: 8426 +[2024-12-10 18:42:39 root] (utils.py 283): INFO Epoch: [13] [1740/2502] eta: 0:09:45 lr: 0.000013 loss_cls: 3.7438 (3.9225) grad_norm: 2.3116 (2.3500) time: 0.7685 data: 0.0003 max mem: 8426 +[2024-12-10 18:42:46 root] (utils.py 283): INFO Epoch: [13] [1750/2502] eta: 0:09:37 lr: 0.000013 loss_cls: 4.1545 (3.9242) grad_norm: 2.3098 (2.3499) time: 0.7655 data: 0.0003 max mem: 8426 +[2024-12-10 18:42:54 root] (utils.py 283): INFO Epoch: [13] [1760/2502] eta: 0:09:29 lr: 0.000013 loss_cls: 4.1545 (3.9244) grad_norm: 2.3076 (2.3497) time: 0.7620 data: 0.0003 max mem: 8426 +[2024-12-10 18:43:01 root] (utils.py 283): INFO Epoch: [13] [1770/2502] eta: 0:09:22 lr: 0.000013 loss_cls: 3.9286 (3.9239) grad_norm: 2.2874 (2.3500) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-10 18:43:09 root] (utils.py 283): INFO Epoch: [13] [1780/2502] eta: 0:09:14 lr: 0.000013 loss_cls: 4.0541 (3.9254) grad_norm: 2.3639 (2.3501) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 18:43:17 root] (utils.py 283): INFO Epoch: [13] [1790/2502] eta: 0:09:06 lr: 0.000013 loss_cls: 4.0924 (3.9253) grad_norm: 2.3711 (2.3505) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 18:43:24 root] (utils.py 283): INFO Epoch: [13] [1800/2502] eta: 0:08:59 lr: 0.000013 loss_cls: 4.1347 (3.9268) grad_norm: 2.3823 (2.3505) time: 0.7612 data: 0.0002 max mem: 8426 +[2024-12-10 18:43:32 root] (utils.py 283): INFO Epoch: [13] [1810/2502] eta: 0:08:51 lr: 0.000013 loss_cls: 4.2128 (3.9271) grad_norm: 2.3257 (2.3507) time: 0.7599 data: 0.0002 max mem: 8426 +[2024-12-10 18:43:40 root] (utils.py 283): INFO Epoch: [13] [1820/2502] eta: 0:08:43 lr: 0.000013 loss_cls: 3.6897 (3.9246) grad_norm: 2.2975 (2.3508) time: 0.7611 data: 0.0002 max mem: 8426 +[2024-12-10 18:43:47 root] (utils.py 283): INFO Epoch: [13] [1830/2502] eta: 0:08:36 lr: 0.000013 loss_cls: 3.8314 (3.9247) grad_norm: 2.2975 (2.3505) time: 0.7646 data: 0.0003 max mem: 8426 +[2024-12-10 18:43:55 root] (utils.py 283): INFO Epoch: [13] [1840/2502] eta: 0:08:28 lr: 0.000013 loss_cls: 4.1248 (3.9259) grad_norm: 2.2844 (2.3502) time: 0.7659 data: 0.0002 max mem: 8426 +[2024-12-10 18:44:03 root] (utils.py 283): INFO Epoch: [13] [1850/2502] eta: 0:08:20 lr: 0.000013 loss_cls: 4.2630 (3.9281) grad_norm: 2.2736 (2.3498) time: 0.7721 data: 0.0002 max mem: 8426 +[2024-12-10 18:44:10 root] (utils.py 283): INFO Epoch: [13] [1860/2502] eta: 0:08:13 lr: 0.000013 loss_cls: 4.1163 (3.9277) grad_norm: 2.2696 (2.3495) time: 0.7687 data: 0.0003 max mem: 8426 +[2024-12-10 18:44:18 root] (utils.py 283): INFO Epoch: [13] [1870/2502] eta: 0:08:05 lr: 0.000013 loss_cls: 3.8892 (3.9283) grad_norm: 2.3004 (2.3496) time: 0.7604 data: 0.0003 max mem: 8426 +[2024-12-10 18:44:26 root] (utils.py 283): INFO Epoch: [13] [1880/2502] eta: 0:07:57 lr: 0.000013 loss_cls: 3.9672 (3.9286) grad_norm: 2.3822 (2.3500) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 18:44:33 root] (utils.py 283): INFO Epoch: [13] [1890/2502] eta: 0:07:49 lr: 0.000013 loss_cls: 3.9672 (3.9284) grad_norm: 2.3123 (2.3497) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 18:44:41 root] (utils.py 283): INFO Epoch: [13] [1900/2502] eta: 0:07:42 lr: 0.000013 loss_cls: 3.7310 (3.9268) grad_norm: 2.2762 (2.3496) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 18:44:49 root] (utils.py 283): INFO Epoch: [13] [1910/2502] eta: 0:07:34 lr: 0.000013 loss_cls: 3.6729 (3.9267) grad_norm: 2.3574 (2.3497) time: 0.7680 data: 0.0002 max mem: 8426 +[2024-12-10 18:44:56 root] (utils.py 283): INFO Epoch: [13] [1920/2502] eta: 0:07:26 lr: 0.000013 loss_cls: 4.0061 (3.9271) grad_norm: 2.3314 (2.3495) time: 0.7686 data: 0.0002 max mem: 8426 +[2024-12-10 18:45:04 root] (utils.py 283): INFO Epoch: [13] [1930/2502] eta: 0:07:19 lr: 0.000013 loss_cls: 4.0535 (3.9274) grad_norm: 2.2504 (2.3492) time: 0.7672 data: 0.0002 max mem: 8426 +[2024-12-10 18:45:12 root] (utils.py 283): INFO Epoch: [13] [1940/2502] eta: 0:07:11 lr: 0.000013 loss_cls: 3.8739 (3.9269) grad_norm: 2.3069 (2.3494) time: 0.7757 data: 0.0003 max mem: 8426 +[2024-12-10 18:45:19 root] (utils.py 283): INFO Epoch: [13] [1950/2502] eta: 0:07:03 lr: 0.000013 loss_cls: 3.8134 (3.9269) grad_norm: 2.3460 (2.3495) time: 0.7781 data: 0.0003 max mem: 8426 +[2024-12-10 18:45:27 root] (utils.py 283): INFO Epoch: [13] [1960/2502] eta: 0:06:56 lr: 0.000013 loss_cls: 4.2485 (3.9284) grad_norm: 2.3460 (2.3496) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 18:45:35 root] (utils.py 283): INFO Epoch: [13] [1970/2502] eta: 0:06:48 lr: 0.000013 loss_cls: 4.3046 (3.9288) grad_norm: 2.4019 (2.3500) time: 0.7639 data: 0.0003 max mem: 8426 +[2024-12-10 18:45:42 root] (utils.py 283): INFO Epoch: [13] [1980/2502] eta: 0:06:40 lr: 0.000013 loss_cls: 4.1126 (3.9285) grad_norm: 2.3723 (2.3498) time: 0.7671 data: 0.0002 max mem: 8426 +[2024-12-10 18:45:50 root] (utils.py 283): INFO Epoch: [13] [1990/2502] eta: 0:06:33 lr: 0.000013 loss_cls: 4.1026 (3.9282) grad_norm: 2.2813 (2.3498) time: 0.7686 data: 0.0003 max mem: 8426 +[2024-12-10 18:45:58 root] (utils.py 283): INFO Epoch: [13] [2000/2502] eta: 0:06:25 lr: 0.000013 loss_cls: 3.7272 (3.9277) grad_norm: 2.2813 (2.3495) time: 0.7654 data: 0.0003 max mem: 8426 +[2024-12-10 18:46:05 root] (utils.py 283): INFO Epoch: [13] [2010/2502] eta: 0:06:17 lr: 0.000013 loss_cls: 3.6810 (3.9270) grad_norm: 2.3120 (2.3499) time: 0.7628 data: 0.0003 max mem: 8426 +[2024-12-10 18:46:13 root] (utils.py 283): INFO Epoch: [13] [2020/2502] eta: 0:06:10 lr: 0.000013 loss_cls: 3.6324 (3.9261) grad_norm: 2.3633 (2.3500) time: 0.7632 data: 0.0003 max mem: 8426 +[2024-12-10 18:46:21 root] (utils.py 283): INFO Epoch: [13] [2030/2502] eta: 0:06:02 lr: 0.000013 loss_cls: 4.0971 (3.9264) grad_norm: 2.3431 (2.3501) time: 0.7689 data: 0.0003 max mem: 8426 +[2024-12-10 18:46:28 root] (utils.py 283): INFO Epoch: [13] [2040/2502] eta: 0:05:54 lr: 0.000013 loss_cls: 4.1380 (3.9263) grad_norm: 2.2849 (2.3500) time: 0.7764 data: 0.0002 max mem: 8426 +[2024-12-10 18:46:36 root] (utils.py 283): INFO Epoch: [13] [2050/2502] eta: 0:05:47 lr: 0.000013 loss_cls: 4.0439 (3.9262) grad_norm: 2.2450 (2.3495) time: 0.7699 data: 0.0002 max mem: 8426 +[2024-12-10 18:46:44 root] (utils.py 283): INFO Epoch: [13] [2060/2502] eta: 0:05:39 lr: 0.000013 loss_cls: 4.0826 (3.9264) grad_norm: 2.2619 (2.3494) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 18:46:51 root] (utils.py 283): INFO Epoch: [13] [2070/2502] eta: 0:05:31 lr: 0.000013 loss_cls: 3.8713 (3.9253) grad_norm: 2.3153 (2.3493) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 18:46:59 root] (utils.py 283): INFO Epoch: [13] [2080/2502] eta: 0:05:24 lr: 0.000013 loss_cls: 3.9374 (3.9256) grad_norm: 2.3559 (2.3495) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 18:47:07 root] (utils.py 283): INFO Epoch: [13] [2090/2502] eta: 0:05:16 lr: 0.000013 loss_cls: 3.9374 (3.9241) grad_norm: 2.3206 (2.3493) time: 0.7668 data: 0.0003 max mem: 8426 +[2024-12-10 18:47:14 root] (utils.py 283): INFO Epoch: [13] [2100/2502] eta: 0:05:08 lr: 0.000013 loss_cls: 3.7228 (3.9235) grad_norm: 2.3172 (2.3495) time: 0.7633 data: 0.0003 max mem: 8426 +[2024-12-10 18:47:22 root] (utils.py 283): INFO Epoch: [13] [2110/2502] eta: 0:05:00 lr: 0.000013 loss_cls: 3.9043 (3.9235) grad_norm: 2.2989 (2.3492) time: 0.7614 data: 0.0002 max mem: 8426 +[2024-12-10 18:47:30 root] (utils.py 283): INFO Epoch: [13] [2120/2502] eta: 0:04:53 lr: 0.000013 loss_cls: 3.7608 (3.9219) grad_norm: 2.2498 (2.3491) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 18:47:37 root] (utils.py 283): INFO Epoch: [13] [2130/2502] eta: 0:04:45 lr: 0.000013 loss_cls: 3.6117 (3.9215) grad_norm: 2.3242 (2.3491) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 18:47:45 root] (utils.py 283): INFO Epoch: [13] [2140/2502] eta: 0:04:37 lr: 0.000013 loss_cls: 4.0249 (3.9222) grad_norm: 2.3739 (2.3493) time: 0.7638 data: 0.0003 max mem: 8426 +[2024-12-10 18:47:52 root] (utils.py 283): INFO Epoch: [13] [2150/2502] eta: 0:04:30 lr: 0.000013 loss_cls: 4.0964 (3.9220) grad_norm: 2.3988 (2.3499) time: 0.7631 data: 0.0003 max mem: 8426 +[2024-12-10 18:48:00 root] (utils.py 283): INFO Epoch: [13] [2160/2502] eta: 0:04:22 lr: 0.000013 loss_cls: 4.2295 (3.9228) grad_norm: 2.3827 (2.3498) time: 0.7653 data: 0.0003 max mem: 8426 +[2024-12-10 18:48:08 root] (utils.py 283): INFO Epoch: [13] [2170/2502] eta: 0:04:14 lr: 0.000013 loss_cls: 4.2366 (3.9246) grad_norm: 2.3488 (2.3500) time: 0.7675 data: 0.0003 max mem: 8426 +[2024-12-10 18:48:15 root] (utils.py 283): INFO Epoch: [13] [2180/2502] eta: 0:04:07 lr: 0.000013 loss_cls: 4.3710 (3.9269) grad_norm: 2.3654 (2.3502) time: 0.7636 data: 0.0003 max mem: 8426 +[2024-12-10 18:48:23 root] (utils.py 283): INFO Epoch: [13] [2190/2502] eta: 0:03:59 lr: 0.000013 loss_cls: 4.2762 (3.9278) grad_norm: 2.3585 (2.3505) time: 0.7631 data: 0.0003 max mem: 8426 +[2024-12-10 18:48:31 root] (utils.py 283): INFO Epoch: [13] [2200/2502] eta: 0:03:51 lr: 0.000013 loss_cls: 4.1002 (3.9280) grad_norm: 2.3095 (2.3501) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 18:48:38 root] (utils.py 283): INFO Epoch: [13] [2210/2502] eta: 0:03:44 lr: 0.000013 loss_cls: 4.0363 (3.9271) grad_norm: 2.3000 (2.3504) time: 0.7677 data: 0.0002 max mem: 8426 +[2024-12-10 18:48:46 root] (utils.py 283): INFO Epoch: [13] [2220/2502] eta: 0:03:36 lr: 0.000013 loss_cls: 3.8073 (3.9272) grad_norm: 2.3728 (2.3505) time: 0.7686 data: 0.0003 max mem: 8426 +[2024-12-10 18:48:54 root] (utils.py 283): INFO Epoch: [13] [2230/2502] eta: 0:03:28 lr: 0.000013 loss_cls: 4.1208 (3.9286) grad_norm: 2.3315 (2.3506) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-10 18:49:01 root] (utils.py 283): INFO Epoch: [13] [2240/2502] eta: 0:03:21 lr: 0.000013 loss_cls: 4.1223 (3.9292) grad_norm: 2.3620 (2.3509) time: 0.7654 data: 0.0002 max mem: 8426 +[2024-12-10 18:49:09 root] (utils.py 283): INFO Epoch: [13] [2250/2502] eta: 0:03:13 lr: 0.000013 loss_cls: 4.1130 (3.9299) grad_norm: 2.3495 (2.3508) time: 0.7682 data: 0.0002 max mem: 8426 +[2024-12-10 18:49:17 root] (utils.py 283): INFO Epoch: [13] [2260/2502] eta: 0:03:05 lr: 0.000013 loss_cls: 4.1130 (3.9296) grad_norm: 2.2888 (2.3505) time: 0.7674 data: 0.0002 max mem: 8426 +[2024-12-10 18:49:24 root] (utils.py 283): INFO Epoch: [13] [2270/2502] eta: 0:02:58 lr: 0.000013 loss_cls: 3.6504 (3.9291) grad_norm: 2.2753 (2.3503) time: 0.7648 data: 0.0003 max mem: 8426 +[2024-12-10 18:49:32 root] (utils.py 283): INFO Epoch: [13] [2280/2502] eta: 0:02:50 lr: 0.000013 loss_cls: 3.8255 (3.9282) grad_norm: 2.3492 (2.3505) time: 0.7663 data: 0.0003 max mem: 8426 +[2024-12-10 18:49:40 root] (utils.py 283): INFO Epoch: [13] [2290/2502] eta: 0:02:42 lr: 0.000013 loss_cls: 4.1451 (3.9286) grad_norm: 2.3705 (2.3504) time: 0.7670 data: 0.0002 max mem: 8426 +[2024-12-10 18:49:48 root] (utils.py 283): INFO Epoch: [13] [2300/2502] eta: 0:02:35 lr: 0.000013 loss_cls: 3.9554 (3.9281) grad_norm: 2.3305 (2.3504) time: 0.7806 data: 0.0002 max mem: 8426 +[2024-12-10 18:49:55 root] (utils.py 283): INFO Epoch: [13] [2310/2502] eta: 0:02:27 lr: 0.000013 loss_cls: 3.9554 (3.9292) grad_norm: 2.3494 (2.3506) time: 0.7858 data: 0.0002 max mem: 8426 +[2024-12-10 18:50:03 root] (utils.py 283): INFO Epoch: [13] [2320/2502] eta: 0:02:19 lr: 0.000013 loss_cls: 4.1056 (3.9291) grad_norm: 2.3595 (2.3506) time: 0.7709 data: 0.0002 max mem: 8426 +[2024-12-10 18:50:11 root] (utils.py 283): INFO Epoch: [13] [2330/2502] eta: 0:02:12 lr: 0.000013 loss_cls: 4.1056 (3.9301) grad_norm: 2.3480 (2.3505) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-10 18:50:18 root] (utils.py 283): INFO Epoch: [13] [2340/2502] eta: 0:02:04 lr: 0.000013 loss_cls: 4.0944 (3.9302) grad_norm: 2.2873 (2.3504) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-10 18:50:26 root] (utils.py 283): INFO Epoch: [13] [2350/2502] eta: 0:01:56 lr: 0.000013 loss_cls: 4.1018 (3.9315) grad_norm: 2.2878 (2.3505) time: 0.7673 data: 0.0002 max mem: 8426 +[2024-12-10 18:50:34 root] (utils.py 283): INFO Epoch: [13] [2360/2502] eta: 0:01:49 lr: 0.000013 loss_cls: 4.1002 (3.9304) grad_norm: 2.3273 (2.3506) time: 0.7674 data: 0.0003 max mem: 8426 +[2024-12-10 18:50:41 root] (utils.py 283): INFO Epoch: [13] [2370/2502] eta: 0:01:41 lr: 0.000013 loss_cls: 3.7426 (3.9297) grad_norm: 2.3153 (2.3503) time: 0.7654 data: 0.0002 max mem: 8426 +[2024-12-10 18:50:49 root] (utils.py 283): INFO Epoch: [13] [2380/2502] eta: 0:01:33 lr: 0.000013 loss_cls: 3.9704 (3.9298) grad_norm: 2.3168 (2.3503) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 18:50:57 root] (utils.py 283): INFO Epoch: [13] [2390/2502] eta: 0:01:25 lr: 0.000013 loss_cls: 4.0370 (3.9304) grad_norm: 2.3301 (2.3502) time: 0.7633 data: 0.0002 max mem: 8426 +[2024-12-10 18:51:04 root] (utils.py 283): INFO Epoch: [13] [2400/2502] eta: 0:01:18 lr: 0.000013 loss_cls: 4.0118 (3.9306) grad_norm: 2.3155 (2.3501) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-10 18:51:12 root] (utils.py 283): INFO Epoch: [13] [2410/2502] eta: 0:01:10 lr: 0.000013 loss_cls: 3.8173 (3.9307) grad_norm: 2.3614 (2.3504) time: 0.7685 data: 0.0002 max mem: 8426 +[2024-12-10 18:51:20 root] (utils.py 283): INFO Epoch: [13] [2420/2502] eta: 0:01:02 lr: 0.000013 loss_cls: 4.0719 (3.9314) grad_norm: 2.3904 (2.3504) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-10 18:51:27 root] (utils.py 283): INFO Epoch: [13] [2430/2502] eta: 0:00:55 lr: 0.000013 loss_cls: 4.1389 (3.9325) grad_norm: 2.3067 (2.3502) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-10 18:51:35 root] (utils.py 283): INFO Epoch: [13] [2440/2502] eta: 0:00:47 lr: 0.000013 loss_cls: 4.1327 (3.9325) grad_norm: 2.3680 (2.3504) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 18:51:43 root] (utils.py 283): INFO Epoch: [13] [2450/2502] eta: 0:00:39 lr: 0.000013 loss_cls: 3.9747 (3.9331) grad_norm: 2.3839 (2.3504) time: 0.7679 data: 0.0002 max mem: 8426 +[2024-12-10 18:51:50 root] (utils.py 283): INFO Epoch: [13] [2460/2502] eta: 0:00:32 lr: 0.000013 loss_cls: 3.8993 (3.9314) grad_norm: 2.3333 (2.3505) time: 0.7701 data: 0.0002 max mem: 8426 +[2024-12-10 18:51:58 root] (utils.py 283): INFO Epoch: [13] [2470/2502] eta: 0:00:24 lr: 0.000013 loss_cls: 3.7106 (3.9312) grad_norm: 2.3547 (2.3507) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-10 18:52:06 root] (utils.py 283): INFO Epoch: [13] [2480/2502] eta: 0:00:16 lr: 0.000013 loss_cls: 4.0113 (3.9316) grad_norm: 2.3547 (2.3505) time: 0.7677 data: 0.0003 max mem: 8426 +[2024-12-10 18:52:14 root] (utils.py 283): INFO Epoch: [13] [2490/2502] eta: 0:00:09 lr: 0.000013 loss_cls: 3.8908 (3.9307) grad_norm: 2.3708 (2.3507) time: 0.7928 data: 0.0246 max mem: 8426 +[2024-12-10 18:52:22 root] (utils.py 283): INFO Epoch: [13] [2500/2502] eta: 0:00:01 lr: 0.000013 loss_cls: 3.9306 (3.9308) grad_norm: 2.2936 (2.3505) time: 0.7916 data: 0.0246 max mem: 8426 +[2024-12-10 18:52:22 root] (utils.py 283): INFO Epoch: [13] [2501/2502] eta: 0:00:00 lr: 0.000013 loss_cls: 3.5953 (3.9304) grad_norm: 2.2936 (2.3505) time: 0.7902 data: 0.0246 max mem: 8426 +[2024-12-10 18:52:22 root] (utils.py 297): INFO Epoch: [13] Total time: 0:32:01 (0.7680 s / it) +[2024-12-10 18:52:22 root] (engine.py 179): INFO Averaged stats:lr: 0.000013 loss_cls: 3.5953 (3.9281) grad_norm: 2.2936 (2.3505) +[2024-12-10 18:52:23 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.6124 (0.6124) acc1: 85.9375 (85.9375) acc3: 96.8750 (96.8750) acc5: 99.2188 (99.2188) time: 0.1275 data: 0.0003 max mem: 8426 +[2024-12-10 18:52:24 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7427 (0.8019) acc1: 85.9375 (82.7415) acc3: 95.3125 (93.6080) acc5: 97.6562 (96.5909) time: 0.1277 data: 0.0004 max mem: 8426 +[2024-12-10 18:52:25 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8337 (0.8520) acc1: 78.9062 (81.2872) acc3: 91.4062 (92.8943) acc5: 95.3125 (95.7217) time: 0.1279 data: 0.0004 max mem: 8426 +[2024-12-10 18:52:27 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9347 (0.8725) acc1: 78.9062 (80.2671) acc3: 92.1875 (92.9940) acc5: 95.3125 (95.8165) time: 0.1281 data: 0.0004 max mem: 8426 +[2024-12-10 18:52:28 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8385 (0.8651) acc1: 78.9062 (80.5640) acc3: 93.7500 (93.0831) acc5: 96.0938 (95.8270) time: 0.1327 data: 0.0044 max mem: 8426 +[2024-12-10 18:52:30 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0492 (0.9528) acc1: 76.5625 (78.5692) acc3: 88.2812 (91.5441) acc5: 92.1875 (94.6998) time: 0.1600 data: 0.0318 max mem: 8426 +[2024-12-10 18:52:31 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2354 (0.9985) acc1: 71.0938 (77.9329) acc3: 85.9375 (90.6634) acc5: 89.8438 (93.9165) time: 0.1705 data: 0.0428 max mem: 8426 +[2024-12-10 18:52:33 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2012 (1.0384) acc1: 73.4375 (77.0577) acc3: 86.7188 (90.1078) acc5: 89.8438 (93.4419) time: 0.1434 data: 0.0155 max mem: 8426 +[2024-12-10 18:52:34 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2012 (1.0714) acc1: 73.4375 (76.2539) acc3: 86.7188 (89.6123) acc5: 89.8438 (92.9495) time: 0.1318 data: 0.0032 max mem: 8426 +[2024-12-10 18:52:36 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2439 (1.0990) acc1: 71.8750 (75.6181) acc3: 86.7188 (89.2943) acc5: 89.8438 (92.7370) time: 0.1556 data: 0.0272 max mem: 8426 +[2024-12-10 18:52:37 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1481 (1.0869) acc1: 72.6562 (75.9360) acc3: 88.2812 (89.4640) acc5: 91.4062 (92.8640) time: 0.1658 data: 0.0398 max mem: 8426 +[2024-12-10 18:52:37 root] (utils.py 297): INFO Test: Total time: 0:00:14 (0.1462 s / it) +[2024-12-10 18:52:37 root] (engine.py 264): INFO * Acc@1 75.648 Acc@3 89.556 Acc@5 92.786 loss 1.088 flops 1.285 layer_flops 1.251 +[2024-12-10 18:52:37 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 75.6% +[2024-12-10 18:52:37 root] (main.py 576): INFO Max accuracy: 75.65% +[2024-12-10 18:52:38 root] (utils.py 283): INFO Epoch: [14] [ 0/2502] eta: 0:31:20 lr: 0.000012 loss_cls: 4.3645 (4.3645) grad_norm: 2.4123 (2.4123) time: 0.7518 data: 0.0003 max mem: 8426 +[2024-12-10 18:52:46 root] (utils.py 283): INFO Epoch: [14] [ 10/2502] eta: 0:31:22 lr: 0.000012 loss_cls: 4.3455 (4.1373) grad_norm: 2.3685 (2.3500) time: 0.7554 data: 0.0003 max mem: 8426 +[2024-12-10 18:52:53 root] (utils.py 283): INFO Epoch: [14] [ 20/2502] eta: 0:31:21 lr: 0.000012 loss_cls: 3.7802 (3.9079) grad_norm: 2.3381 (2.3561) time: 0.7583 data: 0.0002 max mem: 8426 +[2024-12-10 18:53:01 root] (utils.py 283): INFO Epoch: [14] [ 30/2502] eta: 0:31:20 lr: 0.000012 loss_cls: 3.7373 (3.9193) grad_norm: 2.3305 (2.3543) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 18:53:09 root] (utils.py 283): INFO Epoch: [14] [ 40/2502] eta: 0:31:28 lr: 0.000012 loss_cls: 3.9216 (3.9267) grad_norm: 2.3451 (2.3548) time: 0.7769 data: 0.0002 max mem: 8426 +[2024-12-10 18:53:16 root] (utils.py 283): INFO Epoch: [14] [ 50/2502] eta: 0:31:19 lr: 0.000012 loss_cls: 3.9632 (3.8714) grad_norm: 2.3162 (2.3436) time: 0.7755 data: 0.0002 max mem: 8426 +[2024-12-10 18:53:24 root] (utils.py 283): INFO Epoch: [14] [ 60/2502] eta: 0:31:11 lr: 0.000012 loss_cls: 3.9775 (3.9257) grad_norm: 2.3162 (2.3412) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-10 18:53:32 root] (utils.py 283): INFO Epoch: [14] [ 70/2502] eta: 0:31:06 lr: 0.000012 loss_cls: 4.2421 (3.9314) grad_norm: 2.3540 (2.3454) time: 0.7695 data: 0.0002 max mem: 8426 +[2024-12-10 18:53:39 root] (utils.py 283): INFO Epoch: [14] [ 80/2502] eta: 0:30:57 lr: 0.000012 loss_cls: 3.9456 (3.9026) grad_norm: 2.3194 (2.3345) time: 0.7677 data: 0.0002 max mem: 8426 +[2024-12-10 18:53:47 root] (utils.py 283): INFO Epoch: [14] [ 90/2502] eta: 0:30:48 lr: 0.000012 loss_cls: 4.0770 (3.9348) grad_norm: 2.2525 (2.3267) time: 0.7624 data: 0.0003 max mem: 8426 +[2024-12-10 18:53:55 root] (utils.py 283): INFO Epoch: [14] [ 100/2502] eta: 0:30:40 lr: 0.000012 loss_cls: 4.2408 (3.9583) grad_norm: 2.2880 (2.3248) time: 0.7637 data: 0.0003 max mem: 8426 +[2024-12-10 18:54:02 root] (utils.py 283): INFO Epoch: [14] [ 110/2502] eta: 0:30:32 lr: 0.000012 loss_cls: 4.2743 (3.9824) grad_norm: 2.3336 (2.3330) time: 0.7660 data: 0.0003 max mem: 8426 +[2024-12-10 18:54:10 root] (utils.py 283): INFO Epoch: [14] [ 120/2502] eta: 0:30:25 lr: 0.000012 loss_cls: 4.0836 (3.9747) grad_norm: 2.3607 (2.3328) time: 0.7666 data: 0.0002 max mem: 8426 +[2024-12-10 18:54:18 root] (utils.py 283): INFO Epoch: [14] [ 130/2502] eta: 0:30:18 lr: 0.000012 loss_cls: 4.0836 (3.9782) grad_norm: 2.3607 (2.3336) time: 0.7684 data: 0.0003 max mem: 8426 +[2024-12-10 18:54:25 root] (utils.py 283): INFO Epoch: [14] [ 140/2502] eta: 0:30:09 lr: 0.000012 loss_cls: 4.2689 (3.9899) grad_norm: 2.3638 (2.3340) time: 0.7660 data: 0.0003 max mem: 8426 +[2024-12-10 18:54:33 root] (utils.py 283): INFO Epoch: [14] [ 150/2502] eta: 0:30:01 lr: 0.000012 loss_cls: 4.1097 (3.9919) grad_norm: 2.3691 (2.3416) time: 0.7632 data: 0.0002 max mem: 8426 +[2024-12-10 18:54:41 root] (utils.py 283): INFO Epoch: [14] [ 160/2502] eta: 0:29:54 lr: 0.000012 loss_cls: 4.2480 (4.0154) grad_norm: 2.3649 (2.3425) time: 0.7666 data: 0.0002 max mem: 8426 +[2024-12-10 18:54:49 root] (utils.py 283): INFO Epoch: [14] [ 170/2502] eta: 0:29:48 lr: 0.000012 loss_cls: 4.1985 (4.0157) grad_norm: 2.3319 (2.3416) time: 0.7738 data: 0.0002 max mem: 8426 +[2024-12-10 18:54:56 root] (utils.py 283): INFO Epoch: [14] [ 180/2502] eta: 0:29:42 lr: 0.000012 loss_cls: 4.1841 (4.0233) grad_norm: 2.3014 (2.3403) time: 0.7778 data: 0.0002 max mem: 8426 +[2024-12-10 18:55:04 root] (utils.py 283): INFO Epoch: [14] [ 190/2502] eta: 0:29:34 lr: 0.000012 loss_cls: 4.2146 (4.0198) grad_norm: 2.3475 (2.3437) time: 0.7719 data: 0.0002 max mem: 8426 +[2024-12-10 18:55:12 root] (utils.py 283): INFO Epoch: [14] [ 200/2502] eta: 0:29:26 lr: 0.000012 loss_cls: 4.0394 (4.0188) grad_norm: 2.3503 (2.3399) time: 0.7673 data: 0.0002 max mem: 8426 +[2024-12-10 18:55:19 root] (utils.py 283): INFO Epoch: [14] [ 210/2502] eta: 0:29:18 lr: 0.000012 loss_cls: 4.0225 (4.0119) grad_norm: 2.2486 (2.3357) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-10 18:55:27 root] (utils.py 283): INFO Epoch: [14] [ 220/2502] eta: 0:29:10 lr: 0.000012 loss_cls: 4.2112 (4.0301) grad_norm: 2.2716 (2.3355) time: 0.7632 data: 0.0002 max mem: 8426 +[2024-12-10 18:55:35 root] (utils.py 283): INFO Epoch: [14] [ 230/2502] eta: 0:29:03 lr: 0.000012 loss_cls: 4.2265 (4.0327) grad_norm: 2.3311 (2.3334) time: 0.7686 data: 0.0002 max mem: 8426 +[2024-12-10 18:55:42 root] (utils.py 283): INFO Epoch: [14] [ 240/2502] eta: 0:28:55 lr: 0.000012 loss_cls: 4.1469 (4.0273) grad_norm: 2.3421 (2.3343) time: 0.7688 data: 0.0002 max mem: 8426 +[2024-12-10 18:55:50 root] (utils.py 283): INFO Epoch: [14] [ 250/2502] eta: 0:28:47 lr: 0.000012 loss_cls: 3.8060 (4.0116) grad_norm: 2.3421 (2.3361) time: 0.7666 data: 0.0002 max mem: 8426 +[2024-12-10 18:55:58 root] (utils.py 283): INFO Epoch: [14] [ 260/2502] eta: 0:28:40 lr: 0.000012 loss_cls: 3.6318 (3.9946) grad_norm: 2.3271 (2.3371) time: 0.7682 data: 0.0002 max mem: 8426 +[2024-12-10 18:56:05 root] (utils.py 283): INFO Epoch: [14] [ 270/2502] eta: 0:28:32 lr: 0.000012 loss_cls: 3.8121 (3.9909) grad_norm: 2.3493 (2.3394) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 18:56:13 root] (utils.py 283): INFO Epoch: [14] [ 280/2502] eta: 0:28:24 lr: 0.000012 loss_cls: 3.9054 (3.9885) grad_norm: 2.3468 (2.3382) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 18:56:21 root] (utils.py 283): INFO Epoch: [14] [ 290/2502] eta: 0:28:16 lr: 0.000012 loss_cls: 3.7844 (3.9812) grad_norm: 2.3090 (2.3382) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 18:56:28 root] (utils.py 283): INFO Epoch: [14] [ 300/2502] eta: 0:28:07 lr: 0.000012 loss_cls: 3.7968 (3.9767) grad_norm: 2.3576 (2.3384) time: 0.7607 data: 0.0002 max mem: 8426 +[2024-12-10 18:56:36 root] (utils.py 283): INFO Epoch: [14] [ 310/2502] eta: 0:28:00 lr: 0.000012 loss_cls: 4.1548 (3.9736) grad_norm: 2.3528 (2.3402) time: 0.7618 data: 0.0002 max mem: 8426 +[2024-12-10 18:56:43 root] (utils.py 283): INFO Epoch: [14] [ 320/2502] eta: 0:27:52 lr: 0.000012 loss_cls: 3.7773 (3.9669) grad_norm: 2.3556 (2.3424) time: 0.7630 data: 0.0002 max mem: 8426 +[2024-12-10 18:56:51 root] (utils.py 283): INFO Epoch: [14] [ 330/2502] eta: 0:27:44 lr: 0.000012 loss_cls: 3.7555 (3.9590) grad_norm: 2.3302 (2.3403) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 18:56:59 root] (utils.py 283): INFO Epoch: [14] [ 340/2502] eta: 0:27:36 lr: 0.000012 loss_cls: 3.8678 (3.9598) grad_norm: 2.3190 (2.3410) time: 0.7611 data: 0.0003 max mem: 8426 +[2024-12-10 18:57:06 root] (utils.py 283): INFO Epoch: [14] [ 350/2502] eta: 0:27:28 lr: 0.000012 loss_cls: 3.7715 (3.9484) grad_norm: 2.3264 (2.3393) time: 0.7611 data: 0.0002 max mem: 8426 +[2024-12-10 18:57:14 root] (utils.py 283): INFO Epoch: [14] [ 360/2502] eta: 0:27:19 lr: 0.000012 loss_cls: 3.7715 (3.9506) grad_norm: 2.2661 (2.3397) time: 0.7594 data: 0.0002 max mem: 8426 +[2024-12-10 18:57:21 root] (utils.py 283): INFO Epoch: [14] [ 370/2502] eta: 0:27:11 lr: 0.000012 loss_cls: 3.5750 (3.9344) grad_norm: 2.2661 (2.3393) time: 0.7576 data: 0.0003 max mem: 8426 +[2024-12-10 18:57:29 root] (utils.py 283): INFO Epoch: [14] [ 380/2502] eta: 0:27:03 lr: 0.000012 loss_cls: 3.3751 (3.9274) grad_norm: 2.2365 (2.3372) time: 0.7581 data: 0.0002 max mem: 8426 +[2024-12-10 18:57:36 root] (utils.py 283): INFO Epoch: [14] [ 390/2502] eta: 0:26:55 lr: 0.000012 loss_cls: 3.9338 (3.9306) grad_norm: 2.3389 (2.3404) time: 0.7557 data: 0.0002 max mem: 8426 +[2024-12-10 18:57:44 root] (utils.py 283): INFO Epoch: [14] [ 400/2502] eta: 0:26:47 lr: 0.000012 loss_cls: 3.9338 (3.9313) grad_norm: 2.3865 (2.3401) time: 0.7556 data: 0.0002 max mem: 8426 +[2024-12-10 18:57:52 root] (utils.py 283): INFO Epoch: [14] [ 410/2502] eta: 0:26:39 lr: 0.000012 loss_cls: 4.0231 (3.9338) grad_norm: 2.3487 (2.3412) time: 0.7561 data: 0.0002 max mem: 8426 +[2024-12-10 18:57:59 root] (utils.py 283): INFO Epoch: [14] [ 420/2502] eta: 0:26:32 lr: 0.000012 loss_cls: 3.9600 (3.9293) grad_norm: 2.3896 (2.3426) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 18:58:07 root] (utils.py 283): INFO Epoch: [14] [ 430/2502] eta: 0:26:24 lr: 0.000012 loss_cls: 3.7982 (3.9276) grad_norm: 2.4034 (2.3448) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 18:58:14 root] (utils.py 283): INFO Epoch: [14] [ 440/2502] eta: 0:26:16 lr: 0.000012 loss_cls: 4.2225 (3.9332) grad_norm: 2.3572 (2.3439) time: 0.7570 data: 0.0002 max mem: 8426 +[2024-12-10 18:58:22 root] (utils.py 283): INFO Epoch: [14] [ 450/2502] eta: 0:26:08 lr: 0.000012 loss_cls: 4.1488 (3.9353) grad_norm: 2.3063 (2.3452) time: 0.7553 data: 0.0002 max mem: 8426 +[2024-12-10 18:58:30 root] (utils.py 283): INFO Epoch: [14] [ 460/2502] eta: 0:26:00 lr: 0.000012 loss_cls: 4.0161 (3.9353) grad_norm: 2.3552 (2.3452) time: 0.7581 data: 0.0003 max mem: 8426 +[2024-12-10 18:58:37 root] (utils.py 283): INFO Epoch: [14] [ 470/2502] eta: 0:25:52 lr: 0.000012 loss_cls: 4.0760 (3.9349) grad_norm: 2.3324 (2.3440) time: 0.7610 data: 0.0002 max mem: 8426 +[2024-12-10 18:58:45 root] (utils.py 283): INFO Epoch: [14] [ 480/2502] eta: 0:25:45 lr: 0.000012 loss_cls: 4.0233 (3.9336) grad_norm: 2.3253 (2.3453) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-10 18:58:53 root] (utils.py 283): INFO Epoch: [14] [ 490/2502] eta: 0:25:37 lr: 0.000012 loss_cls: 4.1031 (3.9366) grad_norm: 2.3648 (2.3464) time: 0.7736 data: 0.0003 max mem: 8426 +[2024-12-10 18:59:00 root] (utils.py 283): INFO Epoch: [14] [ 500/2502] eta: 0:25:29 lr: 0.000012 loss_cls: 4.0958 (3.9355) grad_norm: 2.3665 (2.3466) time: 0.7673 data: 0.0002 max mem: 8426 +[2024-12-10 18:59:08 root] (utils.py 283): INFO Epoch: [14] [ 510/2502] eta: 0:25:22 lr: 0.000012 loss_cls: 3.7237 (3.9296) grad_norm: 2.3785 (2.3478) time: 0.7577 data: 0.0002 max mem: 8426 +[2024-12-10 18:59:15 root] (utils.py 283): INFO Epoch: [14] [ 520/2502] eta: 0:25:14 lr: 0.000012 loss_cls: 3.7697 (3.9304) grad_norm: 2.3825 (2.3498) time: 0.7604 data: 0.0002 max mem: 8426 +[2024-12-10 18:59:23 root] (utils.py 283): INFO Epoch: [14] [ 530/2502] eta: 0:25:06 lr: 0.000012 loss_cls: 3.9996 (3.9283) grad_norm: 2.3430 (2.3493) time: 0.7594 data: 0.0002 max mem: 8426 +[2024-12-10 18:59:31 root] (utils.py 283): INFO Epoch: [14] [ 540/2502] eta: 0:24:58 lr: 0.000012 loss_cls: 3.9996 (3.9312) grad_norm: 2.3273 (2.3496) time: 0.7601 data: 0.0002 max mem: 8426 +[2024-12-10 18:59:38 root] (utils.py 283): INFO Epoch: [14] [ 550/2502] eta: 0:24:51 lr: 0.000012 loss_cls: 4.1497 (3.9330) grad_norm: 2.4007 (2.3517) time: 0.7688 data: 0.0002 max mem: 8426 +[2024-12-10 18:59:46 root] (utils.py 283): INFO Epoch: [14] [ 560/2502] eta: 0:24:43 lr: 0.000012 loss_cls: 4.1497 (3.9333) grad_norm: 2.4095 (2.3524) time: 0.7666 data: 0.0002 max mem: 8426 +[2024-12-10 18:59:54 root] (utils.py 283): INFO Epoch: [14] [ 570/2502] eta: 0:24:35 lr: 0.000012 loss_cls: 4.2297 (3.9365) grad_norm: 2.3323 (2.3533) time: 0.7590 data: 0.0002 max mem: 8426 +[2024-12-10 19:00:01 root] (utils.py 283): INFO Epoch: [14] [ 580/2502] eta: 0:24:28 lr: 0.000012 loss_cls: 4.2297 (3.9430) grad_norm: 2.3121 (2.3526) time: 0.7613 data: 0.0003 max mem: 8426 +[2024-12-10 19:00:09 root] (utils.py 283): INFO Epoch: [14] [ 590/2502] eta: 0:24:20 lr: 0.000012 loss_cls: 4.2191 (3.9420) grad_norm: 2.3385 (2.3525) time: 0.7684 data: 0.0003 max mem: 8426 +[2024-12-10 19:00:17 root] (utils.py 283): INFO Epoch: [14] [ 600/2502] eta: 0:24:13 lr: 0.000012 loss_cls: 4.2369 (3.9485) grad_norm: 2.3656 (2.3528) time: 0.7687 data: 0.0002 max mem: 8426 +[2024-12-10 19:00:24 root] (utils.py 283): INFO Epoch: [14] [ 610/2502] eta: 0:24:05 lr: 0.000012 loss_cls: 4.2369 (3.9474) grad_norm: 2.2901 (2.3515) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 19:00:32 root] (utils.py 283): INFO Epoch: [14] [ 620/2502] eta: 0:23:58 lr: 0.000012 loss_cls: 3.8006 (3.9405) grad_norm: 2.2967 (2.3520) time: 0.7712 data: 0.0002 max mem: 8426 +[2024-12-10 19:00:40 root] (utils.py 283): INFO Epoch: [14] [ 630/2502] eta: 0:23:51 lr: 0.000012 loss_cls: 3.9277 (3.9438) grad_norm: 2.3098 (2.3518) time: 0.7806 data: 0.0002 max mem: 8426 +[2024-12-10 19:00:48 root] (utils.py 283): INFO Epoch: [14] [ 640/2502] eta: 0:23:44 lr: 0.000012 loss_cls: 3.9277 (3.9383) grad_norm: 2.3247 (2.3530) time: 0.7875 data: 0.0003 max mem: 8426 +[2024-12-10 19:00:55 root] (utils.py 283): INFO Epoch: [14] [ 650/2502] eta: 0:23:36 lr: 0.000012 loss_cls: 3.5361 (3.9354) grad_norm: 2.3168 (2.3526) time: 0.7782 data: 0.0002 max mem: 8426 +[2024-12-10 19:01:03 root] (utils.py 283): INFO Epoch: [14] [ 660/2502] eta: 0:23:29 lr: 0.000012 loss_cls: 4.0292 (3.9333) grad_norm: 2.2853 (2.3521) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 19:01:11 root] (utils.py 283): INFO Epoch: [14] [ 670/2502] eta: 0:23:21 lr: 0.000012 loss_cls: 4.0659 (3.9341) grad_norm: 2.3168 (2.3524) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 19:01:18 root] (utils.py 283): INFO Epoch: [14] [ 680/2502] eta: 0:23:13 lr: 0.000012 loss_cls: 4.1307 (3.9333) grad_norm: 2.3340 (2.3522) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-10 19:01:26 root] (utils.py 283): INFO Epoch: [14] [ 690/2502] eta: 0:23:06 lr: 0.000012 loss_cls: 3.5149 (3.9254) grad_norm: 2.3756 (2.3528) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 19:01:34 root] (utils.py 283): INFO Epoch: [14] [ 700/2502] eta: 0:22:58 lr: 0.000012 loss_cls: 3.6757 (3.9283) grad_norm: 2.3092 (2.3518) time: 0.7661 data: 0.0002 max mem: 8426 +[2024-12-10 19:01:41 root] (utils.py 283): INFO Epoch: [14] [ 710/2502] eta: 0:22:51 lr: 0.000012 loss_cls: 3.9888 (3.9249) grad_norm: 2.2876 (2.3514) time: 0.7678 data: 0.0002 max mem: 8426 +[2024-12-10 19:01:49 root] (utils.py 283): INFO Epoch: [14] [ 720/2502] eta: 0:22:43 lr: 0.000012 loss_cls: 3.9903 (3.9280) grad_norm: 2.3263 (2.3517) time: 0.7684 data: 0.0002 max mem: 8426 +[2024-12-10 19:01:57 root] (utils.py 283): INFO Epoch: [14] [ 730/2502] eta: 0:22:36 lr: 0.000012 loss_cls: 4.1750 (3.9301) grad_norm: 2.3563 (2.3519) time: 0.7740 data: 0.0002 max mem: 8426 +[2024-12-10 19:02:05 root] (utils.py 283): INFO Epoch: [14] [ 740/2502] eta: 0:22:29 lr: 0.000012 loss_cls: 3.8897 (3.9225) grad_norm: 2.3971 (2.3521) time: 0.7863 data: 0.0002 max mem: 8426 +[2024-12-10 19:02:12 root] (utils.py 283): INFO Epoch: [14] [ 750/2502] eta: 0:22:21 lr: 0.000012 loss_cls: 3.7590 (3.9220) grad_norm: 2.2965 (2.3515) time: 0.7757 data: 0.0002 max mem: 8426 +[2024-12-10 19:02:20 root] (utils.py 283): INFO Epoch: [14] [ 760/2502] eta: 0:22:13 lr: 0.000012 loss_cls: 4.0183 (3.9233) grad_norm: 2.2995 (2.3518) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 19:02:28 root] (utils.py 283): INFO Epoch: [14] [ 770/2502] eta: 0:22:05 lr: 0.000012 loss_cls: 4.0947 (3.9270) grad_norm: 2.3708 (2.3520) time: 0.7594 data: 0.0002 max mem: 8426 +[2024-12-10 19:02:35 root] (utils.py 283): INFO Epoch: [14] [ 780/2502] eta: 0:21:58 lr: 0.000012 loss_cls: 4.2264 (3.9256) grad_norm: 2.3653 (2.3519) time: 0.7566 data: 0.0002 max mem: 8426 +[2024-12-10 19:02:43 root] (utils.py 283): INFO Epoch: [14] [ 790/2502] eta: 0:21:50 lr: 0.000012 loss_cls: 3.7480 (3.9239) grad_norm: 2.3330 (2.3512) time: 0.7575 data: 0.0003 max mem: 8426 +[2024-12-10 19:02:50 root] (utils.py 283): INFO Epoch: [14] [ 800/2502] eta: 0:21:42 lr: 0.000012 loss_cls: 4.0020 (3.9260) grad_norm: 2.3492 (2.3520) time: 0.7570 data: 0.0003 max mem: 8426 +[2024-12-10 19:02:58 root] (utils.py 283): INFO Epoch: [14] [ 810/2502] eta: 0:21:34 lr: 0.000012 loss_cls: 4.0372 (3.9268) grad_norm: 2.3492 (2.3518) time: 0.7589 data: 0.0003 max mem: 8426 +[2024-12-10 19:03:06 root] (utils.py 283): INFO Epoch: [14] [ 820/2502] eta: 0:21:26 lr: 0.000012 loss_cls: 4.1926 (3.9281) grad_norm: 2.3672 (2.3520) time: 0.7588 data: 0.0003 max mem: 8426 +[2024-12-10 19:03:13 root] (utils.py 283): INFO Epoch: [14] [ 830/2502] eta: 0:21:18 lr: 0.000012 loss_cls: 3.8457 (3.9255) grad_norm: 2.3672 (2.3517) time: 0.7571 data: 0.0002 max mem: 8426 +[2024-12-10 19:03:21 root] (utils.py 283): INFO Epoch: [14] [ 840/2502] eta: 0:21:11 lr: 0.000012 loss_cls: 3.6985 (3.9233) grad_norm: 2.3779 (2.3523) time: 0.7592 data: 0.0002 max mem: 8426 +[2024-12-10 19:03:28 root] (utils.py 283): INFO Epoch: [14] [ 850/2502] eta: 0:21:03 lr: 0.000012 loss_cls: 3.9293 (3.9239) grad_norm: 2.3779 (2.3526) time: 0.7680 data: 0.0002 max mem: 8426 +[2024-12-10 19:03:36 root] (utils.py 283): INFO Epoch: [14] [ 860/2502] eta: 0:20:55 lr: 0.000012 loss_cls: 3.7457 (3.9199) grad_norm: 2.3299 (2.3521) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 19:03:44 root] (utils.py 283): INFO Epoch: [14] [ 870/2502] eta: 0:20:48 lr: 0.000012 loss_cls: 3.4397 (3.9157) grad_norm: 2.3299 (2.3521) time: 0.7555 data: 0.0002 max mem: 8426 +[2024-12-10 19:03:51 root] (utils.py 283): INFO Epoch: [14] [ 880/2502] eta: 0:20:40 lr: 0.000012 loss_cls: 4.1406 (3.9184) grad_norm: 2.3281 (2.3520) time: 0.7648 data: 0.0003 max mem: 8426 +[2024-12-10 19:03:59 root] (utils.py 283): INFO Epoch: [14] [ 890/2502] eta: 0:20:33 lr: 0.000012 loss_cls: 4.0214 (3.9161) grad_norm: 2.3270 (2.3521) time: 0.7789 data: 0.0002 max mem: 8426 +[2024-12-10 19:04:07 root] (utils.py 283): INFO Epoch: [14] [ 900/2502] eta: 0:20:26 lr: 0.000012 loss_cls: 4.0131 (3.9158) grad_norm: 2.3239 (2.3523) time: 0.7840 data: 0.0002 max mem: 8426 +[2024-12-10 19:04:15 root] (utils.py 283): INFO Epoch: [14] [ 910/2502] eta: 0:20:18 lr: 0.000012 loss_cls: 4.0832 (3.9162) grad_norm: 2.3251 (2.3527) time: 0.7792 data: 0.0002 max mem: 8426 +[2024-12-10 19:04:22 root] (utils.py 283): INFO Epoch: [14] [ 920/2502] eta: 0:20:10 lr: 0.000012 loss_cls: 4.0383 (3.9172) grad_norm: 2.3031 (2.3526) time: 0.7671 data: 0.0002 max mem: 8426 +[2024-12-10 19:04:30 root] (utils.py 283): INFO Epoch: [14] [ 930/2502] eta: 0:20:03 lr: 0.000012 loss_cls: 4.1550 (3.9195) grad_norm: 2.2892 (2.3521) time: 0.7582 data: 0.0003 max mem: 8426 +[2024-12-10 19:04:38 root] (utils.py 283): INFO Epoch: [14] [ 940/2502] eta: 0:19:55 lr: 0.000012 loss_cls: 4.1639 (3.9177) grad_norm: 2.3096 (2.3521) time: 0.7581 data: 0.0003 max mem: 8426 +[2024-12-10 19:04:45 root] (utils.py 283): INFO Epoch: [14] [ 950/2502] eta: 0:19:47 lr: 0.000012 loss_cls: 3.5921 (3.9141) grad_norm: 2.3605 (2.3515) time: 0.7545 data: 0.0002 max mem: 8426 +[2024-12-10 19:04:53 root] (utils.py 283): INFO Epoch: [14] [ 960/2502] eta: 0:19:39 lr: 0.000012 loss_cls: 3.6204 (3.9133) grad_norm: 2.2968 (2.3510) time: 0.7561 data: 0.0003 max mem: 8426 +[2024-12-10 19:05:00 root] (utils.py 283): INFO Epoch: [14] [ 970/2502] eta: 0:19:31 lr: 0.000012 loss_cls: 3.9039 (3.9124) grad_norm: 2.3067 (2.3509) time: 0.7597 data: 0.0003 max mem: 8426 +[2024-12-10 19:05:08 root] (utils.py 283): INFO Epoch: [14] [ 980/2502] eta: 0:19:24 lr: 0.000012 loss_cls: 3.9344 (3.9112) grad_norm: 2.3731 (2.3514) time: 0.7676 data: 0.0002 max mem: 8426 +[2024-12-10 19:05:16 root] (utils.py 283): INFO Epoch: [14] [ 990/2502] eta: 0:19:16 lr: 0.000012 loss_cls: 3.9353 (3.9116) grad_norm: 2.3731 (2.3516) time: 0.7659 data: 0.0002 max mem: 8426 +[2024-12-10 19:05:23 root] (utils.py 283): INFO Epoch: [14] [1000/2502] eta: 0:19:08 lr: 0.000012 loss_cls: 3.9744 (3.9114) grad_norm: 2.3479 (2.3518) time: 0.7575 data: 0.0003 max mem: 8426 +[2024-12-10 19:05:31 root] (utils.py 283): INFO Epoch: [14] [1010/2502] eta: 0:19:01 lr: 0.000012 loss_cls: 4.0492 (3.9109) grad_norm: 2.3479 (2.3520) time: 0.7597 data: 0.0002 max mem: 8426 +[2024-12-10 19:05:38 root] (utils.py 283): INFO Epoch: [14] [1020/2502] eta: 0:18:53 lr: 0.000012 loss_cls: 4.0912 (3.9134) grad_norm: 2.3668 (2.3520) time: 0.7579 data: 0.0002 max mem: 8426 +[2024-12-10 19:05:46 root] (utils.py 283): INFO Epoch: [14] [1030/2502] eta: 0:18:45 lr: 0.000012 loss_cls: 4.0896 (3.9115) grad_norm: 2.2941 (2.3515) time: 0.7576 data: 0.0002 max mem: 8426 +[2024-12-10 19:05:54 root] (utils.py 283): INFO Epoch: [14] [1040/2502] eta: 0:18:37 lr: 0.000012 loss_cls: 3.7615 (3.9122) grad_norm: 2.2918 (2.3515) time: 0.7604 data: 0.0003 max mem: 8426 +[2024-12-10 19:06:01 root] (utils.py 283): INFO Epoch: [14] [1050/2502] eta: 0:18:30 lr: 0.000012 loss_cls: 3.8806 (3.9102) grad_norm: 2.2882 (2.3511) time: 0.7642 data: 0.0003 max mem: 8426 +[2024-12-10 19:06:09 root] (utils.py 283): INFO Epoch: [14] [1060/2502] eta: 0:18:22 lr: 0.000012 loss_cls: 3.8806 (3.9100) grad_norm: 2.3099 (2.3514) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-10 19:06:16 root] (utils.py 283): INFO Epoch: [14] [1070/2502] eta: 0:18:15 lr: 0.000012 loss_cls: 3.7875 (3.9084) grad_norm: 2.3181 (2.3516) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-10 19:06:24 root] (utils.py 283): INFO Epoch: [14] [1080/2502] eta: 0:18:07 lr: 0.000012 loss_cls: 3.7162 (3.9083) grad_norm: 2.3344 (2.3514) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-10 19:06:32 root] (utils.py 283): INFO Epoch: [14] [1090/2502] eta: 0:17:59 lr: 0.000012 loss_cls: 3.8075 (3.9060) grad_norm: 2.3591 (2.3517) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-10 19:06:39 root] (utils.py 283): INFO Epoch: [14] [1100/2502] eta: 0:17:52 lr: 0.000012 loss_cls: 3.8443 (3.9051) grad_norm: 2.3341 (2.3514) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 19:06:47 root] (utils.py 283): INFO Epoch: [14] [1110/2502] eta: 0:17:44 lr: 0.000012 loss_cls: 3.8443 (3.9044) grad_norm: 2.3188 (2.3515) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 19:06:55 root] (utils.py 283): INFO Epoch: [14] [1120/2502] eta: 0:17:36 lr: 0.000012 loss_cls: 3.7561 (3.9035) grad_norm: 2.3400 (2.3519) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 19:07:02 root] (utils.py 283): INFO Epoch: [14] [1130/2502] eta: 0:17:29 lr: 0.000012 loss_cls: 4.0353 (3.9061) grad_norm: 2.3932 (2.3521) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 19:07:10 root] (utils.py 283): INFO Epoch: [14] [1140/2502] eta: 0:17:21 lr: 0.000012 loss_cls: 4.2497 (3.9066) grad_norm: 2.3273 (2.3517) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 19:07:18 root] (utils.py 283): INFO Epoch: [14] [1150/2502] eta: 0:17:13 lr: 0.000012 loss_cls: 4.0699 (3.9076) grad_norm: 2.3045 (2.3513) time: 0.7632 data: 0.0002 max mem: 8426 +[2024-12-10 19:07:25 root] (utils.py 283): INFO Epoch: [14] [1160/2502] eta: 0:17:06 lr: 0.000012 loss_cls: 3.8188 (3.9075) grad_norm: 2.3192 (2.3513) time: 0.7647 data: 0.0003 max mem: 8426 +[2024-12-10 19:07:33 root] (utils.py 283): INFO Epoch: [14] [1170/2502] eta: 0:16:58 lr: 0.000012 loss_cls: 4.0123 (3.9067) grad_norm: 2.3120 (2.3515) time: 0.7632 data: 0.0003 max mem: 8426 +[2024-12-10 19:07:41 root] (utils.py 283): INFO Epoch: [14] [1180/2502] eta: 0:16:50 lr: 0.000012 loss_cls: 3.9423 (3.9061) grad_norm: 2.2753 (2.3506) time: 0.7694 data: 0.0002 max mem: 8426 +[2024-12-10 19:07:48 root] (utils.py 283): INFO Epoch: [14] [1190/2502] eta: 0:16:43 lr: 0.000012 loss_cls: 3.9326 (3.9057) grad_norm: 2.2679 (2.3500) time: 0.7715 data: 0.0002 max mem: 8426 +[2024-12-10 19:07:56 root] (utils.py 283): INFO Epoch: [14] [1200/2502] eta: 0:16:35 lr: 0.000012 loss_cls: 3.9326 (3.9054) grad_norm: 2.2679 (2.3497) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 19:08:04 root] (utils.py 283): INFO Epoch: [14] [1210/2502] eta: 0:16:27 lr: 0.000012 loss_cls: 3.9433 (3.9050) grad_norm: 2.2969 (2.3496) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 19:08:11 root] (utils.py 283): INFO Epoch: [14] [1220/2502] eta: 0:16:20 lr: 0.000012 loss_cls: 4.0569 (3.9056) grad_norm: 2.3158 (2.3498) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 19:08:19 root] (utils.py 283): INFO Epoch: [14] [1230/2502] eta: 0:16:12 lr: 0.000012 loss_cls: 4.0018 (3.9061) grad_norm: 2.2977 (2.3491) time: 0.7696 data: 0.0002 max mem: 8426 +[2024-12-10 19:08:27 root] (utils.py 283): INFO Epoch: [14] [1240/2502] eta: 0:16:05 lr: 0.000012 loss_cls: 4.1258 (3.9085) grad_norm: 2.2977 (2.3492) time: 0.7686 data: 0.0003 max mem: 8426 +[2024-12-10 19:08:34 root] (utils.py 283): INFO Epoch: [14] [1250/2502] eta: 0:15:57 lr: 0.000012 loss_cls: 4.3813 (3.9112) grad_norm: 2.3373 (2.3493) time: 0.7666 data: 0.0003 max mem: 8426 +[2024-12-10 19:08:42 root] (utils.py 283): INFO Epoch: [14] [1260/2502] eta: 0:15:49 lr: 0.000012 loss_cls: 4.1891 (3.9124) grad_norm: 2.2796 (2.3492) time: 0.7680 data: 0.0002 max mem: 8426 +[2024-12-10 19:08:50 root] (utils.py 283): INFO Epoch: [14] [1270/2502] eta: 0:15:42 lr: 0.000012 loss_cls: 3.8999 (3.9103) grad_norm: 2.3114 (2.3495) time: 0.7676 data: 0.0002 max mem: 8426 +[2024-12-10 19:08:57 root] (utils.py 283): INFO Epoch: [14] [1280/2502] eta: 0:15:34 lr: 0.000012 loss_cls: 3.9630 (3.9108) grad_norm: 2.3334 (2.3494) time: 0.7706 data: 0.0002 max mem: 8426 +[2024-12-10 19:09:05 root] (utils.py 283): INFO Epoch: [14] [1290/2502] eta: 0:15:27 lr: 0.000012 loss_cls: 3.4726 (3.9088) grad_norm: 2.3334 (2.3494) time: 0.7706 data: 0.0003 max mem: 8426 +[2024-12-10 19:09:13 root] (utils.py 283): INFO Epoch: [14] [1300/2502] eta: 0:15:19 lr: 0.000012 loss_cls: 3.4726 (3.9099) grad_norm: 2.3992 (2.3498) time: 0.7675 data: 0.0003 max mem: 8426 +[2024-12-10 19:09:20 root] (utils.py 283): INFO Epoch: [14] [1310/2502] eta: 0:15:11 lr: 0.000012 loss_cls: 3.3194 (3.9060) grad_norm: 2.3890 (2.3499) time: 0.7717 data: 0.0002 max mem: 8426 +[2024-12-10 19:09:28 root] (utils.py 283): INFO Epoch: [14] [1320/2502] eta: 0:15:04 lr: 0.000012 loss_cls: 3.3509 (3.9058) grad_norm: 2.3348 (2.3498) time: 0.7697 data: 0.0002 max mem: 8426 +[2024-12-10 19:09:36 root] (utils.py 283): INFO Epoch: [14] [1330/2502] eta: 0:14:56 lr: 0.000012 loss_cls: 4.0436 (3.9060) grad_norm: 2.3253 (2.3500) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 19:09:43 root] (utils.py 283): INFO Epoch: [14] [1340/2502] eta: 0:14:48 lr: 0.000012 loss_cls: 4.0236 (3.9063) grad_norm: 2.3764 (2.3507) time: 0.7606 data: 0.0002 max mem: 8426 +[2024-12-10 19:09:51 root] (utils.py 283): INFO Epoch: [14] [1350/2502] eta: 0:14:41 lr: 0.000012 loss_cls: 4.0762 (3.9060) grad_norm: 2.3450 (2.3504) time: 0.7633 data: 0.0002 max mem: 8426 +[2024-12-10 19:09:59 root] (utils.py 283): INFO Epoch: [14] [1360/2502] eta: 0:14:33 lr: 0.000012 loss_cls: 3.7974 (3.9049) grad_norm: 2.3115 (2.3504) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 19:10:06 root] (utils.py 283): INFO Epoch: [14] [1370/2502] eta: 0:14:26 lr: 0.000012 loss_cls: 4.0701 (3.9064) grad_norm: 2.3115 (2.3502) time: 0.7730 data: 0.0002 max mem: 8426 +[2024-12-10 19:10:14 root] (utils.py 283): INFO Epoch: [14] [1380/2502] eta: 0:14:18 lr: 0.000012 loss_cls: 4.1210 (3.9072) grad_norm: 2.2520 (2.3499) time: 0.7805 data: 0.0002 max mem: 8426 +[2024-12-10 19:10:22 root] (utils.py 283): INFO Epoch: [14] [1390/2502] eta: 0:14:11 lr: 0.000012 loss_cls: 4.0747 (3.9053) grad_norm: 2.3215 (2.3499) time: 0.7804 data: 0.0002 max mem: 8426 +[2024-12-10 19:10:30 root] (utils.py 283): INFO Epoch: [14] [1400/2502] eta: 0:14:03 lr: 0.000012 loss_cls: 3.8012 (3.9049) grad_norm: 2.3431 (2.3497) time: 0.7698 data: 0.0002 max mem: 8426 +[2024-12-10 19:10:37 root] (utils.py 283): INFO Epoch: [14] [1410/2502] eta: 0:13:55 lr: 0.000012 loss_cls: 4.1804 (3.9080) grad_norm: 2.3540 (2.3501) time: 0.7613 data: 0.0002 max mem: 8426 +[2024-12-10 19:10:45 root] (utils.py 283): INFO Epoch: [14] [1420/2502] eta: 0:13:48 lr: 0.000012 loss_cls: 4.2070 (3.9092) grad_norm: 2.3244 (2.3494) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 19:10:53 root] (utils.py 283): INFO Epoch: [14] [1430/2502] eta: 0:13:40 lr: 0.000012 loss_cls: 4.0672 (3.9089) grad_norm: 2.2713 (2.3488) time: 0.7662 data: 0.0003 max mem: 8426 +[2024-12-10 19:11:00 root] (utils.py 283): INFO Epoch: [14] [1440/2502] eta: 0:13:32 lr: 0.000012 loss_cls: 3.7445 (3.9071) grad_norm: 2.2713 (2.3484) time: 0.7653 data: 0.0003 max mem: 8426 +[2024-12-10 19:11:08 root] (utils.py 283): INFO Epoch: [14] [1450/2502] eta: 0:13:25 lr: 0.000012 loss_cls: 3.7049 (3.9056) grad_norm: 2.2931 (2.3488) time: 0.7665 data: 0.0002 max mem: 8426 +[2024-12-10 19:11:16 root] (utils.py 283): INFO Epoch: [14] [1460/2502] eta: 0:13:17 lr: 0.000012 loss_cls: 3.9439 (3.9058) grad_norm: 2.3482 (2.3487) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 19:11:23 root] (utils.py 283): INFO Epoch: [14] [1470/2502] eta: 0:13:09 lr: 0.000012 loss_cls: 4.1419 (3.9068) grad_norm: 2.3583 (2.3490) time: 0.7692 data: 0.0002 max mem: 8426 +[2024-12-10 19:11:31 root] (utils.py 283): INFO Epoch: [14] [1480/2502] eta: 0:13:02 lr: 0.000012 loss_cls: 4.1321 (3.9073) grad_norm: 2.3689 (2.3489) time: 0.7698 data: 0.0002 max mem: 8426 +[2024-12-10 19:11:39 root] (utils.py 283): INFO Epoch: [14] [1490/2502] eta: 0:12:54 lr: 0.000012 loss_cls: 4.0780 (3.9078) grad_norm: 2.3632 (2.3493) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-10 19:11:46 root] (utils.py 283): INFO Epoch: [14] [1500/2502] eta: 0:12:46 lr: 0.000012 loss_cls: 3.8967 (3.9078) grad_norm: 2.3201 (2.3490) time: 0.7661 data: 0.0002 max mem: 8426 +[2024-12-10 19:11:54 root] (utils.py 283): INFO Epoch: [14] [1510/2502] eta: 0:12:39 lr: 0.000012 loss_cls: 3.8633 (3.9071) grad_norm: 2.2879 (2.3490) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 19:12:02 root] (utils.py 283): INFO Epoch: [14] [1520/2502] eta: 0:12:31 lr: 0.000012 loss_cls: 4.0319 (3.9086) grad_norm: 2.3404 (2.3492) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 19:12:09 root] (utils.py 283): INFO Epoch: [14] [1530/2502] eta: 0:12:23 lr: 0.000012 loss_cls: 3.9719 (3.9086) grad_norm: 2.3423 (2.3491) time: 0.7654 data: 0.0002 max mem: 8426 +[2024-12-10 19:12:17 root] (utils.py 283): INFO Epoch: [14] [1540/2502] eta: 0:12:16 lr: 0.000012 loss_cls: 3.9462 (3.9073) grad_norm: 2.3453 (2.3494) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 19:12:25 root] (utils.py 283): INFO Epoch: [14] [1550/2502] eta: 0:12:08 lr: 0.000012 loss_cls: 3.7573 (3.9071) grad_norm: 2.3453 (2.3490) time: 0.7676 data: 0.0002 max mem: 8426 +[2024-12-10 19:12:32 root] (utils.py 283): INFO Epoch: [14] [1560/2502] eta: 0:12:00 lr: 0.000012 loss_cls: 4.2831 (3.9086) grad_norm: 2.3246 (2.3491) time: 0.7722 data: 0.0002 max mem: 8426 +[2024-12-10 19:12:40 root] (utils.py 283): INFO Epoch: [14] [1570/2502] eta: 0:11:53 lr: 0.000012 loss_cls: 4.2115 (3.9088) grad_norm: 2.3323 (2.3491) time: 0.7764 data: 0.0002 max mem: 8426 +[2024-12-10 19:12:48 root] (utils.py 283): INFO Epoch: [14] [1580/2502] eta: 0:11:45 lr: 0.000012 loss_cls: 4.1108 (3.9096) grad_norm: 2.2771 (2.3490) time: 0.7729 data: 0.0003 max mem: 8426 +[2024-12-10 19:12:55 root] (utils.py 283): INFO Epoch: [14] [1590/2502] eta: 0:11:38 lr: 0.000012 loss_cls: 4.0613 (3.9100) grad_norm: 2.2761 (2.3487) time: 0.7689 data: 0.0003 max mem: 8426 +[2024-12-10 19:13:03 root] (utils.py 283): INFO Epoch: [14] [1600/2502] eta: 0:11:30 lr: 0.000012 loss_cls: 3.9787 (3.9085) grad_norm: 2.2950 (2.3487) time: 0.7670 data: 0.0002 max mem: 8426 +[2024-12-10 19:13:11 root] (utils.py 283): INFO Epoch: [14] [1610/2502] eta: 0:11:22 lr: 0.000012 loss_cls: 3.8283 (3.9082) grad_norm: 2.3628 (2.3487) time: 0.7607 data: 0.0002 max mem: 8426 +[2024-12-10 19:13:18 root] (utils.py 283): INFO Epoch: [14] [1620/2502] eta: 0:11:15 lr: 0.000012 loss_cls: 3.9054 (3.9072) grad_norm: 2.3447 (2.3484) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 19:13:26 root] (utils.py 283): INFO Epoch: [14] [1630/2502] eta: 0:11:07 lr: 0.000012 loss_cls: 4.2436 (3.9095) grad_norm: 2.3442 (2.3489) time: 0.7695 data: 0.0002 max mem: 8426 +[2024-12-10 19:13:34 root] (utils.py 283): INFO Epoch: [14] [1640/2502] eta: 0:10:59 lr: 0.000012 loss_cls: 4.2307 (3.9105) grad_norm: 2.3494 (2.3491) time: 0.7712 data: 0.0002 max mem: 8426 +[2024-12-10 19:13:41 root] (utils.py 283): INFO Epoch: [14] [1650/2502] eta: 0:10:52 lr: 0.000012 loss_cls: 3.9505 (3.9090) grad_norm: 2.3589 (2.3495) time: 0.7658 data: 0.0003 max mem: 8426 +[2024-12-10 19:13:49 root] (utils.py 283): INFO Epoch: [14] [1660/2502] eta: 0:10:44 lr: 0.000012 loss_cls: 3.8463 (3.9087) grad_norm: 2.3433 (2.3493) time: 0.7647 data: 0.0003 max mem: 8426 +[2024-12-10 19:13:57 root] (utils.py 283): INFO Epoch: [14] [1670/2502] eta: 0:10:36 lr: 0.000012 loss_cls: 3.7528 (3.9077) grad_norm: 2.3341 (2.3497) time: 0.7658 data: 0.0003 max mem: 8426 +[2024-12-10 19:14:04 root] (utils.py 283): INFO Epoch: [14] [1680/2502] eta: 0:10:29 lr: 0.000012 loss_cls: 3.8386 (3.9078) grad_norm: 2.3343 (2.3499) time: 0.7655 data: 0.0003 max mem: 8426 +[2024-12-10 19:14:12 root] (utils.py 283): INFO Epoch: [14] [1690/2502] eta: 0:10:21 lr: 0.000012 loss_cls: 4.0065 (3.9073) grad_norm: 2.3506 (2.3499) time: 0.7631 data: 0.0003 max mem: 8426 +[2024-12-10 19:14:20 root] (utils.py 283): INFO Epoch: [14] [1700/2502] eta: 0:10:13 lr: 0.000012 loss_cls: 3.9979 (3.9076) grad_norm: 2.3622 (2.3502) time: 0.7638 data: 0.0003 max mem: 8426 +[2024-12-10 19:14:27 root] (utils.py 283): INFO Epoch: [14] [1710/2502] eta: 0:10:06 lr: 0.000012 loss_cls: 3.9979 (3.9060) grad_norm: 2.3622 (2.3503) time: 0.7671 data: 0.0002 max mem: 8426 +[2024-12-10 19:14:35 root] (utils.py 283): INFO Epoch: [14] [1720/2502] eta: 0:09:58 lr: 0.000012 loss_cls: 3.8388 (3.9061) grad_norm: 2.3678 (2.3503) time: 0.7661 data: 0.0002 max mem: 8426 +[2024-12-10 19:14:43 root] (utils.py 283): INFO Epoch: [14] [1730/2502] eta: 0:09:50 lr: 0.000012 loss_cls: 4.1411 (3.9084) grad_norm: 2.4034 (2.3506) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 19:14:50 root] (utils.py 283): INFO Epoch: [14] [1740/2502] eta: 0:09:43 lr: 0.000012 loss_cls: 4.2908 (3.9090) grad_norm: 2.3868 (2.3507) time: 0.7632 data: 0.0002 max mem: 8426 +[2024-12-10 19:14:58 root] (utils.py 283): INFO Epoch: [14] [1750/2502] eta: 0:09:35 lr: 0.000012 loss_cls: 3.9701 (3.9084) grad_norm: 2.2981 (2.3503) time: 0.7659 data: 0.0002 max mem: 8426 +[2024-12-10 19:15:05 root] (utils.py 283): INFO Epoch: [14] [1760/2502] eta: 0:09:27 lr: 0.000012 loss_cls: 3.6199 (3.9076) grad_norm: 2.2734 (2.3505) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 19:15:13 root] (utils.py 283): INFO Epoch: [14] [1770/2502] eta: 0:09:20 lr: 0.000012 loss_cls: 3.5818 (3.9063) grad_norm: 2.3260 (2.3506) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-10 19:15:21 root] (utils.py 283): INFO Epoch: [14] [1780/2502] eta: 0:09:12 lr: 0.000012 loss_cls: 3.8629 (3.9059) grad_norm: 2.3355 (2.3506) time: 0.7617 data: 0.0003 max mem: 8426 +[2024-12-10 19:15:28 root] (utils.py 283): INFO Epoch: [14] [1790/2502] eta: 0:09:04 lr: 0.000012 loss_cls: 3.8324 (3.9038) grad_norm: 2.3355 (2.3506) time: 0.7647 data: 0.0003 max mem: 8426 +[2024-12-10 19:15:36 root] (utils.py 283): INFO Epoch: [14] [1800/2502] eta: 0:08:57 lr: 0.000012 loss_cls: 3.9447 (3.9051) grad_norm: 2.3707 (2.3506) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 19:15:44 root] (utils.py 283): INFO Epoch: [14] [1810/2502] eta: 0:08:49 lr: 0.000012 loss_cls: 4.0080 (3.9038) grad_norm: 2.4191 (2.3511) time: 0.7599 data: 0.0002 max mem: 8426 +[2024-12-10 19:15:51 root] (utils.py 283): INFO Epoch: [14] [1820/2502] eta: 0:08:41 lr: 0.000012 loss_cls: 4.1313 (3.9042) grad_norm: 2.3043 (2.3507) time: 0.7625 data: 0.0002 max mem: 8426 +[2024-12-10 19:15:59 root] (utils.py 283): INFO Epoch: [14] [1830/2502] eta: 0:08:34 lr: 0.000012 loss_cls: 4.1313 (3.9042) grad_norm: 2.2865 (2.3503) time: 0.7617 data: 0.0003 max mem: 8426 +[2024-12-10 19:16:06 root] (utils.py 283): INFO Epoch: [14] [1840/2502] eta: 0:08:26 lr: 0.000012 loss_cls: 3.9801 (3.9049) grad_norm: 2.2869 (2.3503) time: 0.7629 data: 0.0003 max mem: 8426 +[2024-12-10 19:16:14 root] (utils.py 283): INFO Epoch: [14] [1850/2502] eta: 0:08:18 lr: 0.000012 loss_cls: 3.8448 (3.9045) grad_norm: 2.3027 (2.3503) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 19:16:22 root] (utils.py 283): INFO Epoch: [14] [1860/2502] eta: 0:08:11 lr: 0.000012 loss_cls: 4.0091 (3.9055) grad_norm: 2.3181 (2.3503) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 19:16:29 root] (utils.py 283): INFO Epoch: [14] [1870/2502] eta: 0:08:03 lr: 0.000012 loss_cls: 4.0942 (3.9054) grad_norm: 2.3359 (2.3503) time: 0.7681 data: 0.0002 max mem: 8426 +[2024-12-10 19:16:37 root] (utils.py 283): INFO Epoch: [14] [1880/2502] eta: 0:07:56 lr: 0.000012 loss_cls: 4.0360 (3.9050) grad_norm: 2.3295 (2.3503) time: 0.7654 data: 0.0002 max mem: 8426 +[2024-12-10 19:16:45 root] (utils.py 283): INFO Epoch: [14] [1890/2502] eta: 0:07:48 lr: 0.000012 loss_cls: 4.1946 (3.9062) grad_norm: 2.3519 (2.3501) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 19:16:52 root] (utils.py 283): INFO Epoch: [14] [1900/2502] eta: 0:07:40 lr: 0.000012 loss_cls: 3.9791 (3.9048) grad_norm: 2.3519 (2.3503) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 19:17:00 root] (utils.py 283): INFO Epoch: [14] [1910/2502] eta: 0:07:33 lr: 0.000012 loss_cls: 3.8433 (3.9065) grad_norm: 2.3622 (2.3508) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 19:17:08 root] (utils.py 283): INFO Epoch: [14] [1920/2502] eta: 0:07:25 lr: 0.000012 loss_cls: 4.1514 (3.9053) grad_norm: 2.3679 (2.3506) time: 0.7714 data: 0.0002 max mem: 8426 +[2024-12-10 19:17:15 root] (utils.py 283): INFO Epoch: [14] [1930/2502] eta: 0:07:17 lr: 0.000012 loss_cls: 3.9548 (3.9062) grad_norm: 2.3489 (2.3507) time: 0.7693 data: 0.0003 max mem: 8426 +[2024-12-10 19:17:23 root] (utils.py 283): INFO Epoch: [14] [1940/2502] eta: 0:07:10 lr: 0.000012 loss_cls: 4.1604 (3.9079) grad_norm: 2.3549 (2.3510) time: 0.7666 data: 0.0003 max mem: 8426 +[2024-12-10 19:17:31 root] (utils.py 283): INFO Epoch: [14] [1950/2502] eta: 0:07:02 lr: 0.000012 loss_cls: 4.0960 (3.9076) grad_norm: 2.3226 (2.3508) time: 0.7747 data: 0.0003 max mem: 8426 +[2024-12-10 19:17:39 root] (utils.py 283): INFO Epoch: [14] [1960/2502] eta: 0:06:54 lr: 0.000012 loss_cls: 4.0910 (3.9086) grad_norm: 2.3040 (2.3507) time: 0.7826 data: 0.0003 max mem: 8426 +[2024-12-10 19:17:47 root] (utils.py 283): INFO Epoch: [14] [1970/2502] eta: 0:06:47 lr: 0.000012 loss_cls: 4.1623 (3.9094) grad_norm: 2.3236 (2.3507) time: 0.7823 data: 0.0002 max mem: 8426 +[2024-12-10 19:17:54 root] (utils.py 283): INFO Epoch: [14] [1980/2502] eta: 0:06:39 lr: 0.000012 loss_cls: 4.0689 (3.9099) grad_norm: 2.3236 (2.3507) time: 0.7701 data: 0.0002 max mem: 8426 +[2024-12-10 19:18:02 root] (utils.py 283): INFO Epoch: [14] [1990/2502] eta: 0:06:31 lr: 0.000012 loss_cls: 3.8283 (3.9088) grad_norm: 2.3571 (2.3507) time: 0.7684 data: 0.0003 max mem: 8426 +[2024-12-10 19:18:10 root] (utils.py 283): INFO Epoch: [14] [2000/2502] eta: 0:06:24 lr: 0.000012 loss_cls: 3.6453 (3.9079) grad_norm: 2.3571 (2.3508) time: 0.7716 data: 0.0002 max mem: 8426 +[2024-12-10 19:18:17 root] (utils.py 283): INFO Epoch: [14] [2010/2502] eta: 0:06:16 lr: 0.000012 loss_cls: 3.9559 (3.9085) grad_norm: 2.3250 (2.3506) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 19:18:25 root] (utils.py 283): INFO Epoch: [14] [2020/2502] eta: 0:06:09 lr: 0.000012 loss_cls: 3.7598 (3.9070) grad_norm: 2.3035 (2.3506) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-10 19:18:33 root] (utils.py 283): INFO Epoch: [14] [2030/2502] eta: 0:06:01 lr: 0.000012 loss_cls: 3.7598 (3.9075) grad_norm: 2.3585 (2.3510) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 19:18:40 root] (utils.py 283): INFO Epoch: [14] [2040/2502] eta: 0:05:53 lr: 0.000012 loss_cls: 4.2223 (3.9068) grad_norm: 2.3803 (2.3512) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 19:18:48 root] (utils.py 283): INFO Epoch: [14] [2050/2502] eta: 0:05:46 lr: 0.000012 loss_cls: 4.1279 (3.9071) grad_norm: 2.2765 (2.3508) time: 0.7671 data: 0.0002 max mem: 8426 +[2024-12-10 19:18:56 root] (utils.py 283): INFO Epoch: [14] [2060/2502] eta: 0:05:38 lr: 0.000012 loss_cls: 3.9595 (3.9061) grad_norm: 2.3030 (2.3506) time: 0.7657 data: 0.0003 max mem: 8426 +[2024-12-10 19:19:03 root] (utils.py 283): INFO Epoch: [14] [2070/2502] eta: 0:05:30 lr: 0.000012 loss_cls: 3.7548 (3.9061) grad_norm: 2.3063 (2.3504) time: 0.7648 data: 0.0003 max mem: 8426 +[2024-12-10 19:19:11 root] (utils.py 283): INFO Epoch: [14] [2080/2502] eta: 0:05:23 lr: 0.000012 loss_cls: 3.9470 (3.9065) grad_norm: 2.2884 (2.3505) time: 0.7644 data: 0.0003 max mem: 8426 +[2024-12-10 19:19:18 root] (utils.py 283): INFO Epoch: [14] [2090/2502] eta: 0:05:15 lr: 0.000012 loss_cls: 4.0061 (3.9066) grad_norm: 2.3832 (2.3511) time: 0.7616 data: 0.0003 max mem: 8426 +[2024-12-10 19:19:26 root] (utils.py 283): INFO Epoch: [14] [2100/2502] eta: 0:05:07 lr: 0.000012 loss_cls: 3.8672 (3.9060) grad_norm: 2.3598 (2.3509) time: 0.7605 data: 0.0002 max mem: 8426 +[2024-12-10 19:19:34 root] (utils.py 283): INFO Epoch: [14] [2110/2502] eta: 0:05:00 lr: 0.000012 loss_cls: 4.0956 (3.9071) grad_norm: 2.3422 (2.3510) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-10 19:19:41 root] (utils.py 283): INFO Epoch: [14] [2120/2502] eta: 0:04:52 lr: 0.000012 loss_cls: 4.1312 (3.9065) grad_norm: 2.3551 (2.3513) time: 0.7659 data: 0.0002 max mem: 8426 +[2024-12-10 19:19:49 root] (utils.py 283): INFO Epoch: [14] [2130/2502] eta: 0:04:44 lr: 0.000012 loss_cls: 4.0301 (3.9072) grad_norm: 2.3211 (2.3510) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 19:19:57 root] (utils.py 283): INFO Epoch: [14] [2140/2502] eta: 0:04:37 lr: 0.000012 loss_cls: 3.9256 (3.9066) grad_norm: 2.3211 (2.3510) time: 0.7708 data: 0.0002 max mem: 8426 +[2024-12-10 19:20:05 root] (utils.py 283): INFO Epoch: [14] [2150/2502] eta: 0:04:29 lr: 0.000012 loss_cls: 3.9256 (3.9084) grad_norm: 2.3574 (2.3510) time: 0.7809 data: 0.0002 max mem: 8426 +[2024-12-10 19:20:12 root] (utils.py 283): INFO Epoch: [14] [2160/2502] eta: 0:04:21 lr: 0.000012 loss_cls: 4.2372 (3.9083) grad_norm: 2.3182 (2.3510) time: 0.7768 data: 0.0002 max mem: 8426 +[2024-12-10 19:20:20 root] (utils.py 283): INFO Epoch: [14] [2170/2502] eta: 0:04:14 lr: 0.000012 loss_cls: 3.7707 (3.9068) grad_norm: 2.3308 (2.3510) time: 0.7649 data: 0.0003 max mem: 8426 +[2024-12-10 19:20:28 root] (utils.py 283): INFO Epoch: [14] [2180/2502] eta: 0:04:06 lr: 0.000012 loss_cls: 3.4990 (3.9055) grad_norm: 2.3313 (2.3512) time: 0.7630 data: 0.0003 max mem: 8426 +[2024-12-10 19:20:35 root] (utils.py 283): INFO Epoch: [14] [2190/2502] eta: 0:03:58 lr: 0.000012 loss_cls: 4.0315 (3.9066) grad_norm: 2.3671 (2.3513) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 19:20:43 root] (utils.py 283): INFO Epoch: [14] [2200/2502] eta: 0:03:51 lr: 0.000012 loss_cls: 4.1456 (3.9066) grad_norm: 2.3439 (2.3512) time: 0.7678 data: 0.0002 max mem: 8426 +[2024-12-10 19:20:51 root] (utils.py 283): INFO Epoch: [14] [2210/2502] eta: 0:03:43 lr: 0.000012 loss_cls: 3.8380 (3.9069) grad_norm: 2.3165 (2.3511) time: 0.7666 data: 0.0002 max mem: 8426 +[2024-12-10 19:20:58 root] (utils.py 283): INFO Epoch: [14] [2220/2502] eta: 0:03:35 lr: 0.000012 loss_cls: 3.7536 (3.9059) grad_norm: 2.3161 (2.3510) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 19:21:06 root] (utils.py 283): INFO Epoch: [14] [2230/2502] eta: 0:03:28 lr: 0.000012 loss_cls: 3.6433 (3.9056) grad_norm: 2.3339 (2.3510) time: 0.7680 data: 0.0002 max mem: 8426 +[2024-12-10 19:21:14 root] (utils.py 283): INFO Epoch: [14] [2240/2502] eta: 0:03:20 lr: 0.000012 loss_cls: 3.9577 (3.9059) grad_norm: 2.3467 (2.3509) time: 0.7678 data: 0.0003 max mem: 8426 +[2024-12-10 19:21:21 root] (utils.py 283): INFO Epoch: [14] [2250/2502] eta: 0:03:12 lr: 0.000012 loss_cls: 4.0781 (3.9065) grad_norm: 2.3695 (2.3511) time: 0.7646 data: 0.0003 max mem: 8426 +[2024-12-10 19:21:29 root] (utils.py 283): INFO Epoch: [14] [2260/2502] eta: 0:03:05 lr: 0.000012 loss_cls: 4.1507 (3.9075) grad_norm: 2.3836 (2.3512) time: 0.7691 data: 0.0003 max mem: 8426 +[2024-12-10 19:21:37 root] (utils.py 283): INFO Epoch: [14] [2270/2502] eta: 0:02:57 lr: 0.000012 loss_cls: 4.2147 (3.9079) grad_norm: 2.3695 (2.3513) time: 0.7699 data: 0.0003 max mem: 8426 +[2024-12-10 19:21:44 root] (utils.py 283): INFO Epoch: [14] [2280/2502] eta: 0:02:49 lr: 0.000012 loss_cls: 4.0149 (3.9081) grad_norm: 2.3671 (2.3513) time: 0.7633 data: 0.0003 max mem: 8426 +[2024-12-10 19:21:52 root] (utils.py 283): INFO Epoch: [14] [2290/2502] eta: 0:02:42 lr: 0.000012 loss_cls: 4.1324 (3.9086) grad_norm: 2.3416 (2.3513) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 19:22:00 root] (utils.py 283): INFO Epoch: [14] [2300/2502] eta: 0:02:34 lr: 0.000012 loss_cls: 4.1482 (3.9092) grad_norm: 2.3613 (2.3516) time: 0.7653 data: 0.0003 max mem: 8426 +[2024-12-10 19:22:07 root] (utils.py 283): INFO Epoch: [14] [2310/2502] eta: 0:02:27 lr: 0.000012 loss_cls: 3.7896 (3.9083) grad_norm: 2.3882 (2.3518) time: 0.7680 data: 0.0003 max mem: 8426 +[2024-12-10 19:22:15 root] (utils.py 283): INFO Epoch: [14] [2320/2502] eta: 0:02:19 lr: 0.000012 loss_cls: 4.0199 (3.9094) grad_norm: 2.3531 (2.3518) time: 0.7666 data: 0.0003 max mem: 8426 +[2024-12-10 19:22:22 root] (utils.py 283): INFO Epoch: [14] [2330/2502] eta: 0:02:11 lr: 0.000012 loss_cls: 4.1090 (3.9096) grad_norm: 2.3178 (2.3518) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 19:22:30 root] (utils.py 283): INFO Epoch: [14] [2340/2502] eta: 0:02:04 lr: 0.000012 loss_cls: 3.9394 (3.9093) grad_norm: 2.3201 (2.3517) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 19:22:38 root] (utils.py 283): INFO Epoch: [14] [2350/2502] eta: 0:01:56 lr: 0.000012 loss_cls: 3.6023 (3.9081) grad_norm: 2.3204 (2.3517) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 19:22:45 root] (utils.py 283): INFO Epoch: [14] [2360/2502] eta: 0:01:48 lr: 0.000012 loss_cls: 3.9193 (3.9089) grad_norm: 2.3204 (2.3517) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 19:22:53 root] (utils.py 283): INFO Epoch: [14] [2370/2502] eta: 0:01:41 lr: 0.000012 loss_cls: 4.1818 (3.9100) grad_norm: 2.3066 (2.3517) time: 0.7618 data: 0.0002 max mem: 8426 +[2024-12-10 19:23:01 root] (utils.py 283): INFO Epoch: [14] [2380/2502] eta: 0:01:33 lr: 0.000012 loss_cls: 3.9139 (3.9090) grad_norm: 2.3183 (2.3515) time: 0.7622 data: 0.0003 max mem: 8426 +[2024-12-10 19:23:08 root] (utils.py 283): INFO Epoch: [14] [2390/2502] eta: 0:01:25 lr: 0.000012 loss_cls: 3.8988 (3.9093) grad_norm: 2.3657 (2.3515) time: 0.7613 data: 0.0003 max mem: 8426 +[2024-12-10 19:23:16 root] (utils.py 283): INFO Epoch: [14] [2400/2502] eta: 0:01:18 lr: 0.000012 loss_cls: 4.0432 (3.9098) grad_norm: 2.3283 (2.3516) time: 0.7600 data: 0.0003 max mem: 8426 +[2024-12-10 19:23:24 root] (utils.py 283): INFO Epoch: [14] [2410/2502] eta: 0:01:10 lr: 0.000012 loss_cls: 3.9537 (3.9089) grad_norm: 2.3283 (2.3518) time: 0.7623 data: 0.0002 max mem: 8426 +[2024-12-10 19:23:31 root] (utils.py 283): INFO Epoch: [14] [2420/2502] eta: 0:01:02 lr: 0.000012 loss_cls: 3.7256 (3.9093) grad_norm: 2.3886 (2.3518) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 19:23:39 root] (utils.py 283): INFO Epoch: [14] [2430/2502] eta: 0:00:55 lr: 0.000012 loss_cls: 3.9834 (3.9090) grad_norm: 2.3193 (2.3518) time: 0.7673 data: 0.0002 max mem: 8426 +[2024-12-10 19:23:47 root] (utils.py 283): INFO Epoch: [14] [2440/2502] eta: 0:00:47 lr: 0.000012 loss_cls: 4.0396 (3.9093) grad_norm: 2.2737 (2.3515) time: 0.7685 data: 0.0003 max mem: 8426 +[2024-12-10 19:23:54 root] (utils.py 283): INFO Epoch: [14] [2450/2502] eta: 0:00:39 lr: 0.000012 loss_cls: 3.9780 (3.9082) grad_norm: 2.3090 (2.3516) time: 0.7681 data: 0.0003 max mem: 8426 +[2024-12-10 19:24:02 root] (utils.py 283): INFO Epoch: [14] [2460/2502] eta: 0:00:32 lr: 0.000012 loss_cls: 3.9245 (3.9090) grad_norm: 2.3720 (2.3516) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 19:24:09 root] (utils.py 283): INFO Epoch: [14] [2470/2502] eta: 0:00:24 lr: 0.000012 loss_cls: 3.8552 (3.9078) grad_norm: 2.2961 (2.3515) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 19:24:17 root] (utils.py 283): INFO Epoch: [14] [2480/2502] eta: 0:00:16 lr: 0.000012 loss_cls: 3.6954 (3.9078) grad_norm: 2.2961 (2.3514) time: 0.7631 data: 0.0003 max mem: 8426 +[2024-12-10 19:24:25 root] (utils.py 283): INFO Epoch: [14] [2490/2502] eta: 0:00:09 lr: 0.000012 loss_cls: 3.7130 (3.9074) grad_norm: 2.3230 (2.3514) time: 0.7982 data: 0.0246 max mem: 8426 +[2024-12-10 19:24:33 root] (utils.py 283): INFO Epoch: [14] [2500/2502] eta: 0:00:01 lr: 0.000012 loss_cls: 3.9421 (3.9076) grad_norm: 2.3346 (2.3513) time: 0.8085 data: 0.0245 max mem: 8426 +[2024-12-10 19:24:34 root] (utils.py 283): INFO Epoch: [14] [2501/2502] eta: 0:00:00 lr: 0.000012 loss_cls: 3.9421 (3.9079) grad_norm: 2.3346 (2.3513) time: 0.8083 data: 0.0245 max mem: 8426 +[2024-12-10 19:24:34 root] (utils.py 297): INFO Epoch: [14] Total time: 0:31:56 (0.7661 s / it) +[2024-12-10 19:24:34 root] (engine.py 179): INFO Averaged stats:lr: 0.000012 loss_cls: 3.9421 (3.9143) grad_norm: 2.3346 (2.3513) +[2024-12-10 19:24:35 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.6405 (0.6405) acc1: 85.9375 (85.9375) acc3: 96.0938 (96.0938) acc5: 99.2188 (99.2188) time: 0.1275 data: 0.0002 max mem: 8426 +[2024-12-10 19:24:36 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7420 (0.8181) acc1: 85.9375 (81.9602) acc3: 95.3125 (93.1108) acc5: 96.8750 (96.3778) time: 0.1278 data: 0.0003 max mem: 8426 +[2024-12-10 19:24:37 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8766 (0.8692) acc1: 78.9062 (80.9524) acc3: 92.1875 (92.4851) acc5: 95.3125 (95.4613) time: 0.1280 data: 0.0004 max mem: 8426 +[2024-12-10 19:24:38 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9643 (0.8805) acc1: 78.9062 (80.2419) acc3: 92.1875 (92.7923) acc5: 95.3125 (95.5645) time: 0.1280 data: 0.0004 max mem: 8426 +[2024-12-10 19:24:40 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8091 (0.8677) acc1: 80.4688 (80.8498) acc3: 94.5312 (93.0069) acc5: 96.0938 (95.7127) time: 0.1513 data: 0.0190 max mem: 8426 +[2024-12-10 19:24:42 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0449 (0.9539) acc1: 75.0000 (78.6152) acc3: 89.0625 (91.7126) acc5: 92.9688 (94.6844) time: 0.1649 data: 0.0190 max mem: 8426 +[2024-12-10 19:24:43 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2726 (0.9987) acc1: 71.0938 (77.8048) acc3: 85.9375 (90.8811) acc5: 89.0625 (93.8397) time: 0.1570 data: 0.0004 max mem: 8426 +[2024-12-10 19:24:45 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:04 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2326 (1.0391) acc1: 75.0000 (76.9366) acc3: 87.5000 (90.3499) acc5: 89.8438 (93.3759) time: 0.1508 data: 0.0004 max mem: 8426 +[2024-12-10 19:24:46 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2122 (1.0725) acc1: 73.4375 (76.1960) acc3: 86.7188 (89.7859) acc5: 90.6250 (92.9302) time: 0.1355 data: 0.0006 max mem: 8426 +[2024-12-10 19:24:47 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2122 (1.1000) acc1: 71.0938 (75.5323) acc3: 85.9375 (89.4402) acc5: 90.6250 (92.6854) time: 0.1303 data: 0.0027 max mem: 8426 +[2024-12-10 19:24:48 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1593 (1.0887) acc1: 74.2188 (75.8000) acc3: 88.2812 (89.6160) acc5: 92.1875 (92.8400) time: 0.1287 data: 0.0026 max mem: 8426 +[2024-12-10 19:24:48 root] (utils.py 297): INFO Test: Total time: 0:00:13 (0.1405 s / it) +[2024-12-10 19:24:48 root] (engine.py 264): INFO * Acc@1 75.566 Acc@3 89.554 Acc@5 92.782 loss 1.092 flops 1.285 layer_flops 1.251 +[2024-12-10 19:24:48 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 75.6% +[2024-12-10 19:24:48 root] (main.py 576): INFO Max accuracy: 75.65% +[2024-12-10 19:24:49 root] (utils.py 283): INFO Epoch: [15] [ 0/2502] eta: 0:33:46 lr: 0.000011 loss_cls: 4.2939 (4.2939) grad_norm: 2.1098 (2.1098) time: 0.8101 data: 0.0003 max mem: 8426 +[2024-12-10 19:24:57 root] (utils.py 283): INFO Epoch: [15] [ 10/2502] eta: 0:31:43 lr: 0.000011 loss_cls: 4.2730 (3.9880) grad_norm: 2.2919 (2.3224) time: 0.7640 data: 0.0003 max mem: 8426 +[2024-12-10 19:25:04 root] (utils.py 283): INFO Epoch: [15] [ 20/2502] eta: 0:31:32 lr: 0.000011 loss_cls: 4.2082 (3.9955) grad_norm: 2.3269 (2.3504) time: 0.7600 data: 0.0002 max mem: 8426 +[2024-12-10 19:25:12 root] (utils.py 283): INFO Epoch: [15] [ 30/2502] eta: 0:31:25 lr: 0.000011 loss_cls: 3.9162 (3.9821) grad_norm: 2.3445 (2.3501) time: 0.7620 data: 0.0003 max mem: 8426 +[2024-12-10 19:25:20 root] (utils.py 283): INFO Epoch: [15] [ 40/2502] eta: 0:31:19 lr: 0.000011 loss_cls: 3.9162 (3.9500) grad_norm: 2.3207 (2.3480) time: 0.7645 data: 0.0003 max mem: 8426 +[2024-12-10 19:25:27 root] (utils.py 283): INFO Epoch: [15] [ 50/2502] eta: 0:31:10 lr: 0.000011 loss_cls: 4.0941 (3.9711) grad_norm: 2.3264 (2.3512) time: 0.7626 data: 0.0003 max mem: 8426 +[2024-12-10 19:25:35 root] (utils.py 283): INFO Epoch: [15] [ 60/2502] eta: 0:31:02 lr: 0.000011 loss_cls: 4.1176 (3.9829) grad_norm: 2.3417 (2.3541) time: 0.7610 data: 0.0002 max mem: 8426 +[2024-12-10 19:25:43 root] (utils.py 283): INFO Epoch: [15] [ 70/2502] eta: 0:30:55 lr: 0.000011 loss_cls: 3.8661 (3.9233) grad_norm: 2.3742 (2.3631) time: 0.7631 data: 0.0003 max mem: 8426 +[2024-12-10 19:25:50 root] (utils.py 283): INFO Epoch: [15] [ 80/2502] eta: 0:30:51 lr: 0.000011 loss_cls: 4.0391 (3.9404) grad_norm: 2.4258 (2.3648) time: 0.7697 data: 0.0003 max mem: 8426 +[2024-12-10 19:25:58 root] (utils.py 283): INFO Epoch: [15] [ 90/2502] eta: 0:30:48 lr: 0.000011 loss_cls: 4.0391 (3.9153) grad_norm: 2.3498 (2.3636) time: 0.7783 data: 0.0002 max mem: 8426 +[2024-12-10 19:26:06 root] (utils.py 283): INFO Epoch: [15] [ 100/2502] eta: 0:30:43 lr: 0.000011 loss_cls: 3.8854 (3.9084) grad_norm: 2.3472 (2.3646) time: 0.7806 data: 0.0002 max mem: 8426 +[2024-12-10 19:26:14 root] (utils.py 283): INFO Epoch: [15] [ 110/2502] eta: 0:30:39 lr: 0.000011 loss_cls: 4.1119 (3.9395) grad_norm: 2.3751 (2.3658) time: 0.7812 data: 0.0003 max mem: 8426 +[2024-12-10 19:26:22 root] (utils.py 283): INFO Epoch: [15] [ 120/2502] eta: 0:30:34 lr: 0.000011 loss_cls: 4.3042 (3.9588) grad_norm: 2.3730 (2.3641) time: 0.7824 data: 0.0002 max mem: 8426 +[2024-12-10 19:26:29 root] (utils.py 283): INFO Epoch: [15] [ 130/2502] eta: 0:30:27 lr: 0.000011 loss_cls: 4.2687 (3.9595) grad_norm: 2.3167 (2.3623) time: 0.7803 data: 0.0003 max mem: 8426 +[2024-12-10 19:26:37 root] (utils.py 283): INFO Epoch: [15] [ 140/2502] eta: 0:30:21 lr: 0.000011 loss_cls: 4.0029 (3.9551) grad_norm: 2.3167 (2.3594) time: 0.7777 data: 0.0003 max mem: 8426 +[2024-12-10 19:26:45 root] (utils.py 283): INFO Epoch: [15] [ 150/2502] eta: 0:30:13 lr: 0.000011 loss_cls: 3.5942 (3.9389) grad_norm: 2.3196 (2.3579) time: 0.7727 data: 0.0002 max mem: 8426 +[2024-12-10 19:26:52 root] (utils.py 283): INFO Epoch: [15] [ 160/2502] eta: 0:30:04 lr: 0.000011 loss_cls: 3.9029 (3.9533) grad_norm: 2.3607 (2.3594) time: 0.7665 data: 0.0002 max mem: 8426 +[2024-12-10 19:27:00 root] (utils.py 283): INFO Epoch: [15] [ 170/2502] eta: 0:29:56 lr: 0.000011 loss_cls: 4.1475 (3.9594) grad_norm: 2.2774 (2.3552) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-10 19:27:08 root] (utils.py 283): INFO Epoch: [15] [ 180/2502] eta: 0:29:47 lr: 0.000011 loss_cls: 4.0793 (3.9523) grad_norm: 2.2774 (2.3516) time: 0.7648 data: 0.0003 max mem: 8426 +[2024-12-10 19:27:15 root] (utils.py 283): INFO Epoch: [15] [ 190/2502] eta: 0:29:38 lr: 0.000011 loss_cls: 4.0793 (3.9661) grad_norm: 2.3151 (2.3527) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-10 19:27:23 root] (utils.py 283): INFO Epoch: [15] [ 200/2502] eta: 0:29:30 lr: 0.000011 loss_cls: 4.2428 (3.9650) grad_norm: 2.3567 (2.3510) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-10 19:27:31 root] (utils.py 283): INFO Epoch: [15] [ 210/2502] eta: 0:29:22 lr: 0.000011 loss_cls: 3.9629 (3.9520) grad_norm: 2.3133 (2.3477) time: 0.7623 data: 0.0002 max mem: 8426 +[2024-12-10 19:27:38 root] (utils.py 283): INFO Epoch: [15] [ 220/2502] eta: 0:29:13 lr: 0.000011 loss_cls: 3.6650 (3.9290) grad_norm: 2.3629 (2.3509) time: 0.7627 data: 0.0003 max mem: 8426 +[2024-12-10 19:27:46 root] (utils.py 283): INFO Epoch: [15] [ 230/2502] eta: 0:29:05 lr: 0.000011 loss_cls: 3.7574 (3.9264) grad_norm: 2.3629 (2.3509) time: 0.7611 data: 0.0002 max mem: 8426 +[2024-12-10 19:27:53 root] (utils.py 283): INFO Epoch: [15] [ 240/2502] eta: 0:28:56 lr: 0.000011 loss_cls: 4.1388 (3.9294) grad_norm: 2.3745 (2.3538) time: 0.7606 data: 0.0002 max mem: 8426 +[2024-12-10 19:28:01 root] (utils.py 283): INFO Epoch: [15] [ 250/2502] eta: 0:28:48 lr: 0.000011 loss_cls: 3.8490 (3.9308) grad_norm: 2.3955 (2.3519) time: 0.7621 data: 0.0003 max mem: 8426 +[2024-12-10 19:28:09 root] (utils.py 283): INFO Epoch: [15] [ 260/2502] eta: 0:28:40 lr: 0.000011 loss_cls: 3.7519 (3.9231) grad_norm: 2.3001 (2.3508) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 19:28:16 root] (utils.py 283): INFO Epoch: [15] [ 270/2502] eta: 0:28:32 lr: 0.000011 loss_cls: 3.9167 (3.9329) grad_norm: 2.3612 (2.3547) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 19:28:24 root] (utils.py 283): INFO Epoch: [15] [ 280/2502] eta: 0:28:24 lr: 0.000011 loss_cls: 3.9167 (3.9221) grad_norm: 2.3926 (2.3548) time: 0.7609 data: 0.0002 max mem: 8426 +[2024-12-10 19:28:32 root] (utils.py 283): INFO Epoch: [15] [ 290/2502] eta: 0:28:16 lr: 0.000011 loss_cls: 3.8109 (3.9196) grad_norm: 2.4053 (2.3581) time: 0.7603 data: 0.0003 max mem: 8426 +[2024-12-10 19:28:39 root] (utils.py 283): INFO Epoch: [15] [ 300/2502] eta: 0:28:08 lr: 0.000011 loss_cls: 3.9594 (3.9165) grad_norm: 2.4210 (2.3599) time: 0.7608 data: 0.0002 max mem: 8426 +[2024-12-10 19:28:47 root] (utils.py 283): INFO Epoch: [15] [ 310/2502] eta: 0:28:00 lr: 0.000011 loss_cls: 3.8072 (3.9112) grad_norm: 2.4059 (2.3614) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 19:28:55 root] (utils.py 283): INFO Epoch: [15] [ 320/2502] eta: 0:27:53 lr: 0.000011 loss_cls: 3.9913 (3.9095) grad_norm: 2.3622 (2.3606) time: 0.7712 data: 0.0002 max mem: 8426 +[2024-12-10 19:29:02 root] (utils.py 283): INFO Epoch: [15] [ 330/2502] eta: 0:27:46 lr: 0.000011 loss_cls: 4.1109 (3.9184) grad_norm: 2.3172 (2.3639) time: 0.7786 data: 0.0002 max mem: 8426 +[2024-12-10 19:29:10 root] (utils.py 283): INFO Epoch: [15] [ 340/2502] eta: 0:27:38 lr: 0.000011 loss_cls: 4.1253 (3.9154) grad_norm: 2.3572 (2.3635) time: 0.7677 data: 0.0002 max mem: 8426 +[2024-12-10 19:29:18 root] (utils.py 283): INFO Epoch: [15] [ 350/2502] eta: 0:27:30 lr: 0.000011 loss_cls: 4.2047 (3.9229) grad_norm: 2.3572 (2.3636) time: 0.7588 data: 0.0002 max mem: 8426 +[2024-12-10 19:29:25 root] (utils.py 283): INFO Epoch: [15] [ 360/2502] eta: 0:27:22 lr: 0.000011 loss_cls: 4.3725 (3.9274) grad_norm: 2.3984 (2.3645) time: 0.7637 data: 0.0003 max mem: 8426 +[2024-12-10 19:29:33 root] (utils.py 283): INFO Epoch: [15] [ 370/2502] eta: 0:27:14 lr: 0.000011 loss_cls: 4.1383 (3.9229) grad_norm: 2.3455 (2.3634) time: 0.7665 data: 0.0003 max mem: 8426 +[2024-12-10 19:29:41 root] (utils.py 283): INFO Epoch: [15] [ 380/2502] eta: 0:27:07 lr: 0.000011 loss_cls: 4.1188 (3.9265) grad_norm: 2.3185 (2.3636) time: 0.7673 data: 0.0002 max mem: 8426 +[2024-12-10 19:29:48 root] (utils.py 283): INFO Epoch: [15] [ 390/2502] eta: 0:26:59 lr: 0.000011 loss_cls: 4.1452 (3.9262) grad_norm: 2.3190 (2.3630) time: 0.7668 data: 0.0002 max mem: 8426 +[2024-12-10 19:29:56 root] (utils.py 283): INFO Epoch: [15] [ 400/2502] eta: 0:26:51 lr: 0.000011 loss_cls: 4.1404 (3.9269) grad_norm: 2.3434 (2.3624) time: 0.7639 data: 0.0003 max mem: 8426 +[2024-12-10 19:30:03 root] (utils.py 283): INFO Epoch: [15] [ 410/2502] eta: 0:26:43 lr: 0.000011 loss_cls: 3.7280 (3.9200) grad_norm: 2.3434 (2.3616) time: 0.7617 data: 0.0002 max mem: 8426 +[2024-12-10 19:30:11 root] (utils.py 283): INFO Epoch: [15] [ 420/2502] eta: 0:26:36 lr: 0.000011 loss_cls: 3.4918 (3.9135) grad_norm: 2.2831 (2.3597) time: 0.7654 data: 0.0002 max mem: 8426 +[2024-12-10 19:30:19 root] (utils.py 283): INFO Epoch: [15] [ 430/2502] eta: 0:26:27 lr: 0.000011 loss_cls: 3.8009 (3.9127) grad_norm: 2.3720 (2.3619) time: 0.7631 data: 0.0003 max mem: 8426 +[2024-12-10 19:30:26 root] (utils.py 283): INFO Epoch: [15] [ 440/2502] eta: 0:26:20 lr: 0.000011 loss_cls: 3.8504 (3.9099) grad_norm: 2.4151 (2.3629) time: 0.7608 data: 0.0002 max mem: 8426 +[2024-12-10 19:30:34 root] (utils.py 283): INFO Epoch: [15] [ 450/2502] eta: 0:26:12 lr: 0.000011 loss_cls: 3.8504 (3.9089) grad_norm: 2.3934 (2.3630) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 19:30:42 root] (utils.py 283): INFO Epoch: [15] [ 460/2502] eta: 0:26:04 lr: 0.000011 loss_cls: 3.8693 (3.9073) grad_norm: 2.3934 (2.3646) time: 0.7625 data: 0.0002 max mem: 8426 +[2024-12-10 19:30:49 root] (utils.py 283): INFO Epoch: [15] [ 470/2502] eta: 0:25:56 lr: 0.000011 loss_cls: 3.9486 (3.9096) grad_norm: 2.4068 (2.3654) time: 0.7617 data: 0.0002 max mem: 8426 +[2024-12-10 19:30:57 root] (utils.py 283): INFO Epoch: [15] [ 480/2502] eta: 0:25:48 lr: 0.000011 loss_cls: 3.9295 (3.9068) grad_norm: 2.3693 (2.3651) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 19:31:05 root] (utils.py 283): INFO Epoch: [15] [ 490/2502] eta: 0:25:41 lr: 0.000011 loss_cls: 3.9073 (3.9045) grad_norm: 2.3005 (2.3633) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 19:31:12 root] (utils.py 283): INFO Epoch: [15] [ 500/2502] eta: 0:25:33 lr: 0.000011 loss_cls: 3.8045 (3.9003) grad_norm: 2.1939 (2.3612) time: 0.7676 data: 0.0002 max mem: 8426 +[2024-12-10 19:31:20 root] (utils.py 283): INFO Epoch: [15] [ 510/2502] eta: 0:25:26 lr: 0.000011 loss_cls: 3.9534 (3.8974) grad_norm: 2.2383 (2.3606) time: 0.7668 data: 0.0002 max mem: 8426 +[2024-12-10 19:31:28 root] (utils.py 283): INFO Epoch: [15] [ 520/2502] eta: 0:25:18 lr: 0.000011 loss_cls: 3.9534 (3.8970) grad_norm: 2.3600 (2.3616) time: 0.7686 data: 0.0003 max mem: 8426 +[2024-12-10 19:31:35 root] (utils.py 283): INFO Epoch: [15] [ 530/2502] eta: 0:25:11 lr: 0.000011 loss_cls: 3.9404 (3.8970) grad_norm: 2.4006 (2.3619) time: 0.7686 data: 0.0002 max mem: 8426 +[2024-12-10 19:31:43 root] (utils.py 283): INFO Epoch: [15] [ 540/2502] eta: 0:25:03 lr: 0.000011 loss_cls: 3.5621 (3.8915) grad_norm: 2.3995 (2.3615) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-10 19:31:51 root] (utils.py 283): INFO Epoch: [15] [ 550/2502] eta: 0:24:55 lr: 0.000011 loss_cls: 3.6714 (3.8914) grad_norm: 2.3087 (2.3615) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 19:31:58 root] (utils.py 283): INFO Epoch: [15] [ 560/2502] eta: 0:24:47 lr: 0.000011 loss_cls: 4.1119 (3.8929) grad_norm: 2.3087 (2.3620) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-10 19:32:06 root] (utils.py 283): INFO Epoch: [15] [ 570/2502] eta: 0:24:40 lr: 0.000011 loss_cls: 4.1410 (3.8980) grad_norm: 2.3572 (2.3612) time: 0.7654 data: 0.0003 max mem: 8426 +[2024-12-10 19:32:13 root] (utils.py 283): INFO Epoch: [15] [ 580/2502] eta: 0:24:32 lr: 0.000011 loss_cls: 4.1605 (3.9021) grad_norm: 2.3342 (2.3612) time: 0.7624 data: 0.0003 max mem: 8426 +[2024-12-10 19:32:21 root] (utils.py 283): INFO Epoch: [15] [ 590/2502] eta: 0:24:24 lr: 0.000011 loss_cls: 4.2195 (3.9048) grad_norm: 2.3533 (2.3618) time: 0.7628 data: 0.0003 max mem: 8426 +[2024-12-10 19:32:29 root] (utils.py 283): INFO Epoch: [15] [ 600/2502] eta: 0:24:16 lr: 0.000011 loss_cls: 4.2179 (3.9060) grad_norm: 2.4087 (2.3637) time: 0.7647 data: 0.0003 max mem: 8426 +[2024-12-10 19:32:36 root] (utils.py 283): INFO Epoch: [15] [ 610/2502] eta: 0:24:09 lr: 0.000011 loss_cls: 4.0466 (3.9065) grad_norm: 2.3595 (2.3627) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 19:32:44 root] (utils.py 283): INFO Epoch: [15] [ 620/2502] eta: 0:24:01 lr: 0.000011 loss_cls: 4.0581 (3.9091) grad_norm: 2.3364 (2.3631) time: 0.7649 data: 0.0003 max mem: 8426 +[2024-12-10 19:32:52 root] (utils.py 283): INFO Epoch: [15] [ 630/2502] eta: 0:23:53 lr: 0.000011 loss_cls: 4.1191 (3.9113) grad_norm: 2.3619 (2.3640) time: 0.7641 data: 0.0003 max mem: 8426 +[2024-12-10 19:32:59 root] (utils.py 283): INFO Epoch: [15] [ 640/2502] eta: 0:23:46 lr: 0.000011 loss_cls: 3.9513 (3.9113) grad_norm: 2.3513 (2.3635) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 19:33:07 root] (utils.py 283): INFO Epoch: [15] [ 650/2502] eta: 0:23:38 lr: 0.000011 loss_cls: 3.9113 (3.9071) grad_norm: 2.3015 (2.3630) time: 0.7646 data: 0.0003 max mem: 8426 +[2024-12-10 19:33:15 root] (utils.py 283): INFO Epoch: [15] [ 660/2502] eta: 0:23:30 lr: 0.000011 loss_cls: 4.0010 (3.9107) grad_norm: 2.3191 (2.3625) time: 0.7702 data: 0.0003 max mem: 8426 +[2024-12-10 19:33:23 root] (utils.py 283): INFO Epoch: [15] [ 670/2502] eta: 0:23:23 lr: 0.000011 loss_cls: 4.0010 (3.9077) grad_norm: 2.3191 (2.3621) time: 0.7833 data: 0.0002 max mem: 8426 +[2024-12-10 19:33:31 root] (utils.py 283): INFO Epoch: [15] [ 680/2502] eta: 0:23:16 lr: 0.000011 loss_cls: 3.9807 (3.9100) grad_norm: 2.2976 (2.3623) time: 0.7902 data: 0.0002 max mem: 8426 +[2024-12-10 19:33:38 root] (utils.py 283): INFO Epoch: [15] [ 690/2502] eta: 0:23:09 lr: 0.000011 loss_cls: 3.9808 (3.9090) grad_norm: 2.3240 (2.3622) time: 0.7909 data: 0.0003 max mem: 8426 +[2024-12-10 19:33:46 root] (utils.py 283): INFO Epoch: [15] [ 700/2502] eta: 0:23:02 lr: 0.000011 loss_cls: 3.8917 (3.9078) grad_norm: 2.3301 (2.3622) time: 0.7894 data: 0.0003 max mem: 8426 +[2024-12-10 19:33:54 root] (utils.py 283): INFO Epoch: [15] [ 710/2502] eta: 0:22:55 lr: 0.000011 loss_cls: 4.0322 (3.9114) grad_norm: 2.3431 (2.3627) time: 0.7923 data: 0.0002 max mem: 8426 +[2024-12-10 19:34:02 root] (utils.py 283): INFO Epoch: [15] [ 720/2502] eta: 0:22:48 lr: 0.000011 loss_cls: 4.0480 (3.9106) grad_norm: 2.3564 (2.3633) time: 0.7795 data: 0.0002 max mem: 8426 +[2024-12-10 19:34:10 root] (utils.py 283): INFO Epoch: [15] [ 730/2502] eta: 0:22:40 lr: 0.000011 loss_cls: 4.1344 (3.9157) grad_norm: 2.3564 (2.3641) time: 0.7636 data: 0.0003 max mem: 8426 +[2024-12-10 19:34:17 root] (utils.py 283): INFO Epoch: [15] [ 740/2502] eta: 0:22:32 lr: 0.000011 loss_cls: 4.2748 (3.9168) grad_norm: 2.3160 (2.3633) time: 0.7653 data: 0.0003 max mem: 8426 +[2024-12-10 19:34:25 root] (utils.py 283): INFO Epoch: [15] [ 750/2502] eta: 0:22:24 lr: 0.000011 loss_cls: 4.1784 (3.9178) grad_norm: 2.2763 (2.3625) time: 0.7621 data: 0.0003 max mem: 8426 +[2024-12-10 19:34:32 root] (utils.py 283): INFO Epoch: [15] [ 760/2502] eta: 0:22:16 lr: 0.000011 loss_cls: 4.0399 (3.9180) grad_norm: 2.2934 (2.3628) time: 0.7602 data: 0.0003 max mem: 8426 +[2024-12-10 19:34:40 root] (utils.py 283): INFO Epoch: [15] [ 770/2502] eta: 0:22:09 lr: 0.000011 loss_cls: 3.8500 (3.9146) grad_norm: 2.3721 (2.3638) time: 0.7643 data: 0.0003 max mem: 8426 +[2024-12-10 19:34:48 root] (utils.py 283): INFO Epoch: [15] [ 780/2502] eta: 0:22:01 lr: 0.000011 loss_cls: 3.5074 (3.9096) grad_norm: 2.3721 (2.3637) time: 0.7649 data: 0.0003 max mem: 8426 +[2024-12-10 19:34:55 root] (utils.py 283): INFO Epoch: [15] [ 790/2502] eta: 0:21:53 lr: 0.000011 loss_cls: 3.8417 (3.9086) grad_norm: 2.3911 (2.3640) time: 0.7632 data: 0.0003 max mem: 8426 +[2024-12-10 19:35:03 root] (utils.py 283): INFO Epoch: [15] [ 800/2502] eta: 0:21:45 lr: 0.000011 loss_cls: 3.8909 (3.9065) grad_norm: 2.3524 (2.3635) time: 0.7641 data: 0.0003 max mem: 8426 +[2024-12-10 19:35:11 root] (utils.py 283): INFO Epoch: [15] [ 810/2502] eta: 0:21:38 lr: 0.000011 loss_cls: 3.8510 (3.9055) grad_norm: 2.3703 (2.3637) time: 0.7660 data: 0.0003 max mem: 8426 +[2024-12-10 19:35:18 root] (utils.py 283): INFO Epoch: [15] [ 820/2502] eta: 0:21:30 lr: 0.000011 loss_cls: 3.9365 (3.9040) grad_norm: 2.3703 (2.3635) time: 0.7726 data: 0.0002 max mem: 8426 +[2024-12-10 19:35:26 root] (utils.py 283): INFO Epoch: [15] [ 830/2502] eta: 0:21:23 lr: 0.000011 loss_cls: 3.9365 (3.9056) grad_norm: 2.3179 (2.3633) time: 0.7725 data: 0.0002 max mem: 8426 +[2024-12-10 19:35:34 root] (utils.py 283): INFO Epoch: [15] [ 840/2502] eta: 0:21:15 lr: 0.000011 loss_cls: 4.0708 (3.9060) grad_norm: 2.3066 (2.3630) time: 0.7659 data: 0.0002 max mem: 8426 +[2024-12-10 19:35:42 root] (utils.py 283): INFO Epoch: [15] [ 850/2502] eta: 0:21:08 lr: 0.000011 loss_cls: 4.1045 (3.9044) grad_norm: 2.3065 (2.3626) time: 0.7815 data: 0.0002 max mem: 8426 +[2024-12-10 19:35:50 root] (utils.py 283): INFO Epoch: [15] [ 860/2502] eta: 0:21:01 lr: 0.000011 loss_cls: 4.1697 (3.9060) grad_norm: 2.2855 (2.3618) time: 0.7978 data: 0.0002 max mem: 8426 +[2024-12-10 19:35:58 root] (utils.py 283): INFO Epoch: [15] [ 870/2502] eta: 0:20:54 lr: 0.000011 loss_cls: 3.9996 (3.9023) grad_norm: 2.3216 (2.3615) time: 0.7969 data: 0.0003 max mem: 8426 +[2024-12-10 19:36:06 root] (utils.py 283): INFO Epoch: [15] [ 880/2502] eta: 0:20:47 lr: 0.000011 loss_cls: 3.7755 (3.9023) grad_norm: 2.3677 (2.3619) time: 0.8005 data: 0.0002 max mem: 8426 +[2024-12-10 19:36:14 root] (utils.py 283): INFO Epoch: [15] [ 890/2502] eta: 0:20:39 lr: 0.000011 loss_cls: 3.8632 (3.9011) grad_norm: 2.3677 (2.3623) time: 0.7981 data: 0.0002 max mem: 8426 +[2024-12-10 19:36:22 root] (utils.py 283): INFO Epoch: [15] [ 900/2502] eta: 0:20:32 lr: 0.000011 loss_cls: 4.2044 (3.9046) grad_norm: 2.3494 (2.3623) time: 0.7939 data: 0.0002 max mem: 8426 +[2024-12-10 19:36:30 root] (utils.py 283): INFO Epoch: [15] [ 910/2502] eta: 0:20:25 lr: 0.000011 loss_cls: 4.2328 (3.9064) grad_norm: 2.3494 (2.3622) time: 0.7926 data: 0.0002 max mem: 8426 +[2024-12-10 19:36:37 root] (utils.py 283): INFO Epoch: [15] [ 920/2502] eta: 0:20:17 lr: 0.000011 loss_cls: 3.9637 (3.9032) grad_norm: 2.2988 (2.3614) time: 0.7866 data: 0.0002 max mem: 8426 +[2024-12-10 19:36:45 root] (utils.py 283): INFO Epoch: [15] [ 930/2502] eta: 0:20:10 lr: 0.000011 loss_cls: 3.7147 (3.9019) grad_norm: 2.2536 (2.3615) time: 0.7831 data: 0.0002 max mem: 8426 +[2024-12-10 19:36:53 root] (utils.py 283): INFO Epoch: [15] [ 940/2502] eta: 0:20:02 lr: 0.000011 loss_cls: 3.8638 (3.9019) grad_norm: 2.3285 (2.3613) time: 0.7817 data: 0.0002 max mem: 8426 +[2024-12-10 19:37:01 root] (utils.py 283): INFO Epoch: [15] [ 950/2502] eta: 0:19:55 lr: 0.000011 loss_cls: 4.1119 (3.9034) grad_norm: 2.3319 (2.3616) time: 0.7813 data: 0.0002 max mem: 8426 +[2024-12-10 19:37:09 root] (utils.py 283): INFO Epoch: [15] [ 960/2502] eta: 0:19:47 lr: 0.000011 loss_cls: 3.9283 (3.9019) grad_norm: 2.4130 (2.3623) time: 0.7815 data: 0.0002 max mem: 8426 +[2024-12-10 19:37:16 root] (utils.py 283): INFO Epoch: [15] [ 970/2502] eta: 0:19:40 lr: 0.000011 loss_cls: 3.4913 (3.9004) grad_norm: 2.3335 (2.3620) time: 0.7828 data: 0.0002 max mem: 8426 +[2024-12-10 19:37:24 root] (utils.py 283): INFO Epoch: [15] [ 980/2502] eta: 0:19:32 lr: 0.000011 loss_cls: 3.9830 (3.9009) grad_norm: 2.3099 (2.3617) time: 0.7841 data: 0.0002 max mem: 8426 +[2024-12-10 19:37:32 root] (utils.py 283): INFO Epoch: [15] [ 990/2502] eta: 0:19:25 lr: 0.000011 loss_cls: 4.3357 (3.9041) grad_norm: 2.3099 (2.3617) time: 0.7832 data: 0.0002 max mem: 8426 +[2024-12-10 19:37:40 root] (utils.py 283): INFO Epoch: [15] [1000/2502] eta: 0:19:17 lr: 0.000011 loss_cls: 4.1257 (3.9043) grad_norm: 2.2499 (2.3604) time: 0.7723 data: 0.0003 max mem: 8426 +[2024-12-10 19:37:47 root] (utils.py 283): INFO Epoch: [15] [1010/2502] eta: 0:19:09 lr: 0.000011 loss_cls: 3.7454 (3.9029) grad_norm: 2.3127 (2.3610) time: 0.7604 data: 0.0002 max mem: 8426 +[2024-12-10 19:37:55 root] (utils.py 283): INFO Epoch: [15] [1020/2502] eta: 0:19:01 lr: 0.000011 loss_cls: 3.4626 (3.8997) grad_norm: 2.3553 (2.3607) time: 0.7572 data: 0.0002 max mem: 8426 +[2024-12-10 19:38:03 root] (utils.py 283): INFO Epoch: [15] [1030/2502] eta: 0:18:53 lr: 0.000011 loss_cls: 3.6320 (3.8995) grad_norm: 2.3485 (2.3621) time: 0.7580 data: 0.0003 max mem: 8426 +[2024-12-10 19:38:10 root] (utils.py 283): INFO Epoch: [15] [1040/2502] eta: 0:18:45 lr: 0.000011 loss_cls: 4.0014 (3.9006) grad_norm: 2.3523 (2.3620) time: 0.7607 data: 0.0003 max mem: 8426 +[2024-12-10 19:38:18 root] (utils.py 283): INFO Epoch: [15] [1050/2502] eta: 0:18:38 lr: 0.000011 loss_cls: 4.1088 (3.9018) grad_norm: 2.3453 (2.3617) time: 0.7615 data: 0.0002 max mem: 8426 +[2024-12-10 19:38:26 root] (utils.py 283): INFO Epoch: [15] [1060/2502] eta: 0:18:30 lr: 0.000011 loss_cls: 4.1409 (3.9018) grad_norm: 2.3011 (2.3614) time: 0.7718 data: 0.0002 max mem: 8426 +[2024-12-10 19:38:33 root] (utils.py 283): INFO Epoch: [15] [1070/2502] eta: 0:18:23 lr: 0.000011 loss_cls: 3.7341 (3.9006) grad_norm: 2.3327 (2.3619) time: 0.7822 data: 0.0002 max mem: 8426 +[2024-12-10 19:38:41 root] (utils.py 283): INFO Epoch: [15] [1080/2502] eta: 0:18:15 lr: 0.000011 loss_cls: 3.7341 (3.8995) grad_norm: 2.3517 (2.3616) time: 0.7811 data: 0.0002 max mem: 8426 +[2024-12-10 19:38:49 root] (utils.py 283): INFO Epoch: [15] [1090/2502] eta: 0:18:07 lr: 0.000011 loss_cls: 4.0492 (3.9005) grad_norm: 2.3300 (2.3619) time: 0.7712 data: 0.0002 max mem: 8426 +[2024-12-10 19:38:56 root] (utils.py 283): INFO Epoch: [15] [1100/2502] eta: 0:17:59 lr: 0.000011 loss_cls: 3.8777 (3.8983) grad_norm: 2.3087 (2.3616) time: 0.7605 data: 0.0002 max mem: 8426 +[2024-12-10 19:39:04 root] (utils.py 283): INFO Epoch: [15] [1110/2502] eta: 0:17:51 lr: 0.000011 loss_cls: 3.8078 (3.8986) grad_norm: 2.3087 (2.3616) time: 0.7579 data: 0.0002 max mem: 8426 +[2024-12-10 19:39:12 root] (utils.py 283): INFO Epoch: [15] [1120/2502] eta: 0:17:44 lr: 0.000011 loss_cls: 3.7933 (3.8971) grad_norm: 2.2676 (2.3609) time: 0.7564 data: 0.0002 max mem: 8426 +[2024-12-10 19:39:19 root] (utils.py 283): INFO Epoch: [15] [1130/2502] eta: 0:17:36 lr: 0.000011 loss_cls: 3.7801 (3.8958) grad_norm: 2.3180 (2.3618) time: 0.7590 data: 0.0002 max mem: 8426 +[2024-12-10 19:39:27 root] (utils.py 283): INFO Epoch: [15] [1140/2502] eta: 0:17:28 lr: 0.000011 loss_cls: 4.0133 (3.8956) grad_norm: 2.3791 (2.3618) time: 0.7639 data: 0.0003 max mem: 8426 +[2024-12-10 19:39:35 root] (utils.py 283): INFO Epoch: [15] [1150/2502] eta: 0:17:20 lr: 0.000011 loss_cls: 3.9196 (3.8952) grad_norm: 2.3846 (2.3622) time: 0.7681 data: 0.0002 max mem: 8426 +[2024-12-10 19:39:42 root] (utils.py 283): INFO Epoch: [15] [1160/2502] eta: 0:17:13 lr: 0.000011 loss_cls: 3.8622 (3.8946) grad_norm: 2.3272 (2.3615) time: 0.7738 data: 0.0003 max mem: 8426 +[2024-12-10 19:39:50 root] (utils.py 283): INFO Epoch: [15] [1170/2502] eta: 0:17:05 lr: 0.000011 loss_cls: 3.7475 (3.8942) grad_norm: 2.3016 (2.3614) time: 0.7765 data: 0.0003 max mem: 8426 +[2024-12-10 19:39:58 root] (utils.py 283): INFO Epoch: [15] [1180/2502] eta: 0:16:57 lr: 0.000011 loss_cls: 4.1011 (3.8954) grad_norm: 2.3178 (2.3614) time: 0.7726 data: 0.0002 max mem: 8426 +[2024-12-10 19:40:05 root] (utils.py 283): INFO Epoch: [15] [1190/2502] eta: 0:16:50 lr: 0.000011 loss_cls: 4.0677 (3.8946) grad_norm: 2.3166 (2.3610) time: 0.7632 data: 0.0003 max mem: 8426 +[2024-12-10 19:40:13 root] (utils.py 283): INFO Epoch: [15] [1200/2502] eta: 0:16:42 lr: 0.000011 loss_cls: 4.0783 (3.8957) grad_norm: 2.3226 (2.3612) time: 0.7573 data: 0.0002 max mem: 8426 +[2024-12-10 19:40:20 root] (utils.py 283): INFO Epoch: [15] [1210/2502] eta: 0:16:34 lr: 0.000011 loss_cls: 4.3182 (3.8987) grad_norm: 2.3226 (2.3609) time: 0.7568 data: 0.0003 max mem: 8426 +[2024-12-10 19:40:28 root] (utils.py 283): INFO Epoch: [15] [1220/2502] eta: 0:16:26 lr: 0.000011 loss_cls: 4.1764 (3.8988) grad_norm: 2.2795 (2.3607) time: 0.7583 data: 0.0003 max mem: 8426 +[2024-12-10 19:40:36 root] (utils.py 283): INFO Epoch: [15] [1230/2502] eta: 0:16:18 lr: 0.000011 loss_cls: 4.0707 (3.8969) grad_norm: 2.3501 (2.3614) time: 0.7608 data: 0.0002 max mem: 8426 +[2024-12-10 19:40:43 root] (utils.py 283): INFO Epoch: [15] [1240/2502] eta: 0:16:10 lr: 0.000011 loss_cls: 3.6270 (3.8948) grad_norm: 2.3819 (2.3614) time: 0.7598 data: 0.0002 max mem: 8426 +[2024-12-10 19:40:51 root] (utils.py 283): INFO Epoch: [15] [1250/2502] eta: 0:16:03 lr: 0.000011 loss_cls: 3.8128 (3.8956) grad_norm: 2.3415 (2.3614) time: 0.7574 data: 0.0002 max mem: 8426 +[2024-12-10 19:40:58 root] (utils.py 283): INFO Epoch: [15] [1260/2502] eta: 0:15:55 lr: 0.000011 loss_cls: 4.0522 (3.8956) grad_norm: 2.3797 (2.3620) time: 0.7584 data: 0.0002 max mem: 8426 +[2024-12-10 19:41:06 root] (utils.py 283): INFO Epoch: [15] [1270/2502] eta: 0:15:47 lr: 0.000011 loss_cls: 4.0983 (3.8966) grad_norm: 2.3722 (2.3619) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 19:41:14 root] (utils.py 283): INFO Epoch: [15] [1280/2502] eta: 0:15:39 lr: 0.000011 loss_cls: 3.8300 (3.8941) grad_norm: 2.2940 (2.3614) time: 0.7675 data: 0.0002 max mem: 8426 +[2024-12-10 19:41:21 root] (utils.py 283): INFO Epoch: [15] [1290/2502] eta: 0:15:32 lr: 0.000011 loss_cls: 3.5391 (3.8929) grad_norm: 2.2769 (2.3612) time: 0.7628 data: 0.0003 max mem: 8426 +[2024-12-10 19:41:29 root] (utils.py 283): INFO Epoch: [15] [1300/2502] eta: 0:15:24 lr: 0.000011 loss_cls: 3.4657 (3.8896) grad_norm: 2.4025 (2.3617) time: 0.7595 data: 0.0002 max mem: 8426 +[2024-12-10 19:41:37 root] (utils.py 283): INFO Epoch: [15] [1310/2502] eta: 0:15:16 lr: 0.000011 loss_cls: 3.5045 (3.8884) grad_norm: 2.3972 (2.3618) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-10 19:41:44 root] (utils.py 283): INFO Epoch: [15] [1320/2502] eta: 0:15:08 lr: 0.000011 loss_cls: 3.7214 (3.8866) grad_norm: 2.3793 (2.3622) time: 0.7632 data: 0.0002 max mem: 8426 +[2024-12-10 19:41:52 root] (utils.py 283): INFO Epoch: [15] [1330/2502] eta: 0:15:01 lr: 0.000011 loss_cls: 3.7214 (3.8854) grad_norm: 2.2943 (2.3616) time: 0.7611 data: 0.0002 max mem: 8426 +[2024-12-10 19:41:59 root] (utils.py 283): INFO Epoch: [15] [1340/2502] eta: 0:14:53 lr: 0.000011 loss_cls: 3.9769 (3.8862) grad_norm: 2.3204 (2.3620) time: 0.7588 data: 0.0002 max mem: 8426 +[2024-12-10 19:42:07 root] (utils.py 283): INFO Epoch: [15] [1350/2502] eta: 0:14:45 lr: 0.000011 loss_cls: 4.2958 (3.8893) grad_norm: 2.4603 (2.3624) time: 0.7566 data: 0.0002 max mem: 8426 +[2024-12-10 19:42:15 root] (utils.py 283): INFO Epoch: [15] [1360/2502] eta: 0:14:37 lr: 0.000011 loss_cls: 4.2888 (3.8904) grad_norm: 2.3751 (2.3628) time: 0.7582 data: 0.0003 max mem: 8426 +[2024-12-10 19:42:22 root] (utils.py 283): INFO Epoch: [15] [1370/2502] eta: 0:14:30 lr: 0.000011 loss_cls: 4.1807 (3.8926) grad_norm: 2.3376 (2.3627) time: 0.7718 data: 0.0003 max mem: 8426 +[2024-12-10 19:42:30 root] (utils.py 283): INFO Epoch: [15] [1380/2502] eta: 0:14:22 lr: 0.000011 loss_cls: 4.0783 (3.8930) grad_norm: 2.2895 (2.3620) time: 0.7684 data: 0.0002 max mem: 8426 +[2024-12-10 19:42:38 root] (utils.py 283): INFO Epoch: [15] [1390/2502] eta: 0:14:14 lr: 0.000011 loss_cls: 3.9956 (3.8927) grad_norm: 2.3136 (2.3624) time: 0.7608 data: 0.0003 max mem: 8426 +[2024-12-10 19:42:45 root] (utils.py 283): INFO Epoch: [15] [1400/2502] eta: 0:14:06 lr: 0.000011 loss_cls: 3.8381 (3.8919) grad_norm: 2.3981 (2.3624) time: 0.7636 data: 0.0003 max mem: 8426 +[2024-12-10 19:42:53 root] (utils.py 283): INFO Epoch: [15] [1410/2502] eta: 0:13:59 lr: 0.000011 loss_cls: 3.6994 (3.8905) grad_norm: 2.4155 (2.3629) time: 0.7582 data: 0.0003 max mem: 8426 +[2024-12-10 19:43:00 root] (utils.py 283): INFO Epoch: [15] [1420/2502] eta: 0:13:51 lr: 0.000011 loss_cls: 3.7671 (3.8909) grad_norm: 2.4005 (2.3626) time: 0.7568 data: 0.0003 max mem: 8426 +[2024-12-10 19:43:08 root] (utils.py 283): INFO Epoch: [15] [1430/2502] eta: 0:13:43 lr: 0.000011 loss_cls: 4.3497 (3.8940) grad_norm: 2.2920 (2.3625) time: 0.7666 data: 0.0002 max mem: 8426 +[2024-12-10 19:43:16 root] (utils.py 283): INFO Epoch: [15] [1440/2502] eta: 0:13:35 lr: 0.000011 loss_cls: 4.3497 (3.8924) grad_norm: 2.3638 (2.3634) time: 0.7662 data: 0.0002 max mem: 8426 +[2024-12-10 19:43:23 root] (utils.py 283): INFO Epoch: [15] [1450/2502] eta: 0:13:28 lr: 0.000011 loss_cls: 3.7185 (3.8929) grad_norm: 2.3638 (2.3631) time: 0.7606 data: 0.0002 max mem: 8426 +[2024-12-10 19:43:31 root] (utils.py 283): INFO Epoch: [15] [1460/2502] eta: 0:13:20 lr: 0.000011 loss_cls: 4.0580 (3.8941) grad_norm: 2.2867 (2.3628) time: 0.7607 data: 0.0002 max mem: 8426 +[2024-12-10 19:43:39 root] (utils.py 283): INFO Epoch: [15] [1470/2502] eta: 0:13:12 lr: 0.000011 loss_cls: 3.8529 (3.8935) grad_norm: 2.3061 (2.3625) time: 0.7574 data: 0.0002 max mem: 8426 +[2024-12-10 19:43:46 root] (utils.py 283): INFO Epoch: [15] [1480/2502] eta: 0:13:05 lr: 0.000011 loss_cls: 3.7091 (3.8920) grad_norm: 2.2579 (2.3618) time: 0.7596 data: 0.0002 max mem: 8426 +[2024-12-10 19:43:54 root] (utils.py 283): INFO Epoch: [15] [1490/2502] eta: 0:12:57 lr: 0.000011 loss_cls: 3.4983 (3.8907) grad_norm: 2.2842 (2.3617) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 19:44:01 root] (utils.py 283): INFO Epoch: [15] [1500/2502] eta: 0:12:49 lr: 0.000011 loss_cls: 4.0040 (3.8910) grad_norm: 2.3529 (2.3615) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-10 19:44:09 root] (utils.py 283): INFO Epoch: [15] [1510/2502] eta: 0:12:41 lr: 0.000011 loss_cls: 4.2170 (3.8921) grad_norm: 2.3691 (2.3621) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 19:44:17 root] (utils.py 283): INFO Epoch: [15] [1520/2502] eta: 0:12:34 lr: 0.000011 loss_cls: 4.3796 (3.8951) grad_norm: 2.3999 (2.3624) time: 0.7671 data: 0.0002 max mem: 8426 +[2024-12-10 19:44:24 root] (utils.py 283): INFO Epoch: [15] [1530/2502] eta: 0:12:26 lr: 0.000011 loss_cls: 4.1833 (3.8944) grad_norm: 2.3088 (2.3623) time: 0.7670 data: 0.0002 max mem: 8426 +[2024-12-10 19:44:32 root] (utils.py 283): INFO Epoch: [15] [1540/2502] eta: 0:12:18 lr: 0.000011 loss_cls: 4.1228 (3.8956) grad_norm: 2.3055 (2.3626) time: 0.7659 data: 0.0002 max mem: 8426 +[2024-12-10 19:44:40 root] (utils.py 283): INFO Epoch: [15] [1550/2502] eta: 0:12:11 lr: 0.000011 loss_cls: 4.1774 (3.8967) grad_norm: 2.3796 (2.3630) time: 0.7660 data: 0.0003 max mem: 8426 +[2024-12-10 19:44:47 root] (utils.py 283): INFO Epoch: [15] [1560/2502] eta: 0:12:03 lr: 0.000011 loss_cls: 4.2165 (3.8978) grad_norm: 2.3777 (2.3629) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 19:44:55 root] (utils.py 283): INFO Epoch: [15] [1570/2502] eta: 0:11:55 lr: 0.000011 loss_cls: 4.1408 (3.8963) grad_norm: 2.3656 (2.3630) time: 0.7642 data: 0.0003 max mem: 8426 +[2024-12-10 19:45:03 root] (utils.py 283): INFO Epoch: [15] [1580/2502] eta: 0:11:48 lr: 0.000011 loss_cls: 4.0780 (3.8979) grad_norm: 2.4090 (2.3633) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 19:45:10 root] (utils.py 283): INFO Epoch: [15] [1590/2502] eta: 0:11:40 lr: 0.000011 loss_cls: 4.2361 (3.8989) grad_norm: 2.3996 (2.3634) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 19:45:18 root] (utils.py 283): INFO Epoch: [15] [1600/2502] eta: 0:11:32 lr: 0.000011 loss_cls: 4.2130 (3.8986) grad_norm: 2.3271 (2.3627) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-10 19:45:26 root] (utils.py 283): INFO Epoch: [15] [1610/2502] eta: 0:11:24 lr: 0.000011 loss_cls: 4.0189 (3.8977) grad_norm: 2.2714 (2.3624) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 19:45:33 root] (utils.py 283): INFO Epoch: [15] [1620/2502] eta: 0:11:17 lr: 0.000011 loss_cls: 3.6345 (3.8968) grad_norm: 2.3505 (2.3627) time: 0.7673 data: 0.0002 max mem: 8426 +[2024-12-10 19:45:41 root] (utils.py 283): INFO Epoch: [15] [1630/2502] eta: 0:11:09 lr: 0.000011 loss_cls: 4.0168 (3.8965) grad_norm: 2.3610 (2.3628) time: 0.7752 data: 0.0002 max mem: 8426 +[2024-12-10 19:45:49 root] (utils.py 283): INFO Epoch: [15] [1640/2502] eta: 0:11:02 lr: 0.000011 loss_cls: 3.8769 (3.8963) grad_norm: 2.3671 (2.3630) time: 0.7760 data: 0.0002 max mem: 8426 +[2024-12-10 19:45:56 root] (utils.py 283): INFO Epoch: [15] [1650/2502] eta: 0:10:54 lr: 0.000011 loss_cls: 3.9424 (3.8986) grad_norm: 2.4406 (2.3636) time: 0.7654 data: 0.0002 max mem: 8426 +[2024-12-10 19:46:04 root] (utils.py 283): INFO Epoch: [15] [1660/2502] eta: 0:10:46 lr: 0.000011 loss_cls: 4.0085 (3.8979) grad_norm: 2.3651 (2.3633) time: 0.7627 data: 0.0003 max mem: 8426 +[2024-12-10 19:46:12 root] (utils.py 283): INFO Epoch: [15] [1670/2502] eta: 0:10:38 lr: 0.000011 loss_cls: 4.0074 (3.8993) grad_norm: 2.3391 (2.3635) time: 0.7643 data: 0.0003 max mem: 8426 +[2024-12-10 19:46:19 root] (utils.py 283): INFO Epoch: [15] [1680/2502] eta: 0:10:31 lr: 0.000011 loss_cls: 4.1184 (3.8996) grad_norm: 2.3614 (2.3638) time: 0.7630 data: 0.0002 max mem: 8426 +[2024-12-10 19:46:27 root] (utils.py 283): INFO Epoch: [15] [1690/2502] eta: 0:10:23 lr: 0.000011 loss_cls: 4.1268 (3.9006) grad_norm: 2.4044 (2.3637) time: 0.7603 data: 0.0002 max mem: 8426 +[2024-12-10 19:46:35 root] (utils.py 283): INFO Epoch: [15] [1700/2502] eta: 0:10:15 lr: 0.000011 loss_cls: 4.1181 (3.9010) grad_norm: 2.3846 (2.3636) time: 0.7623 data: 0.0002 max mem: 8426 +[2024-12-10 19:46:42 root] (utils.py 283): INFO Epoch: [15] [1710/2502] eta: 0:10:08 lr: 0.000011 loss_cls: 4.0946 (3.9006) grad_norm: 2.3063 (2.3635) time: 0.7633 data: 0.0002 max mem: 8426 +[2024-12-10 19:46:50 root] (utils.py 283): INFO Epoch: [15] [1720/2502] eta: 0:10:00 lr: 0.000011 loss_cls: 4.0369 (3.9000) grad_norm: 2.2969 (2.3633) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 19:46:58 root] (utils.py 283): INFO Epoch: [15] [1730/2502] eta: 0:09:52 lr: 0.000011 loss_cls: 4.1710 (3.9018) grad_norm: 2.2975 (2.3629) time: 0.7671 data: 0.0002 max mem: 8426 +[2024-12-10 19:47:05 root] (utils.py 283): INFO Epoch: [15] [1740/2502] eta: 0:09:45 lr: 0.000011 loss_cls: 4.0760 (3.9008) grad_norm: 2.2975 (2.3627) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 19:47:13 root] (utils.py 283): INFO Epoch: [15] [1750/2502] eta: 0:09:37 lr: 0.000011 loss_cls: 3.8661 (3.9007) grad_norm: 2.3229 (2.3626) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 19:47:21 root] (utils.py 283): INFO Epoch: [15] [1760/2502] eta: 0:09:29 lr: 0.000011 loss_cls: 3.7318 (3.8994) grad_norm: 2.4056 (2.3631) time: 0.7670 data: 0.0002 max mem: 8426 +[2024-12-10 19:47:28 root] (utils.py 283): INFO Epoch: [15] [1770/2502] eta: 0:09:21 lr: 0.000011 loss_cls: 3.6169 (3.8985) grad_norm: 2.4056 (2.3633) time: 0.7659 data: 0.0002 max mem: 8426 +[2024-12-10 19:47:36 root] (utils.py 283): INFO Epoch: [15] [1780/2502] eta: 0:09:14 lr: 0.000011 loss_cls: 3.7737 (3.8996) grad_norm: 2.3875 (2.3640) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 19:47:44 root] (utils.py 283): INFO Epoch: [15] [1790/2502] eta: 0:09:06 lr: 0.000011 loss_cls: 3.7737 (3.8992) grad_norm: 2.3872 (2.3643) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-10 19:47:51 root] (utils.py 283): INFO Epoch: [15] [1800/2502] eta: 0:08:58 lr: 0.000011 loss_cls: 3.6298 (3.8977) grad_norm: 2.3565 (2.3645) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-10 19:47:59 root] (utils.py 283): INFO Epoch: [15] [1810/2502] eta: 0:08:51 lr: 0.000011 loss_cls: 3.7655 (3.8974) grad_norm: 2.4275 (2.3651) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 19:48:07 root] (utils.py 283): INFO Epoch: [15] [1820/2502] eta: 0:08:43 lr: 0.000011 loss_cls: 3.8426 (3.8977) grad_norm: 2.3834 (2.3650) time: 0.7691 data: 0.0002 max mem: 8426 +[2024-12-10 19:48:14 root] (utils.py 283): INFO Epoch: [15] [1830/2502] eta: 0:08:35 lr: 0.000011 loss_cls: 3.8314 (3.8979) grad_norm: 2.3078 (2.3651) time: 0.7803 data: 0.0002 max mem: 8426 +[2024-12-10 19:48:22 root] (utils.py 283): INFO Epoch: [15] [1840/2502] eta: 0:08:28 lr: 0.000011 loss_cls: 3.9876 (3.8993) grad_norm: 2.3078 (2.3651) time: 0.7824 data: 0.0002 max mem: 8426 +[2024-12-10 19:48:30 root] (utils.py 283): INFO Epoch: [15] [1850/2502] eta: 0:08:20 lr: 0.000011 loss_cls: 3.9876 (3.8982) grad_norm: 2.3553 (2.3653) time: 0.7812 data: 0.0003 max mem: 8426 +[2024-12-10 19:48:38 root] (utils.py 283): INFO Epoch: [15] [1860/2502] eta: 0:08:13 lr: 0.000011 loss_cls: 3.8639 (3.8987) grad_norm: 2.3879 (2.3656) time: 0.7816 data: 0.0003 max mem: 8426 +[2024-12-10 19:48:46 root] (utils.py 283): INFO Epoch: [15] [1870/2502] eta: 0:08:05 lr: 0.000011 loss_cls: 4.2723 (3.9003) grad_norm: 2.3396 (2.3651) time: 0.7818 data: 0.0002 max mem: 8426 +[2024-12-10 19:48:53 root] (utils.py 283): INFO Epoch: [15] [1880/2502] eta: 0:07:57 lr: 0.000011 loss_cls: 4.2109 (3.9014) grad_norm: 2.3484 (2.3653) time: 0.7824 data: 0.0002 max mem: 8426 +[2024-12-10 19:49:01 root] (utils.py 283): INFO Epoch: [15] [1890/2502] eta: 0:07:50 lr: 0.000011 loss_cls: 4.0121 (3.9020) grad_norm: 2.4051 (2.3654) time: 0.7834 data: 0.0003 max mem: 8426 +[2024-12-10 19:49:09 root] (utils.py 283): INFO Epoch: [15] [1900/2502] eta: 0:07:42 lr: 0.000011 loss_cls: 4.2367 (3.9031) grad_norm: 2.3947 (2.3656) time: 0.7826 data: 0.0003 max mem: 8426 +[2024-12-10 19:49:17 root] (utils.py 283): INFO Epoch: [15] [1910/2502] eta: 0:07:34 lr: 0.000011 loss_cls: 4.0170 (3.9019) grad_norm: 2.3568 (2.3654) time: 0.7801 data: 0.0002 max mem: 8426 +[2024-12-10 19:49:25 root] (utils.py 283): INFO Epoch: [15] [1920/2502] eta: 0:07:27 lr: 0.000011 loss_cls: 4.0105 (3.9033) grad_norm: 2.3232 (2.3655) time: 0.7807 data: 0.0002 max mem: 8426 +[2024-12-10 19:49:33 root] (utils.py 283): INFO Epoch: [15] [1930/2502] eta: 0:07:19 lr: 0.000011 loss_cls: 4.0105 (3.9026) grad_norm: 2.3869 (2.3658) time: 0.7832 data: 0.0003 max mem: 8426 +[2024-12-10 19:49:40 root] (utils.py 283): INFO Epoch: [15] [1940/2502] eta: 0:07:11 lr: 0.000011 loss_cls: 3.9116 (3.9035) grad_norm: 2.3816 (2.3656) time: 0.7818 data: 0.0002 max mem: 8426 +[2024-12-10 19:49:48 root] (utils.py 283): INFO Epoch: [15] [1950/2502] eta: 0:07:04 lr: 0.000011 loss_cls: 4.2178 (3.9043) grad_norm: 2.3369 (2.3654) time: 0.7812 data: 0.0002 max mem: 8426 +[2024-12-10 19:49:56 root] (utils.py 283): INFO Epoch: [15] [1960/2502] eta: 0:06:56 lr: 0.000011 loss_cls: 4.1057 (3.9038) grad_norm: 2.3175 (2.3653) time: 0.7830 data: 0.0002 max mem: 8426 +[2024-12-10 19:50:04 root] (utils.py 283): INFO Epoch: [15] [1970/2502] eta: 0:06:49 lr: 0.000011 loss_cls: 4.1374 (3.9042) grad_norm: 2.2965 (2.3652) time: 0.7833 data: 0.0003 max mem: 8426 +[2024-12-10 19:50:12 root] (utils.py 283): INFO Epoch: [15] [1980/2502] eta: 0:06:41 lr: 0.000011 loss_cls: 4.1482 (3.9046) grad_norm: 2.2814 (2.3647) time: 0.7840 data: 0.0003 max mem: 8426 +[2024-12-10 19:50:20 root] (utils.py 283): INFO Epoch: [15] [1990/2502] eta: 0:06:33 lr: 0.000011 loss_cls: 4.2897 (3.9064) grad_norm: 2.2853 (2.3649) time: 0.7835 data: 0.0003 max mem: 8426 +[2024-12-10 19:50:27 root] (utils.py 283): INFO Epoch: [15] [2000/2502] eta: 0:06:26 lr: 0.000011 loss_cls: 4.2669 (3.9068) grad_norm: 2.3728 (2.3647) time: 0.7818 data: 0.0003 max mem: 8426 +[2024-12-10 19:50:35 root] (utils.py 283): INFO Epoch: [15] [2010/2502] eta: 0:06:18 lr: 0.000011 loss_cls: 4.0892 (3.9062) grad_norm: 2.3728 (2.3649) time: 0.7883 data: 0.0002 max mem: 8426 +[2024-12-10 19:50:43 root] (utils.py 283): INFO Epoch: [15] [2020/2502] eta: 0:06:10 lr: 0.000011 loss_cls: 3.5614 (3.9047) grad_norm: 2.3606 (2.3648) time: 0.7780 data: 0.0002 max mem: 8426 +[2024-12-10 19:50:51 root] (utils.py 283): INFO Epoch: [15] [2030/2502] eta: 0:06:03 lr: 0.000011 loss_cls: 3.5614 (3.9037) grad_norm: 2.3606 (2.3649) time: 0.7656 data: 0.0003 max mem: 8426 +[2024-12-10 19:50:58 root] (utils.py 283): INFO Epoch: [15] [2040/2502] eta: 0:05:55 lr: 0.000011 loss_cls: 3.9882 (3.9039) grad_norm: 2.3278 (2.3647) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 19:51:06 root] (utils.py 283): INFO Epoch: [15] [2050/2502] eta: 0:05:47 lr: 0.000011 loss_cls: 4.2313 (3.9055) grad_norm: 2.3161 (2.3644) time: 0.7607 data: 0.0002 max mem: 8426 +[2024-12-10 19:51:13 root] (utils.py 283): INFO Epoch: [15] [2060/2502] eta: 0:05:39 lr: 0.000011 loss_cls: 4.2139 (3.9066) grad_norm: 2.3164 (2.3642) time: 0.7633 data: 0.0002 max mem: 8426 +[2024-12-10 19:51:21 root] (utils.py 283): INFO Epoch: [15] [2070/2502] eta: 0:05:32 lr: 0.000011 loss_cls: 4.2737 (3.9076) grad_norm: 2.3441 (2.3642) time: 0.7654 data: 0.0003 max mem: 8426 +[2024-12-10 19:51:29 root] (utils.py 283): INFO Epoch: [15] [2080/2502] eta: 0:05:24 lr: 0.000011 loss_cls: 4.0023 (3.9068) grad_norm: 2.3491 (2.3642) time: 0.7659 data: 0.0003 max mem: 8426 +[2024-12-10 19:51:36 root] (utils.py 283): INFO Epoch: [15] [2090/2502] eta: 0:05:16 lr: 0.000011 loss_cls: 4.0040 (3.9081) grad_norm: 2.3491 (2.3642) time: 0.7630 data: 0.0003 max mem: 8426 +[2024-12-10 19:51:44 root] (utils.py 283): INFO Epoch: [15] [2100/2502] eta: 0:05:09 lr: 0.000011 loss_cls: 4.1174 (3.9089) grad_norm: 2.3160 (2.3642) time: 0.7627 data: 0.0003 max mem: 8426 +[2024-12-10 19:51:52 root] (utils.py 283): INFO Epoch: [15] [2110/2502] eta: 0:05:01 lr: 0.000011 loss_cls: 4.0314 (3.9088) grad_norm: 2.3050 (2.3638) time: 0.7665 data: 0.0003 max mem: 8426 +[2024-12-10 19:51:59 root] (utils.py 283): INFO Epoch: [15] [2120/2502] eta: 0:04:53 lr: 0.000011 loss_cls: 4.0314 (3.9090) grad_norm: 2.2892 (2.3637) time: 0.7668 data: 0.0002 max mem: 8426 +[2024-12-10 19:52:07 root] (utils.py 283): INFO Epoch: [15] [2130/2502] eta: 0:04:46 lr: 0.000011 loss_cls: 4.1132 (3.9089) grad_norm: 2.3235 (2.3638) time: 0.7633 data: 0.0002 max mem: 8426 +[2024-12-10 19:52:15 root] (utils.py 283): INFO Epoch: [15] [2140/2502] eta: 0:04:38 lr: 0.000011 loss_cls: 4.1886 (3.9087) grad_norm: 2.3635 (2.3637) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-10 19:52:22 root] (utils.py 283): INFO Epoch: [15] [2150/2502] eta: 0:04:30 lr: 0.000011 loss_cls: 4.2434 (3.9094) grad_norm: 2.3103 (2.3635) time: 0.7612 data: 0.0002 max mem: 8426 +[2024-12-10 19:52:30 root] (utils.py 283): INFO Epoch: [15] [2160/2502] eta: 0:04:22 lr: 0.000011 loss_cls: 4.2434 (3.9100) grad_norm: 2.2818 (2.3634) time: 0.7612 data: 0.0002 max mem: 8426 +[2024-12-10 19:52:37 root] (utils.py 283): INFO Epoch: [15] [2170/2502] eta: 0:04:15 lr: 0.000011 loss_cls: 4.1682 (3.9108) grad_norm: 2.2685 (2.3631) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 19:52:45 root] (utils.py 283): INFO Epoch: [15] [2180/2502] eta: 0:04:07 lr: 0.000011 loss_cls: 3.9916 (3.9100) grad_norm: 2.2484 (2.3630) time: 0.7734 data: 0.0002 max mem: 8426 +[2024-12-10 19:52:53 root] (utils.py 283): INFO Epoch: [15] [2190/2502] eta: 0:03:59 lr: 0.000011 loss_cls: 3.9916 (3.9102) grad_norm: 2.2967 (2.3629) time: 0.7826 data: 0.0002 max mem: 8426 +[2024-12-10 19:53:01 root] (utils.py 283): INFO Epoch: [15] [2200/2502] eta: 0:03:52 lr: 0.000011 loss_cls: 4.1768 (3.9114) grad_norm: 2.3324 (2.3629) time: 0.7840 data: 0.0002 max mem: 8426 +[2024-12-10 19:53:09 root] (utils.py 283): INFO Epoch: [15] [2210/2502] eta: 0:03:44 lr: 0.000011 loss_cls: 4.1650 (3.9113) grad_norm: 2.3065 (2.3627) time: 0.7863 data: 0.0002 max mem: 8426 +[2024-12-10 19:53:17 root] (utils.py 283): INFO Epoch: [15] [2220/2502] eta: 0:03:36 lr: 0.000011 loss_cls: 4.0622 (3.9118) grad_norm: 2.3108 (2.3625) time: 0.7811 data: 0.0002 max mem: 8426 +[2024-12-10 19:53:24 root] (utils.py 283): INFO Epoch: [15] [2230/2502] eta: 0:03:29 lr: 0.000011 loss_cls: 4.0840 (3.9120) grad_norm: 2.3494 (2.3628) time: 0.7686 data: 0.0002 max mem: 8426 +[2024-12-10 19:53:32 root] (utils.py 283): INFO Epoch: [15] [2240/2502] eta: 0:03:21 lr: 0.000011 loss_cls: 4.0188 (3.9115) grad_norm: 2.3195 (2.3627) time: 0.7609 data: 0.0002 max mem: 8426 +[2024-12-10 19:53:39 root] (utils.py 283): INFO Epoch: [15] [2250/2502] eta: 0:03:13 lr: 0.000011 loss_cls: 3.8880 (3.9105) grad_norm: 2.3195 (2.3627) time: 0.7623 data: 0.0002 max mem: 8426 +[2024-12-10 19:53:47 root] (utils.py 283): INFO Epoch: [15] [2260/2502] eta: 0:03:06 lr: 0.000011 loss_cls: 3.9655 (3.9106) grad_norm: 2.3613 (2.3626) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 19:53:55 root] (utils.py 283): INFO Epoch: [15] [2270/2502] eta: 0:02:58 lr: 0.000011 loss_cls: 4.0479 (3.9096) grad_norm: 2.3251 (2.3623) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 19:54:02 root] (utils.py 283): INFO Epoch: [15] [2280/2502] eta: 0:02:50 lr: 0.000011 loss_cls: 4.0028 (3.9099) grad_norm: 2.3502 (2.3627) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 19:54:10 root] (utils.py 283): INFO Epoch: [15] [2290/2502] eta: 0:02:42 lr: 0.000011 loss_cls: 4.1627 (3.9108) grad_norm: 2.3765 (2.3626) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 19:54:18 root] (utils.py 283): INFO Epoch: [15] [2300/2502] eta: 0:02:35 lr: 0.000011 loss_cls: 4.1125 (3.9110) grad_norm: 2.3381 (2.3626) time: 0.7632 data: 0.0002 max mem: 8426 +[2024-12-10 19:54:25 root] (utils.py 283): INFO Epoch: [15] [2310/2502] eta: 0:02:27 lr: 0.000011 loss_cls: 4.0531 (3.9106) grad_norm: 2.2981 (2.3625) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 19:54:33 root] (utils.py 283): INFO Epoch: [15] [2320/2502] eta: 0:02:19 lr: 0.000011 loss_cls: 3.9350 (3.9095) grad_norm: 2.3613 (2.3626) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 19:54:41 root] (utils.py 283): INFO Epoch: [15] [2330/2502] eta: 0:02:12 lr: 0.000011 loss_cls: 3.8922 (3.9100) grad_norm: 2.3570 (2.3624) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 19:54:48 root] (utils.py 283): INFO Epoch: [15] [2340/2502] eta: 0:02:04 lr: 0.000011 loss_cls: 4.1080 (3.9110) grad_norm: 2.2467 (2.3621) time: 0.7617 data: 0.0002 max mem: 8426 +[2024-12-10 19:54:56 root] (utils.py 283): INFO Epoch: [15] [2350/2502] eta: 0:01:56 lr: 0.000011 loss_cls: 4.1080 (3.9107) grad_norm: 2.3244 (2.3619) time: 0.7617 data: 0.0002 max mem: 8426 +[2024-12-10 19:55:03 root] (utils.py 283): INFO Epoch: [15] [2360/2502] eta: 0:01:49 lr: 0.000011 loss_cls: 3.5918 (3.9088) grad_norm: 2.3348 (2.3626) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 19:55:11 root] (utils.py 283): INFO Epoch: [15] [2370/2502] eta: 0:01:41 lr: 0.000011 loss_cls: 3.5918 (3.9094) grad_norm: 2.3097 (2.3627) time: 0.7615 data: 0.0002 max mem: 8426 +[2024-12-10 19:55:19 root] (utils.py 283): INFO Epoch: [15] [2380/2502] eta: 0:01:33 lr: 0.000011 loss_cls: 3.9772 (3.9087) grad_norm: 2.3906 (2.3629) time: 0.7632 data: 0.0002 max mem: 8426 +[2024-12-10 19:55:26 root] (utils.py 283): INFO Epoch: [15] [2390/2502] eta: 0:01:26 lr: 0.000011 loss_cls: 3.9772 (3.9090) grad_norm: 2.3800 (2.3627) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-10 19:55:34 root] (utils.py 283): INFO Epoch: [15] [2400/2502] eta: 0:01:18 lr: 0.000011 loss_cls: 3.8457 (3.9083) grad_norm: 2.3584 (2.3626) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 19:55:42 root] (utils.py 283): INFO Epoch: [15] [2410/2502] eta: 0:01:10 lr: 0.000011 loss_cls: 3.9505 (3.9094) grad_norm: 2.3697 (2.3626) time: 0.7654 data: 0.0002 max mem: 8426 +[2024-12-10 19:55:49 root] (utils.py 283): INFO Epoch: [15] [2420/2502] eta: 0:01:03 lr: 0.000011 loss_cls: 4.3006 (3.9103) grad_norm: 2.3697 (2.3629) time: 0.7634 data: 0.0003 max mem: 8426 +[2024-12-10 19:55:57 root] (utils.py 283): INFO Epoch: [15] [2430/2502] eta: 0:00:55 lr: 0.000011 loss_cls: 3.7220 (3.9089) grad_norm: 2.3768 (2.3629) time: 0.7674 data: 0.0003 max mem: 8426 +[2024-12-10 19:56:05 root] (utils.py 283): INFO Epoch: [15] [2440/2502] eta: 0:00:47 lr: 0.000011 loss_cls: 3.6538 (3.9094) grad_norm: 2.3703 (2.3632) time: 0.7681 data: 0.0003 max mem: 8426 +[2024-12-10 19:56:12 root] (utils.py 283): INFO Epoch: [15] [2450/2502] eta: 0:00:39 lr: 0.000011 loss_cls: 4.0198 (3.9097) grad_norm: 2.3703 (2.3631) time: 0.7740 data: 0.0003 max mem: 8426 +[2024-12-10 19:56:20 root] (utils.py 283): INFO Epoch: [15] [2460/2502] eta: 0:00:32 lr: 0.000011 loss_cls: 4.3875 (3.9117) grad_norm: 2.3358 (2.3632) time: 0.7804 data: 0.0002 max mem: 8426 +[2024-12-10 19:56:28 root] (utils.py 283): INFO Epoch: [15] [2470/2502] eta: 0:00:24 lr: 0.000011 loss_cls: 4.2220 (3.9113) grad_norm: 2.3358 (2.3631) time: 0.7675 data: 0.0003 max mem: 8426 +[2024-12-10 19:56:35 root] (utils.py 283): INFO Epoch: [15] [2480/2502] eta: 0:00:16 lr: 0.000011 loss_cls: 4.2077 (3.9118) grad_norm: 2.3713 (2.3635) time: 0.7634 data: 0.0003 max mem: 8426 +[2024-12-10 19:56:44 root] (utils.py 283): INFO Epoch: [15] [2490/2502] eta: 0:00:09 lr: 0.000011 loss_cls: 3.7152 (3.9109) grad_norm: 2.4486 (2.3639) time: 0.7893 data: 0.0234 max mem: 8426 +[2024-12-10 19:56:51 root] (utils.py 283): INFO Epoch: [15] [2500/2502] eta: 0:00:01 lr: 0.000011 loss_cls: 3.5817 (3.9102) grad_norm: 2.4486 (2.3642) time: 0.7864 data: 0.0234 max mem: 8426 +[2024-12-10 19:56:52 root] (utils.py 283): INFO Epoch: [15] [2501/2502] eta: 0:00:00 lr: 0.000011 loss_cls: 3.5817 (3.9101) grad_norm: 2.4486 (2.3642) time: 0.7856 data: 0.0234 max mem: 8426 +[2024-12-10 19:56:52 root] (utils.py 297): INFO Epoch: [15] Total time: 0:32:03 (0.7688 s / it) +[2024-12-10 19:56:52 root] (engine.py 179): INFO Averaged stats:lr: 0.000011 loss_cls: 3.5817 (3.9154) grad_norm: 2.4486 (2.3642) +[2024-12-10 19:56:52 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.6095 (0.6095) acc1: 87.5000 (87.5000) acc3: 96.8750 (96.8750) acc5: 99.2188 (99.2188) time: 0.1275 data: 0.0004 max mem: 8426 +[2024-12-10 19:56:54 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7069 (0.8064) acc1: 86.7188 (82.3864) acc3: 95.3125 (93.6080) acc5: 97.6562 (96.8040) time: 0.1277 data: 0.0004 max mem: 8426 +[2024-12-10 19:56:55 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8603 (0.8624) acc1: 79.6875 (81.1384) acc3: 92.1875 (92.7827) acc5: 95.3125 (95.8705) time: 0.1279 data: 0.0004 max mem: 8426 +[2024-12-10 19:56:56 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9762 (0.8767) acc1: 79.6875 (80.3931) acc3: 92.1875 (92.9183) acc5: 95.3125 (95.8669) time: 0.1280 data: 0.0004 max mem: 8426 +[2024-12-10 19:56:58 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7992 (0.8678) acc1: 80.4688 (80.7927) acc3: 93.7500 (93.0259) acc5: 96.0938 (95.9223) time: 0.1416 data: 0.0132 max mem: 8426 +[2024-12-10 19:57:00 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0492 (0.9516) acc1: 75.7812 (78.7377) acc3: 88.2812 (91.6360) acc5: 92.9688 (94.9142) time: 0.1670 data: 0.0365 max mem: 8426 +[2024-12-10 19:57:01 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2552 (0.9973) acc1: 72.6562 (77.9969) acc3: 85.9375 (90.7531) acc5: 89.8438 (94.0318) time: 0.1758 data: 0.0461 max mem: 8426 +[2024-12-10 19:57:03 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:04 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1906 (1.0372) acc1: 74.2188 (77.1017) acc3: 86.7188 (90.3169) acc5: 89.8438 (93.5960) time: 0.1544 data: 0.0263 max mem: 8426 +[2024-12-10 19:57:04 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2067 (1.0703) acc1: 72.6562 (76.3117) acc3: 85.9375 (89.7087) acc5: 90.6250 (93.1520) time: 0.1324 data: 0.0042 max mem: 8426 +[2024-12-10 19:57:05 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2243 (1.0978) acc1: 71.0938 (75.6439) acc3: 85.1562 (89.3201) acc5: 89.8438 (92.8486) time: 0.1298 data: 0.0007 max mem: 8426 +[2024-12-10 19:57:06 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1446 (1.0875) acc1: 74.2188 (75.8240) acc3: 88.2812 (89.4560) acc5: 90.6250 (93.0240) time: 0.1299 data: 0.0007 max mem: 8426 +[2024-12-10 19:57:06 root] (utils.py 297): INFO Test: Total time: 0:00:13 (0.1419 s / it) +[2024-12-10 19:57:07 root] (engine.py 264): INFO * Acc@1 75.634 Acc@3 89.552 Acc@5 92.854 loss 1.090 flops 1.285 layer_flops 1.251 +[2024-12-10 19:57:07 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 75.6% +[2024-12-10 19:57:07 root] (main.py 576): INFO Max accuracy: 75.65% +[2024-12-10 19:57:08 root] (utils.py 283): INFO Epoch: [16] [ 0/2502] eta: 0:31:45 lr: 0.000011 loss_cls: 3.5573 (3.5573) grad_norm: 2.2934 (2.2934) time: 0.7614 data: 0.0002 max mem: 8426 +[2024-12-10 19:57:16 root] (utils.py 283): INFO Epoch: [16] [ 10/2502] eta: 0:31:47 lr: 0.000011 loss_cls: 3.8381 (3.7290) grad_norm: 2.3080 (2.3268) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-10 19:57:23 root] (utils.py 283): INFO Epoch: [16] [ 20/2502] eta: 0:31:38 lr: 0.000011 loss_cls: 3.9252 (3.8544) grad_norm: 2.3159 (2.3170) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 19:57:31 root] (utils.py 283): INFO Epoch: [16] [ 30/2502] eta: 0:31:28 lr: 0.000011 loss_cls: 3.9298 (3.8361) grad_norm: 2.3387 (2.3233) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 19:57:39 root] (utils.py 283): INFO Epoch: [16] [ 40/2502] eta: 0:31:24 lr: 0.000011 loss_cls: 4.0760 (3.9059) grad_norm: 2.3640 (2.3338) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-10 19:57:47 root] (utils.py 283): INFO Epoch: [16] [ 50/2502] eta: 0:31:26 lr: 0.000011 loss_cls: 4.1798 (3.9167) grad_norm: 2.3480 (2.3428) time: 0.7782 data: 0.0002 max mem: 8426 +[2024-12-10 19:57:54 root] (utils.py 283): INFO Epoch: [16] [ 60/2502] eta: 0:31:20 lr: 0.000011 loss_cls: 4.1453 (3.9510) grad_norm: 2.3116 (2.3456) time: 0.7794 data: 0.0002 max mem: 8426 +[2024-12-10 19:58:02 root] (utils.py 283): INFO Epoch: [16] [ 70/2502] eta: 0:31:11 lr: 0.000011 loss_cls: 4.0847 (3.9121) grad_norm: 2.3114 (2.3438) time: 0.7701 data: 0.0002 max mem: 8426 +[2024-12-10 19:58:10 root] (utils.py 283): INFO Epoch: [16] [ 80/2502] eta: 0:31:03 lr: 0.000011 loss_cls: 3.9013 (3.9134) grad_norm: 2.3132 (2.3505) time: 0.7666 data: 0.0002 max mem: 8426 +[2024-12-10 19:58:17 root] (utils.py 283): INFO Epoch: [16] [ 90/2502] eta: 0:30:53 lr: 0.000011 loss_cls: 3.9013 (3.8832) grad_norm: 2.3069 (2.3427) time: 0.7637 data: 0.0003 max mem: 8426 +[2024-12-10 19:58:25 root] (utils.py 283): INFO Epoch: [16] [ 100/2502] eta: 0:30:43 lr: 0.000011 loss_cls: 3.6679 (3.8781) grad_norm: 2.2918 (2.3447) time: 0.7607 data: 0.0002 max mem: 8426 +[2024-12-10 19:58:32 root] (utils.py 283): INFO Epoch: [16] [ 110/2502] eta: 0:30:34 lr: 0.000011 loss_cls: 4.0917 (3.9006) grad_norm: 2.3344 (2.3466) time: 0.7604 data: 0.0002 max mem: 8426 +[2024-12-10 19:58:40 root] (utils.py 283): INFO Epoch: [16] [ 120/2502] eta: 0:30:27 lr: 0.000011 loss_cls: 4.2705 (3.9103) grad_norm: 2.3325 (2.3438) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 19:58:48 root] (utils.py 283): INFO Epoch: [16] [ 130/2502] eta: 0:30:19 lr: 0.000011 loss_cls: 4.0498 (3.9003) grad_norm: 2.2944 (2.3418) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 19:58:55 root] (utils.py 283): INFO Epoch: [16] [ 140/2502] eta: 0:30:11 lr: 0.000011 loss_cls: 3.9242 (3.9139) grad_norm: 2.3449 (2.3461) time: 0.7668 data: 0.0002 max mem: 8426 +[2024-12-10 19:59:03 root] (utils.py 283): INFO Epoch: [16] [ 150/2502] eta: 0:30:04 lr: 0.000011 loss_cls: 3.9102 (3.9126) grad_norm: 2.3762 (2.3447) time: 0.7699 data: 0.0002 max mem: 8426 +[2024-12-10 19:59:11 root] (utils.py 283): INFO Epoch: [16] [ 160/2502] eta: 0:29:56 lr: 0.000011 loss_cls: 3.7676 (3.9004) grad_norm: 2.3166 (2.3444) time: 0.7680 data: 0.0002 max mem: 8426 +[2024-12-10 19:59:18 root] (utils.py 283): INFO Epoch: [16] [ 170/2502] eta: 0:29:48 lr: 0.000011 loss_cls: 3.7676 (3.9099) grad_norm: 2.2651 (2.3385) time: 0.7633 data: 0.0002 max mem: 8426 +[2024-12-10 19:59:26 root] (utils.py 283): INFO Epoch: [16] [ 180/2502] eta: 0:29:41 lr: 0.000011 loss_cls: 3.8483 (3.9055) grad_norm: 2.2974 (2.3436) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-10 19:59:34 root] (utils.py 283): INFO Epoch: [16] [ 190/2502] eta: 0:29:33 lr: 0.000011 loss_cls: 3.9832 (3.9134) grad_norm: 2.3568 (2.3426) time: 0.7684 data: 0.0002 max mem: 8426 +[2024-12-10 19:59:41 root] (utils.py 283): INFO Epoch: [16] [ 200/2502] eta: 0:29:25 lr: 0.000011 loss_cls: 3.9832 (3.9175) grad_norm: 2.3112 (2.3432) time: 0.7665 data: 0.0003 max mem: 8426 +[2024-12-10 19:59:49 root] (utils.py 283): INFO Epoch: [16] [ 210/2502] eta: 0:29:18 lr: 0.000011 loss_cls: 3.8476 (3.9011) grad_norm: 2.3478 (2.3452) time: 0.7694 data: 0.0003 max mem: 8426 +[2024-12-10 19:59:57 root] (utils.py 283): INFO Epoch: [16] [ 220/2502] eta: 0:29:10 lr: 0.000011 loss_cls: 3.6608 (3.8970) grad_norm: 2.4232 (2.3509) time: 0.7677 data: 0.0002 max mem: 8426 +[2024-12-10 20:00:04 root] (utils.py 283): INFO Epoch: [16] [ 230/2502] eta: 0:29:02 lr: 0.000011 loss_cls: 4.1754 (3.9086) grad_norm: 2.3907 (2.3506) time: 0.7639 data: 0.0003 max mem: 8426 +[2024-12-10 20:00:12 root] (utils.py 283): INFO Epoch: [16] [ 240/2502] eta: 0:28:54 lr: 0.000011 loss_cls: 4.2133 (3.9186) grad_norm: 2.2984 (2.3489) time: 0.7651 data: 0.0003 max mem: 8426 +[2024-12-10 20:00:20 root] (utils.py 283): INFO Epoch: [16] [ 250/2502] eta: 0:28:47 lr: 0.000011 loss_cls: 4.1396 (3.9270) grad_norm: 2.3294 (2.3500) time: 0.7674 data: 0.0003 max mem: 8426 +[2024-12-10 20:00:27 root] (utils.py 283): INFO Epoch: [16] [ 260/2502] eta: 0:28:39 lr: 0.000011 loss_cls: 4.2054 (3.9449) grad_norm: 2.3491 (2.3513) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-10 20:00:35 root] (utils.py 283): INFO Epoch: [16] [ 270/2502] eta: 0:28:32 lr: 0.000011 loss_cls: 4.2322 (3.9490) grad_norm: 2.3800 (2.3512) time: 0.7707 data: 0.0002 max mem: 8426 +[2024-12-10 20:00:43 root] (utils.py 283): INFO Epoch: [16] [ 280/2502] eta: 0:28:25 lr: 0.000011 loss_cls: 4.1982 (3.9530) grad_norm: 2.3411 (2.3513) time: 0.7794 data: 0.0002 max mem: 8426 +[2024-12-10 20:00:51 root] (utils.py 283): INFO Epoch: [16] [ 290/2502] eta: 0:28:18 lr: 0.000011 loss_cls: 3.9309 (3.9460) grad_norm: 2.3411 (2.3508) time: 0.7736 data: 0.0002 max mem: 8426 +[2024-12-10 20:00:58 root] (utils.py 283): INFO Epoch: [16] [ 300/2502] eta: 0:28:10 lr: 0.000011 loss_cls: 3.7509 (3.9408) grad_norm: 2.3349 (2.3496) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 20:01:06 root] (utils.py 283): INFO Epoch: [16] [ 310/2502] eta: 0:28:02 lr: 0.000011 loss_cls: 3.9254 (3.9404) grad_norm: 2.3349 (2.3499) time: 0.7688 data: 0.0002 max mem: 8426 +[2024-12-10 20:01:14 root] (utils.py 283): INFO Epoch: [16] [ 320/2502] eta: 0:27:55 lr: 0.000011 loss_cls: 3.9254 (3.9337) grad_norm: 2.3552 (2.3525) time: 0.7683 data: 0.0003 max mem: 8426 +[2024-12-10 20:01:21 root] (utils.py 283): INFO Epoch: [16] [ 330/2502] eta: 0:27:47 lr: 0.000011 loss_cls: 3.6752 (3.9271) grad_norm: 2.3268 (2.3499) time: 0.7677 data: 0.0002 max mem: 8426 +[2024-12-10 20:01:29 root] (utils.py 283): INFO Epoch: [16] [ 340/2502] eta: 0:27:39 lr: 0.000011 loss_cls: 3.8964 (3.9297) grad_norm: 2.3268 (2.3502) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-10 20:01:37 root] (utils.py 283): INFO Epoch: [16] [ 350/2502] eta: 0:27:32 lr: 0.000011 loss_cls: 4.0068 (3.9333) grad_norm: 2.3710 (2.3505) time: 0.7681 data: 0.0002 max mem: 8426 +[2024-12-10 20:01:45 root] (utils.py 283): INFO Epoch: [16] [ 360/2502] eta: 0:27:25 lr: 0.000011 loss_cls: 4.2319 (3.9322) grad_norm: 2.3019 (2.3491) time: 0.7768 data: 0.0002 max mem: 8426 +[2024-12-10 20:01:52 root] (utils.py 283): INFO Epoch: [16] [ 370/2502] eta: 0:27:18 lr: 0.000011 loss_cls: 3.9345 (3.9276) grad_norm: 2.3284 (2.3498) time: 0.7783 data: 0.0002 max mem: 8426 +[2024-12-10 20:02:00 root] (utils.py 283): INFO Epoch: [16] [ 380/2502] eta: 0:27:10 lr: 0.000011 loss_cls: 3.8418 (3.9274) grad_norm: 2.3965 (2.3516) time: 0.7688 data: 0.0002 max mem: 8426 +[2024-12-10 20:02:08 root] (utils.py 283): INFO Epoch: [16] [ 390/2502] eta: 0:27:02 lr: 0.000011 loss_cls: 3.9611 (3.9269) grad_norm: 2.3442 (2.3506) time: 0.7636 data: 0.0003 max mem: 8426 +[2024-12-10 20:02:15 root] (utils.py 283): INFO Epoch: [16] [ 400/2502] eta: 0:26:54 lr: 0.000011 loss_cls: 3.8407 (3.9240) grad_norm: 2.3406 (2.3516) time: 0.7633 data: 0.0003 max mem: 8426 +[2024-12-10 20:02:23 root] (utils.py 283): INFO Epoch: [16] [ 410/2502] eta: 0:26:46 lr: 0.000011 loss_cls: 3.7585 (3.9217) grad_norm: 2.3416 (2.3517) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 20:02:31 root] (utils.py 283): INFO Epoch: [16] [ 420/2502] eta: 0:26:38 lr: 0.000011 loss_cls: 4.2519 (3.9294) grad_norm: 2.3395 (2.3515) time: 0.7623 data: 0.0002 max mem: 8426 +[2024-12-10 20:02:38 root] (utils.py 283): INFO Epoch: [16] [ 430/2502] eta: 0:26:30 lr: 0.000011 loss_cls: 4.2010 (3.9331) grad_norm: 2.3395 (2.3504) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 20:02:46 root] (utils.py 283): INFO Epoch: [16] [ 440/2502] eta: 0:26:23 lr: 0.000011 loss_cls: 4.0956 (3.9370) grad_norm: 2.3392 (2.3506) time: 0.7687 data: 0.0002 max mem: 8426 +[2024-12-10 20:02:54 root] (utils.py 283): INFO Epoch: [16] [ 450/2502] eta: 0:26:15 lr: 0.000011 loss_cls: 4.0956 (3.9414) grad_norm: 2.3392 (2.3505) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 20:03:01 root] (utils.py 283): INFO Epoch: [16] [ 460/2502] eta: 0:26:07 lr: 0.000011 loss_cls: 4.0818 (3.9428) grad_norm: 2.3388 (2.3504) time: 0.7609 data: 0.0002 max mem: 8426 +[2024-12-10 20:03:09 root] (utils.py 283): INFO Epoch: [16] [ 470/2502] eta: 0:25:59 lr: 0.000011 loss_cls: 3.9350 (3.9442) grad_norm: 2.3430 (2.3507) time: 0.7597 data: 0.0002 max mem: 8426 +[2024-12-10 20:03:17 root] (utils.py 283): INFO Epoch: [16] [ 480/2502] eta: 0:25:52 lr: 0.000011 loss_cls: 4.1062 (3.9451) grad_norm: 2.3595 (2.3505) time: 0.7711 data: 0.0002 max mem: 8426 +[2024-12-10 20:03:24 root] (utils.py 283): INFO Epoch: [16] [ 490/2502] eta: 0:25:44 lr: 0.000011 loss_cls: 4.1956 (3.9497) grad_norm: 2.3495 (2.3505) time: 0.7741 data: 0.0002 max mem: 8426 +[2024-12-10 20:03:32 root] (utils.py 283): INFO Epoch: [16] [ 500/2502] eta: 0:25:36 lr: 0.000011 loss_cls: 4.0827 (3.9496) grad_norm: 2.3079 (2.3496) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 20:03:39 root] (utils.py 283): INFO Epoch: [16] [ 510/2502] eta: 0:25:28 lr: 0.000011 loss_cls: 3.7853 (3.9427) grad_norm: 2.4092 (2.3525) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 20:03:47 root] (utils.py 283): INFO Epoch: [16] [ 520/2502] eta: 0:25:20 lr: 0.000011 loss_cls: 3.6749 (3.9404) grad_norm: 2.3011 (2.3509) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 20:03:55 root] (utils.py 283): INFO Epoch: [16] [ 530/2502] eta: 0:25:13 lr: 0.000011 loss_cls: 3.6749 (3.9355) grad_norm: 2.3209 (2.3527) time: 0.7677 data: 0.0003 max mem: 8426 +[2024-12-10 20:04:03 root] (utils.py 283): INFO Epoch: [16] [ 540/2502] eta: 0:25:05 lr: 0.000011 loss_cls: 3.7539 (3.9398) grad_norm: 2.3509 (2.3516) time: 0.7681 data: 0.0003 max mem: 8426 +[2024-12-10 20:04:10 root] (utils.py 283): INFO Epoch: [16] [ 550/2502] eta: 0:24:57 lr: 0.000011 loss_cls: 4.1733 (3.9395) grad_norm: 2.2986 (2.3509) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-10 20:04:18 root] (utils.py 283): INFO Epoch: [16] [ 560/2502] eta: 0:24:50 lr: 0.000011 loss_cls: 3.9769 (3.9377) grad_norm: 2.3395 (2.3515) time: 0.7659 data: 0.0002 max mem: 8426 +[2024-12-10 20:04:26 root] (utils.py 283): INFO Epoch: [16] [ 570/2502] eta: 0:24:42 lr: 0.000011 loss_cls: 4.0632 (3.9376) grad_norm: 2.3314 (2.3516) time: 0.7678 data: 0.0002 max mem: 8426 +[2024-12-10 20:04:33 root] (utils.py 283): INFO Epoch: [16] [ 580/2502] eta: 0:24:34 lr: 0.000011 loss_cls: 3.8152 (3.9326) grad_norm: 2.3568 (2.3529) time: 0.7665 data: 0.0002 max mem: 8426 +[2024-12-10 20:04:41 root] (utils.py 283): INFO Epoch: [16] [ 590/2502] eta: 0:24:26 lr: 0.000011 loss_cls: 3.8152 (3.9347) grad_norm: 2.3817 (2.3524) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-10 20:04:48 root] (utils.py 283): INFO Epoch: [16] [ 600/2502] eta: 0:24:18 lr: 0.000011 loss_cls: 4.0011 (3.9286) grad_norm: 2.3509 (2.3530) time: 0.7598 data: 0.0002 max mem: 8426 +[2024-12-10 20:04:56 root] (utils.py 283): INFO Epoch: [16] [ 610/2502] eta: 0:24:11 lr: 0.000011 loss_cls: 4.0598 (3.9345) grad_norm: 2.3509 (2.3527) time: 0.7612 data: 0.0002 max mem: 8426 +[2024-12-10 20:05:04 root] (utils.py 283): INFO Epoch: [16] [ 620/2502] eta: 0:24:03 lr: 0.000011 loss_cls: 4.0849 (3.9325) grad_norm: 2.2947 (2.3525) time: 0.7654 data: 0.0002 max mem: 8426 +[2024-12-10 20:05:11 root] (utils.py 283): INFO Epoch: [16] [ 630/2502] eta: 0:23:55 lr: 0.000011 loss_cls: 3.8927 (3.9315) grad_norm: 2.3691 (2.3539) time: 0.7662 data: 0.0002 max mem: 8426 +[2024-12-10 20:05:19 root] (utils.py 283): INFO Epoch: [16] [ 640/2502] eta: 0:23:48 lr: 0.000011 loss_cls: 3.8927 (3.9344) grad_norm: 2.3872 (2.3542) time: 0.7659 data: 0.0002 max mem: 8426 +[2024-12-10 20:05:27 root] (utils.py 283): INFO Epoch: [16] [ 650/2502] eta: 0:23:40 lr: 0.000011 loss_cls: 3.8391 (3.9292) grad_norm: 2.3878 (2.3551) time: 0.7668 data: 0.0002 max mem: 8426 +[2024-12-10 20:05:34 root] (utils.py 283): INFO Epoch: [16] [ 660/2502] eta: 0:23:32 lr: 0.000011 loss_cls: 3.5145 (3.9251) grad_norm: 2.3878 (2.3558) time: 0.7710 data: 0.0003 max mem: 8426 +[2024-12-10 20:05:42 root] (utils.py 283): INFO Epoch: [16] [ 670/2502] eta: 0:23:25 lr: 0.000011 loss_cls: 3.7891 (3.9239) grad_norm: 2.3509 (2.3551) time: 0.7733 data: 0.0002 max mem: 8426 +[2024-12-10 20:05:50 root] (utils.py 283): INFO Epoch: [16] [ 680/2502] eta: 0:23:17 lr: 0.000011 loss_cls: 3.9928 (3.9222) grad_norm: 2.3509 (2.3561) time: 0.7666 data: 0.0002 max mem: 8426 +[2024-12-10 20:05:57 root] (utils.py 283): INFO Epoch: [16] [ 690/2502] eta: 0:23:09 lr: 0.000011 loss_cls: 4.0167 (3.9255) grad_norm: 2.3491 (2.3555) time: 0.7633 data: 0.0002 max mem: 8426 +[2024-12-10 20:06:05 root] (utils.py 283): INFO Epoch: [16] [ 700/2502] eta: 0:23:02 lr: 0.000011 loss_cls: 3.9893 (3.9249) grad_norm: 2.3173 (2.3559) time: 0.7672 data: 0.0003 max mem: 8426 +[2024-12-10 20:06:13 root] (utils.py 283): INFO Epoch: [16] [ 710/2502] eta: 0:22:54 lr: 0.000011 loss_cls: 3.7340 (3.9218) grad_norm: 2.3687 (2.3565) time: 0.7678 data: 0.0003 max mem: 8426 +[2024-12-10 20:06:20 root] (utils.py 283): INFO Epoch: [16] [ 720/2502] eta: 0:22:46 lr: 0.000011 loss_cls: 3.8373 (3.9228) grad_norm: 2.3382 (2.3562) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 20:06:28 root] (utils.py 283): INFO Epoch: [16] [ 730/2502] eta: 0:22:39 lr: 0.000011 loss_cls: 3.8373 (3.9199) grad_norm: 2.3300 (2.3560) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 20:06:36 root] (utils.py 283): INFO Epoch: [16] [ 740/2502] eta: 0:22:31 lr: 0.000011 loss_cls: 3.6389 (3.9171) grad_norm: 2.3244 (2.3559) time: 0.7656 data: 0.0003 max mem: 8426 +[2024-12-10 20:06:43 root] (utils.py 283): INFO Epoch: [16] [ 750/2502] eta: 0:22:23 lr: 0.000011 loss_cls: 3.6735 (3.9157) grad_norm: 2.3201 (2.3556) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 20:06:51 root] (utils.py 283): INFO Epoch: [16] [ 760/2502] eta: 0:22:15 lr: 0.000011 loss_cls: 3.7976 (3.9127) grad_norm: 2.3369 (2.3567) time: 0.7623 data: 0.0002 max mem: 8426 +[2024-12-10 20:06:59 root] (utils.py 283): INFO Epoch: [16] [ 770/2502] eta: 0:22:08 lr: 0.000011 loss_cls: 3.7131 (3.9098) grad_norm: 2.4403 (2.3572) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 20:07:06 root] (utils.py 283): INFO Epoch: [16] [ 780/2502] eta: 0:22:00 lr: 0.000011 loss_cls: 3.9109 (3.9112) grad_norm: 2.3670 (2.3574) time: 0.7697 data: 0.0002 max mem: 8426 +[2024-12-10 20:07:14 root] (utils.py 283): INFO Epoch: [16] [ 790/2502] eta: 0:21:53 lr: 0.000011 loss_cls: 3.9109 (3.9100) grad_norm: 2.3621 (2.3580) time: 0.7762 data: 0.0003 max mem: 8426 +[2024-12-10 20:07:22 root] (utils.py 283): INFO Epoch: [16] [ 800/2502] eta: 0:21:45 lr: 0.000011 loss_cls: 3.6863 (3.9053) grad_norm: 2.3758 (2.3585) time: 0.7772 data: 0.0003 max mem: 8426 +[2024-12-10 20:07:30 root] (utils.py 283): INFO Epoch: [16] [ 810/2502] eta: 0:21:38 lr: 0.000011 loss_cls: 3.5994 (3.9028) grad_norm: 2.4511 (2.3602) time: 0.7698 data: 0.0002 max mem: 8426 +[2024-12-10 20:07:37 root] (utils.py 283): INFO Epoch: [16] [ 820/2502] eta: 0:21:30 lr: 0.000011 loss_cls: 3.8126 (3.9013) grad_norm: 2.4195 (2.3606) time: 0.7699 data: 0.0003 max mem: 8426 +[2024-12-10 20:07:45 root] (utils.py 283): INFO Epoch: [16] [ 830/2502] eta: 0:21:22 lr: 0.000011 loss_cls: 3.9552 (3.9045) grad_norm: 2.3294 (2.3609) time: 0.7680 data: 0.0003 max mem: 8426 +[2024-12-10 20:07:53 root] (utils.py 283): INFO Epoch: [16] [ 840/2502] eta: 0:21:15 lr: 0.000011 loss_cls: 4.2357 (3.9044) grad_norm: 2.3092 (2.3603) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 20:08:00 root] (utils.py 283): INFO Epoch: [16] [ 850/2502] eta: 0:21:07 lr: 0.000011 loss_cls: 3.9238 (3.9037) grad_norm: 2.3092 (2.3608) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 20:08:08 root] (utils.py 283): INFO Epoch: [16] [ 860/2502] eta: 0:20:59 lr: 0.000011 loss_cls: 3.8357 (3.9003) grad_norm: 2.3960 (2.3613) time: 0.7631 data: 0.0003 max mem: 8426 +[2024-12-10 20:08:16 root] (utils.py 283): INFO Epoch: [16] [ 870/2502] eta: 0:20:52 lr: 0.000011 loss_cls: 4.0745 (3.9032) grad_norm: 2.3716 (2.3621) time: 0.7704 data: 0.0003 max mem: 8426 +[2024-12-10 20:08:23 root] (utils.py 283): INFO Epoch: [16] [ 880/2502] eta: 0:20:44 lr: 0.000011 loss_cls: 4.1020 (3.8997) grad_norm: 2.3104 (2.3623) time: 0.7726 data: 0.0002 max mem: 8426 +[2024-12-10 20:08:31 root] (utils.py 283): INFO Epoch: [16] [ 890/2502] eta: 0:20:36 lr: 0.000011 loss_cls: 3.9774 (3.9014) grad_norm: 2.3207 (2.3621) time: 0.7675 data: 0.0003 max mem: 8426 +[2024-12-10 20:08:39 root] (utils.py 283): INFO Epoch: [16] [ 900/2502] eta: 0:20:29 lr: 0.000011 loss_cls: 3.8552 (3.9005) grad_norm: 2.3260 (2.3626) time: 0.7663 data: 0.0003 max mem: 8426 +[2024-12-10 20:08:46 root] (utils.py 283): INFO Epoch: [16] [ 910/2502] eta: 0:20:21 lr: 0.000011 loss_cls: 3.7561 (3.8977) grad_norm: 2.3670 (2.3631) time: 0.7655 data: 0.0003 max mem: 8426 +[2024-12-10 20:08:54 root] (utils.py 283): INFO Epoch: [16] [ 920/2502] eta: 0:20:13 lr: 0.000011 loss_cls: 3.7100 (3.8967) grad_norm: 2.4298 (2.3640) time: 0.7671 data: 0.0003 max mem: 8426 +[2024-12-10 20:09:02 root] (utils.py 283): INFO Epoch: [16] [ 930/2502] eta: 0:20:05 lr: 0.000011 loss_cls: 3.8654 (3.8964) grad_norm: 2.4065 (2.3641) time: 0.7677 data: 0.0003 max mem: 8426 +[2024-12-10 20:09:09 root] (utils.py 283): INFO Epoch: [16] [ 940/2502] eta: 0:19:58 lr: 0.000011 loss_cls: 3.9272 (3.8966) grad_norm: 2.3656 (2.3640) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 20:09:17 root] (utils.py 283): INFO Epoch: [16] [ 950/2502] eta: 0:19:50 lr: 0.000011 loss_cls: 4.1112 (3.8984) grad_norm: 2.3170 (2.3637) time: 0.7659 data: 0.0002 max mem: 8426 +[2024-12-10 20:09:25 root] (utils.py 283): INFO Epoch: [16] [ 960/2502] eta: 0:19:42 lr: 0.000011 loss_cls: 4.1235 (3.9004) grad_norm: 2.3454 (2.3639) time: 0.7685 data: 0.0002 max mem: 8426 +[2024-12-10 20:09:32 root] (utils.py 283): INFO Epoch: [16] [ 970/2502] eta: 0:19:35 lr: 0.000011 loss_cls: 4.1010 (3.9003) grad_norm: 2.3627 (2.3636) time: 0.7691 data: 0.0002 max mem: 8426 +[2024-12-10 20:09:40 root] (utils.py 283): INFO Epoch: [16] [ 980/2502] eta: 0:19:27 lr: 0.000011 loss_cls: 4.1010 (3.9015) grad_norm: 2.4107 (2.3640) time: 0.7680 data: 0.0002 max mem: 8426 +[2024-12-10 20:09:48 root] (utils.py 283): INFO Epoch: [16] [ 990/2502] eta: 0:19:19 lr: 0.000011 loss_cls: 4.0270 (3.9020) grad_norm: 2.3873 (2.3638) time: 0.7645 data: 0.0003 max mem: 8426 +[2024-12-10 20:09:55 root] (utils.py 283): INFO Epoch: [16] [1000/2502] eta: 0:19:12 lr: 0.000011 loss_cls: 4.1338 (3.9043) grad_norm: 2.3322 (2.3635) time: 0.7618 data: 0.0002 max mem: 8426 +[2024-12-10 20:10:03 root] (utils.py 283): INFO Epoch: [16] [1010/2502] eta: 0:19:04 lr: 0.000011 loss_cls: 4.1569 (3.9041) grad_norm: 2.3073 (2.3629) time: 0.7595 data: 0.0002 max mem: 8426 +[2024-12-10 20:10:10 root] (utils.py 283): INFO Epoch: [16] [1020/2502] eta: 0:18:56 lr: 0.000011 loss_cls: 4.0952 (3.9045) grad_norm: 2.2823 (2.3630) time: 0.7611 data: 0.0003 max mem: 8426 +[2024-12-10 20:10:18 root] (utils.py 283): INFO Epoch: [16] [1030/2502] eta: 0:18:48 lr: 0.000011 loss_cls: 3.8758 (3.9027) grad_norm: 2.3750 (2.3628) time: 0.7625 data: 0.0003 max mem: 8426 +[2024-12-10 20:10:26 root] (utils.py 283): INFO Epoch: [16] [1040/2502] eta: 0:18:41 lr: 0.000011 loss_cls: 3.8182 (3.9045) grad_norm: 2.3750 (2.3626) time: 0.7612 data: 0.0002 max mem: 8426 +[2024-12-10 20:10:33 root] (utils.py 283): INFO Epoch: [16] [1050/2502] eta: 0:18:33 lr: 0.000011 loss_cls: 4.0337 (3.9051) grad_norm: 2.3269 (2.3624) time: 0.7618 data: 0.0003 max mem: 8426 +[2024-12-10 20:10:41 root] (utils.py 283): INFO Epoch: [16] [1060/2502] eta: 0:18:25 lr: 0.000011 loss_cls: 4.3482 (3.9071) grad_norm: 2.3666 (2.3625) time: 0.7612 data: 0.0003 max mem: 8426 +[2024-12-10 20:10:49 root] (utils.py 283): INFO Epoch: [16] [1070/2502] eta: 0:18:17 lr: 0.000011 loss_cls: 4.2221 (3.9086) grad_norm: 2.3988 (2.3632) time: 0.7632 data: 0.0002 max mem: 8426 +[2024-12-10 20:10:56 root] (utils.py 283): INFO Epoch: [16] [1080/2502] eta: 0:18:10 lr: 0.000011 loss_cls: 4.2109 (3.9103) grad_norm: 2.4060 (2.3635) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 20:11:04 root] (utils.py 283): INFO Epoch: [16] [1090/2502] eta: 0:18:02 lr: 0.000011 loss_cls: 4.2188 (3.9113) grad_norm: 2.3806 (2.3638) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-10 20:11:12 root] (utils.py 283): INFO Epoch: [16] [1100/2502] eta: 0:17:54 lr: 0.000011 loss_cls: 4.0987 (3.9117) grad_norm: 2.2971 (2.3632) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 20:11:19 root] (utils.py 283): INFO Epoch: [16] [1110/2502] eta: 0:17:47 lr: 0.000011 loss_cls: 4.2389 (3.9156) grad_norm: 2.3142 (2.3629) time: 0.7678 data: 0.0002 max mem: 8426 +[2024-12-10 20:11:27 root] (utils.py 283): INFO Epoch: [16] [1120/2502] eta: 0:17:39 lr: 0.000011 loss_cls: 4.3708 (3.9177) grad_norm: 2.3142 (2.3631) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 20:11:35 root] (utils.py 283): INFO Epoch: [16] [1130/2502] eta: 0:17:31 lr: 0.000011 loss_cls: 4.0311 (3.9174) grad_norm: 2.3416 (2.3631) time: 0.7682 data: 0.0002 max mem: 8426 +[2024-12-10 20:11:42 root] (utils.py 283): INFO Epoch: [16] [1140/2502] eta: 0:17:24 lr: 0.000011 loss_cls: 4.0297 (3.9170) grad_norm: 2.3555 (2.3633) time: 0.7787 data: 0.0002 max mem: 8426 +[2024-12-10 20:11:50 root] (utils.py 283): INFO Epoch: [16] [1150/2502] eta: 0:17:17 lr: 0.000011 loss_cls: 4.0709 (3.9171) grad_norm: 2.3655 (2.3631) time: 0.7827 data: 0.0002 max mem: 8426 +[2024-12-10 20:11:58 root] (utils.py 283): INFO Epoch: [16] [1160/2502] eta: 0:17:09 lr: 0.000011 loss_cls: 4.0885 (3.9172) grad_norm: 2.3655 (2.3634) time: 0.7750 data: 0.0002 max mem: 8426 +[2024-12-10 20:12:06 root] (utils.py 283): INFO Epoch: [16] [1170/2502] eta: 0:17:01 lr: 0.000011 loss_cls: 3.9834 (3.9164) grad_norm: 2.3943 (2.3641) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 20:12:13 root] (utils.py 283): INFO Epoch: [16] [1180/2502] eta: 0:16:53 lr: 0.000011 loss_cls: 3.8197 (3.9171) grad_norm: 2.4348 (2.3644) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 20:12:21 root] (utils.py 283): INFO Epoch: [16] [1190/2502] eta: 0:16:46 lr: 0.000011 loss_cls: 3.8017 (3.9167) grad_norm: 2.4075 (2.3649) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 20:12:29 root] (utils.py 283): INFO Epoch: [16] [1200/2502] eta: 0:16:38 lr: 0.000011 loss_cls: 3.6561 (3.9147) grad_norm: 2.3589 (2.3651) time: 0.7665 data: 0.0002 max mem: 8426 +[2024-12-10 20:12:36 root] (utils.py 283): INFO Epoch: [16] [1210/2502] eta: 0:16:30 lr: 0.000011 loss_cls: 3.4702 (3.9121) grad_norm: 2.3362 (2.3651) time: 0.7695 data: 0.0002 max mem: 8426 +[2024-12-10 20:12:44 root] (utils.py 283): INFO Epoch: [16] [1220/2502] eta: 0:16:23 lr: 0.000011 loss_cls: 3.9578 (3.9128) grad_norm: 2.3032 (2.3648) time: 0.7685 data: 0.0002 max mem: 8426 +[2024-12-10 20:12:52 root] (utils.py 283): INFO Epoch: [16] [1230/2502] eta: 0:16:15 lr: 0.000011 loss_cls: 4.0028 (3.9115) grad_norm: 2.3287 (2.3646) time: 0.7736 data: 0.0002 max mem: 8426 +[2024-12-10 20:12:59 root] (utils.py 283): INFO Epoch: [16] [1240/2502] eta: 0:16:08 lr: 0.000011 loss_cls: 4.0028 (3.9122) grad_norm: 2.3287 (2.3645) time: 0.7758 data: 0.0002 max mem: 8426 +[2024-12-10 20:13:07 root] (utils.py 283): INFO Epoch: [16] [1250/2502] eta: 0:16:00 lr: 0.000011 loss_cls: 4.1898 (3.9150) grad_norm: 2.3564 (2.3645) time: 0.7695 data: 0.0003 max mem: 8426 +[2024-12-10 20:13:15 root] (utils.py 283): INFO Epoch: [16] [1260/2502] eta: 0:15:52 lr: 0.000011 loss_cls: 4.2567 (3.9157) grad_norm: 2.3564 (2.3651) time: 0.7666 data: 0.0002 max mem: 8426 +[2024-12-10 20:13:22 root] (utils.py 283): INFO Epoch: [16] [1270/2502] eta: 0:15:45 lr: 0.000011 loss_cls: 4.1749 (3.9160) grad_norm: 2.3557 (2.3648) time: 0.7685 data: 0.0002 max mem: 8426 +[2024-12-10 20:13:30 root] (utils.py 283): INFO Epoch: [16] [1280/2502] eta: 0:15:37 lr: 0.000011 loss_cls: 4.1224 (3.9158) grad_norm: 2.4004 (2.3651) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 20:13:38 root] (utils.py 283): INFO Epoch: [16] [1290/2502] eta: 0:15:29 lr: 0.000011 loss_cls: 3.8391 (3.9139) grad_norm: 2.3406 (2.3647) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 20:13:46 root] (utils.py 283): INFO Epoch: [16] [1300/2502] eta: 0:15:22 lr: 0.000011 loss_cls: 4.0154 (3.9136) grad_norm: 2.3217 (2.3646) time: 0.7735 data: 0.0002 max mem: 8426 +[2024-12-10 20:13:53 root] (utils.py 283): INFO Epoch: [16] [1310/2502] eta: 0:15:14 lr: 0.000011 loss_cls: 4.0154 (3.9108) grad_norm: 2.3350 (2.3648) time: 0.7801 data: 0.0002 max mem: 8426 +[2024-12-10 20:14:01 root] (utils.py 283): INFO Epoch: [16] [1320/2502] eta: 0:15:06 lr: 0.000011 loss_cls: 3.8336 (3.9109) grad_norm: 2.3192 (2.3645) time: 0.7694 data: 0.0002 max mem: 8426 +[2024-12-10 20:14:09 root] (utils.py 283): INFO Epoch: [16] [1330/2502] eta: 0:14:59 lr: 0.000011 loss_cls: 4.0015 (3.9116) grad_norm: 2.3427 (2.3649) time: 0.7633 data: 0.0002 max mem: 8426 +[2024-12-10 20:14:16 root] (utils.py 283): INFO Epoch: [16] [1340/2502] eta: 0:14:51 lr: 0.000011 loss_cls: 4.1376 (3.9120) grad_norm: 2.3554 (2.3652) time: 0.7661 data: 0.0002 max mem: 8426 +[2024-12-10 20:14:24 root] (utils.py 283): INFO Epoch: [16] [1350/2502] eta: 0:14:43 lr: 0.000011 loss_cls: 4.1129 (3.9121) grad_norm: 2.3359 (2.3649) time: 0.7705 data: 0.0002 max mem: 8426 +[2024-12-10 20:14:32 root] (utils.py 283): INFO Epoch: [16] [1360/2502] eta: 0:14:36 lr: 0.000011 loss_cls: 3.9910 (3.9122) grad_norm: 2.2955 (2.3649) time: 0.7692 data: 0.0003 max mem: 8426 +[2024-12-10 20:14:39 root] (utils.py 283): INFO Epoch: [16] [1370/2502] eta: 0:14:28 lr: 0.000011 loss_cls: 3.9265 (3.9122) grad_norm: 2.3359 (2.3651) time: 0.7676 data: 0.0003 max mem: 8426 +[2024-12-10 20:14:47 root] (utils.py 283): INFO Epoch: [16] [1380/2502] eta: 0:14:20 lr: 0.000011 loss_cls: 3.8779 (3.9116) grad_norm: 2.3135 (2.3648) time: 0.7676 data: 0.0003 max mem: 8426 +[2024-12-10 20:14:55 root] (utils.py 283): INFO Epoch: [16] [1390/2502] eta: 0:14:13 lr: 0.000011 loss_cls: 3.8779 (3.9104) grad_norm: 2.3021 (2.3648) time: 0.7672 data: 0.0002 max mem: 8426 +[2024-12-10 20:15:02 root] (utils.py 283): INFO Epoch: [16] [1400/2502] eta: 0:14:05 lr: 0.000011 loss_cls: 3.8059 (3.9102) grad_norm: 2.4083 (2.3654) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-10 20:15:10 root] (utils.py 283): INFO Epoch: [16] [1410/2502] eta: 0:13:57 lr: 0.000011 loss_cls: 3.7944 (3.9096) grad_norm: 2.4138 (2.3656) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 20:15:18 root] (utils.py 283): INFO Epoch: [16] [1420/2502] eta: 0:13:50 lr: 0.000011 loss_cls: 3.9851 (3.9092) grad_norm: 2.3158 (2.3655) time: 0.7692 data: 0.0002 max mem: 8426 +[2024-12-10 20:15:26 root] (utils.py 283): INFO Epoch: [16] [1430/2502] eta: 0:13:42 lr: 0.000011 loss_cls: 4.0370 (3.9078) grad_norm: 2.3587 (2.3656) time: 0.7822 data: 0.0002 max mem: 8426 +[2024-12-10 20:15:33 root] (utils.py 283): INFO Epoch: [16] [1440/2502] eta: 0:13:35 lr: 0.000011 loss_cls: 3.9408 (3.9072) grad_norm: 2.4258 (2.3662) time: 0.7829 data: 0.0002 max mem: 8426 +[2024-12-10 20:15:41 root] (utils.py 283): INFO Epoch: [16] [1450/2502] eta: 0:13:27 lr: 0.000011 loss_cls: 3.9408 (3.9072) grad_norm: 2.3999 (2.3662) time: 0.7696 data: 0.0002 max mem: 8426 +[2024-12-10 20:15:49 root] (utils.py 283): INFO Epoch: [16] [1460/2502] eta: 0:13:19 lr: 0.000011 loss_cls: 3.8991 (3.9062) grad_norm: 2.3999 (2.3668) time: 0.7633 data: 0.0002 max mem: 8426 +[2024-12-10 20:15:56 root] (utils.py 283): INFO Epoch: [16] [1470/2502] eta: 0:13:11 lr: 0.000011 loss_cls: 3.6080 (3.9043) grad_norm: 2.4160 (2.3671) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 20:16:04 root] (utils.py 283): INFO Epoch: [16] [1480/2502] eta: 0:13:04 lr: 0.000011 loss_cls: 4.1017 (3.9050) grad_norm: 2.3960 (2.3670) time: 0.7625 data: 0.0002 max mem: 8426 +[2024-12-10 20:16:12 root] (utils.py 283): INFO Epoch: [16] [1490/2502] eta: 0:12:56 lr: 0.000011 loss_cls: 4.2127 (3.9055) grad_norm: 2.3765 (2.3671) time: 0.7607 data: 0.0002 max mem: 8426 +[2024-12-10 20:16:19 root] (utils.py 283): INFO Epoch: [16] [1500/2502] eta: 0:12:48 lr: 0.000011 loss_cls: 4.0780 (3.9046) grad_norm: 2.3433 (2.3669) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 20:16:27 root] (utils.py 283): INFO Epoch: [16] [1510/2502] eta: 0:12:41 lr: 0.000011 loss_cls: 3.9110 (3.9048) grad_norm: 2.3257 (2.3673) time: 0.7684 data: 0.0002 max mem: 8426 +[2024-12-10 20:16:35 root] (utils.py 283): INFO Epoch: [16] [1520/2502] eta: 0:12:33 lr: 0.000011 loss_cls: 3.8646 (3.9031) grad_norm: 2.3316 (2.3677) time: 0.7697 data: 0.0002 max mem: 8426 +[2024-12-10 20:16:42 root] (utils.py 283): INFO Epoch: [16] [1530/2502] eta: 0:12:25 lr: 0.000011 loss_cls: 4.2759 (3.9049) grad_norm: 2.3368 (2.3678) time: 0.7711 data: 0.0002 max mem: 8426 +[2024-12-10 20:16:50 root] (utils.py 283): INFO Epoch: [16] [1540/2502] eta: 0:12:18 lr: 0.000011 loss_cls: 4.1917 (3.9061) grad_norm: 2.3368 (2.3678) time: 0.7694 data: 0.0002 max mem: 8426 +[2024-12-10 20:16:58 root] (utils.py 283): INFO Epoch: [16] [1550/2502] eta: 0:12:10 lr: 0.000011 loss_cls: 4.0752 (3.9077) grad_norm: 2.4096 (2.3680) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 20:17:05 root] (utils.py 283): INFO Epoch: [16] [1560/2502] eta: 0:12:02 lr: 0.000011 loss_cls: 3.9989 (3.9074) grad_norm: 2.3867 (2.3679) time: 0.7605 data: 0.0002 max mem: 8426 +[2024-12-10 20:17:13 root] (utils.py 283): INFO Epoch: [16] [1570/2502] eta: 0:11:55 lr: 0.000011 loss_cls: 3.9149 (3.9077) grad_norm: 2.3318 (2.3677) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 20:17:21 root] (utils.py 283): INFO Epoch: [16] [1580/2502] eta: 0:11:47 lr: 0.000011 loss_cls: 3.9149 (3.9063) grad_norm: 2.3739 (2.3680) time: 0.7660 data: 0.0003 max mem: 8426 +[2024-12-10 20:17:28 root] (utils.py 283): INFO Epoch: [16] [1590/2502] eta: 0:11:39 lr: 0.000011 loss_cls: 3.8355 (3.9057) grad_norm: 2.3936 (2.3681) time: 0.7618 data: 0.0003 max mem: 8426 +[2024-12-10 20:17:36 root] (utils.py 283): INFO Epoch: [16] [1600/2502] eta: 0:11:32 lr: 0.000011 loss_cls: 3.8858 (3.9059) grad_norm: 2.3599 (2.3682) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 20:17:43 root] (utils.py 283): INFO Epoch: [16] [1610/2502] eta: 0:11:24 lr: 0.000011 loss_cls: 3.7238 (3.9036) grad_norm: 2.3830 (2.3683) time: 0.7645 data: 0.0003 max mem: 8426 +[2024-12-10 20:17:51 root] (utils.py 283): INFO Epoch: [16] [1620/2502] eta: 0:11:16 lr: 0.000011 loss_cls: 3.7202 (3.9032) grad_norm: 2.3771 (2.3686) time: 0.7637 data: 0.0003 max mem: 8426 +[2024-12-10 20:17:59 root] (utils.py 283): INFO Epoch: [16] [1630/2502] eta: 0:11:09 lr: 0.000011 loss_cls: 4.0269 (3.9026) grad_norm: 2.3976 (2.3689) time: 0.7690 data: 0.0003 max mem: 8426 +[2024-12-10 20:18:06 root] (utils.py 283): INFO Epoch: [16] [1640/2502] eta: 0:11:01 lr: 0.000011 loss_cls: 4.1273 (3.9026) grad_norm: 2.4719 (2.3690) time: 0.7713 data: 0.0003 max mem: 8426 +[2024-12-10 20:18:14 root] (utils.py 283): INFO Epoch: [16] [1650/2502] eta: 0:10:53 lr: 0.000011 loss_cls: 4.0761 (3.9021) grad_norm: 2.2922 (2.3687) time: 0.7660 data: 0.0003 max mem: 8426 +[2024-12-10 20:18:22 root] (utils.py 283): INFO Epoch: [16] [1660/2502] eta: 0:10:45 lr: 0.000011 loss_cls: 3.9607 (3.9025) grad_norm: 2.3648 (2.3692) time: 0.7666 data: 0.0003 max mem: 8426 +[2024-12-10 20:18:29 root] (utils.py 283): INFO Epoch: [16] [1670/2502] eta: 0:10:38 lr: 0.000011 loss_cls: 3.7007 (3.9008) grad_norm: 2.3870 (2.3693) time: 0.7674 data: 0.0003 max mem: 8426 +[2024-12-10 20:18:37 root] (utils.py 283): INFO Epoch: [16] [1680/2502] eta: 0:10:30 lr: 0.000011 loss_cls: 3.6428 (3.9015) grad_norm: 2.3130 (2.3692) time: 0.7658 data: 0.0003 max mem: 8426 +[2024-12-10 20:18:45 root] (utils.py 283): INFO Epoch: [16] [1690/2502] eta: 0:10:22 lr: 0.000011 loss_cls: 3.9116 (3.9002) grad_norm: 2.3074 (2.3691) time: 0.7661 data: 0.0003 max mem: 8426 +[2024-12-10 20:18:52 root] (utils.py 283): INFO Epoch: [16] [1700/2502] eta: 0:10:15 lr: 0.000011 loss_cls: 3.8434 (3.8996) grad_norm: 2.4216 (2.3694) time: 0.7607 data: 0.0002 max mem: 8426 +[2024-12-10 20:19:00 root] (utils.py 283): INFO Epoch: [16] [1710/2502] eta: 0:10:07 lr: 0.000011 loss_cls: 3.9818 (3.9000) grad_norm: 2.3529 (2.3691) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 20:19:08 root] (utils.py 283): INFO Epoch: [16] [1720/2502] eta: 0:09:59 lr: 0.000011 loss_cls: 3.9722 (3.8986) grad_norm: 2.3149 (2.3690) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 20:19:15 root] (utils.py 283): INFO Epoch: [16] [1730/2502] eta: 0:09:52 lr: 0.000011 loss_cls: 3.9722 (3.8995) grad_norm: 2.4009 (2.3693) time: 0.7568 data: 0.0002 max mem: 8426 +[2024-12-10 20:19:23 root] (utils.py 283): INFO Epoch: [16] [1740/2502] eta: 0:09:44 lr: 0.000011 loss_cls: 3.9821 (3.8980) grad_norm: 2.4136 (2.3694) time: 0.7573 data: 0.0003 max mem: 8426 +[2024-12-10 20:19:30 root] (utils.py 283): INFO Epoch: [16] [1750/2502] eta: 0:09:36 lr: 0.000011 loss_cls: 4.0719 (3.8979) grad_norm: 2.3716 (2.3698) time: 0.7613 data: 0.0003 max mem: 8426 +[2024-12-10 20:19:38 root] (utils.py 283): INFO Epoch: [16] [1760/2502] eta: 0:09:29 lr: 0.000011 loss_cls: 4.1179 (3.8989) grad_norm: 2.3716 (2.3699) time: 0.7645 data: 0.0003 max mem: 8426 +[2024-12-10 20:19:46 root] (utils.py 283): INFO Epoch: [16] [1770/2502] eta: 0:09:21 lr: 0.000011 loss_cls: 4.0516 (3.8992) grad_norm: 2.3328 (2.3703) time: 0.7695 data: 0.0002 max mem: 8426 +[2024-12-10 20:19:54 root] (utils.py 283): INFO Epoch: [16] [1780/2502] eta: 0:09:13 lr: 0.000011 loss_cls: 4.0516 (3.8998) grad_norm: 2.3418 (2.3702) time: 0.7726 data: 0.0002 max mem: 8426 +[2024-12-10 20:20:01 root] (utils.py 283): INFO Epoch: [16] [1790/2502] eta: 0:09:06 lr: 0.000011 loss_cls: 3.8162 (3.8975) grad_norm: 2.3793 (2.3704) time: 0.7699 data: 0.0002 max mem: 8426 +[2024-12-10 20:20:09 root] (utils.py 283): INFO Epoch: [16] [1800/2502] eta: 0:08:58 lr: 0.000011 loss_cls: 3.4390 (3.8968) grad_norm: 2.3718 (2.3700) time: 0.7683 data: 0.0002 max mem: 8426 +[2024-12-10 20:20:17 root] (utils.py 283): INFO Epoch: [16] [1810/2502] eta: 0:08:50 lr: 0.000011 loss_cls: 3.6014 (3.8956) grad_norm: 2.2739 (2.3698) time: 0.7719 data: 0.0002 max mem: 8426 +[2024-12-10 20:20:24 root] (utils.py 283): INFO Epoch: [16] [1820/2502] eta: 0:08:43 lr: 0.000011 loss_cls: 3.9854 (3.8961) grad_norm: 2.2836 (2.3694) time: 0.7718 data: 0.0002 max mem: 8426 +[2024-12-10 20:20:32 root] (utils.py 283): INFO Epoch: [16] [1830/2502] eta: 0:08:35 lr: 0.000011 loss_cls: 4.0260 (3.8959) grad_norm: 2.2836 (2.3691) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 20:20:40 root] (utils.py 283): INFO Epoch: [16] [1840/2502] eta: 0:08:27 lr: 0.000011 loss_cls: 3.8791 (3.8956) grad_norm: 2.2713 (2.3688) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 20:20:47 root] (utils.py 283): INFO Epoch: [16] [1850/2502] eta: 0:08:20 lr: 0.000011 loss_cls: 4.0663 (3.8959) grad_norm: 2.2623 (2.3682) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-10 20:20:55 root] (utils.py 283): INFO Epoch: [16] [1860/2502] eta: 0:08:12 lr: 0.000011 loss_cls: 3.9917 (3.8959) grad_norm: 2.2729 (2.3680) time: 0.7662 data: 0.0002 max mem: 8426 +[2024-12-10 20:21:03 root] (utils.py 283): INFO Epoch: [16] [1870/2502] eta: 0:08:04 lr: 0.000011 loss_cls: 3.9917 (3.8963) grad_norm: 2.3648 (2.3681) time: 0.7673 data: 0.0002 max mem: 8426 +[2024-12-10 20:21:10 root] (utils.py 283): INFO Epoch: [16] [1880/2502] eta: 0:07:57 lr: 0.000011 loss_cls: 4.2541 (3.8990) grad_norm: 2.3864 (2.3684) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 20:21:18 root] (utils.py 283): INFO Epoch: [16] [1890/2502] eta: 0:07:49 lr: 0.000011 loss_cls: 3.9917 (3.8970) grad_norm: 2.3561 (2.3686) time: 0.7612 data: 0.0003 max mem: 8426 +[2024-12-10 20:21:25 root] (utils.py 283): INFO Epoch: [16] [1900/2502] eta: 0:07:41 lr: 0.000011 loss_cls: 3.7766 (3.8967) grad_norm: 2.3561 (2.3688) time: 0.7630 data: 0.0003 max mem: 8426 +[2024-12-10 20:21:33 root] (utils.py 283): INFO Epoch: [16] [1910/2502] eta: 0:07:34 lr: 0.000011 loss_cls: 4.0367 (3.8971) grad_norm: 2.3731 (2.3689) time: 0.7660 data: 0.0003 max mem: 8426 +[2024-12-10 20:21:41 root] (utils.py 283): INFO Epoch: [16] [1920/2502] eta: 0:07:26 lr: 0.000011 loss_cls: 4.0878 (3.8989) grad_norm: 2.3740 (2.3691) time: 0.7674 data: 0.0002 max mem: 8426 +[2024-12-10 20:21:48 root] (utils.py 283): INFO Epoch: [16] [1930/2502] eta: 0:07:18 lr: 0.000011 loss_cls: 4.1069 (3.9008) grad_norm: 2.3763 (2.3693) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 20:21:56 root] (utils.py 283): INFO Epoch: [16] [1940/2502] eta: 0:07:11 lr: 0.000011 loss_cls: 4.0170 (3.8996) grad_norm: 2.3895 (2.3695) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-10 20:22:04 root] (utils.py 283): INFO Epoch: [16] [1950/2502] eta: 0:07:03 lr: 0.000011 loss_cls: 4.0802 (3.9002) grad_norm: 2.3548 (2.3696) time: 0.7644 data: 0.0003 max mem: 8426 +[2024-12-10 20:22:11 root] (utils.py 283): INFO Epoch: [16] [1960/2502] eta: 0:06:55 lr: 0.000011 loss_cls: 4.3101 (3.9018) grad_norm: 2.3548 (2.3697) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 20:22:19 root] (utils.py 283): INFO Epoch: [16] [1970/2502] eta: 0:06:47 lr: 0.000011 loss_cls: 4.1480 (3.9015) grad_norm: 2.3810 (2.3697) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 20:22:27 root] (utils.py 283): INFO Epoch: [16] [1980/2502] eta: 0:06:40 lr: 0.000011 loss_cls: 3.9171 (3.9013) grad_norm: 2.3528 (2.3695) time: 0.7681 data: 0.0003 max mem: 8426 +[2024-12-10 20:22:35 root] (utils.py 283): INFO Epoch: [16] [1990/2502] eta: 0:06:32 lr: 0.000011 loss_cls: 3.9238 (3.9011) grad_norm: 2.3422 (2.3697) time: 0.7765 data: 0.0003 max mem: 8426 +[2024-12-10 20:22:42 root] (utils.py 283): INFO Epoch: [16] [2000/2502] eta: 0:06:25 lr: 0.000011 loss_cls: 4.1789 (3.9014) grad_norm: 2.3807 (2.3700) time: 0.7817 data: 0.0003 max mem: 8426 +[2024-12-10 20:22:50 root] (utils.py 283): INFO Epoch: [16] [2010/2502] eta: 0:06:17 lr: 0.000011 loss_cls: 4.1274 (3.9018) grad_norm: 2.4418 (2.3703) time: 0.7852 data: 0.0002 max mem: 8426 +[2024-12-10 20:22:58 root] (utils.py 283): INFO Epoch: [16] [2020/2502] eta: 0:06:09 lr: 0.000011 loss_cls: 3.9610 (3.9020) grad_norm: 2.4423 (2.3705) time: 0.7855 data: 0.0002 max mem: 8426 +[2024-12-10 20:23:06 root] (utils.py 283): INFO Epoch: [16] [2030/2502] eta: 0:06:02 lr: 0.000011 loss_cls: 3.9610 (3.9023) grad_norm: 2.3692 (2.3707) time: 0.7722 data: 0.0002 max mem: 8426 +[2024-12-10 20:23:13 root] (utils.py 283): INFO Epoch: [16] [2040/2502] eta: 0:05:54 lr: 0.000011 loss_cls: 4.2991 (3.9035) grad_norm: 2.3563 (2.3709) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 20:23:21 root] (utils.py 283): INFO Epoch: [16] [2050/2502] eta: 0:05:46 lr: 0.000011 loss_cls: 4.1039 (3.9026) grad_norm: 2.3570 (2.3710) time: 0.7643 data: 0.0003 max mem: 8426 +[2024-12-10 20:23:29 root] (utils.py 283): INFO Epoch: [16] [2060/2502] eta: 0:05:39 lr: 0.000011 loss_cls: 3.4293 (3.9011) grad_norm: 2.3668 (2.3711) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 20:23:36 root] (utils.py 283): INFO Epoch: [16] [2070/2502] eta: 0:05:31 lr: 0.000011 loss_cls: 3.9885 (3.9012) grad_norm: 2.4196 (2.3717) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-10 20:23:44 root] (utils.py 283): INFO Epoch: [16] [2080/2502] eta: 0:05:23 lr: 0.000011 loss_cls: 3.9885 (3.9012) grad_norm: 2.4324 (2.3718) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 20:23:52 root] (utils.py 283): INFO Epoch: [16] [2090/2502] eta: 0:05:16 lr: 0.000011 loss_cls: 4.0816 (3.9023) grad_norm: 2.3910 (2.3721) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-10 20:23:59 root] (utils.py 283): INFO Epoch: [16] [2100/2502] eta: 0:05:08 lr: 0.000011 loss_cls: 4.0908 (3.9023) grad_norm: 2.3958 (2.3722) time: 0.7662 data: 0.0002 max mem: 8426 +[2024-12-10 20:24:07 root] (utils.py 283): INFO Epoch: [16] [2110/2502] eta: 0:05:00 lr: 0.000011 loss_cls: 3.8750 (3.9025) grad_norm: 2.3450 (2.3719) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-10 20:24:15 root] (utils.py 283): INFO Epoch: [16] [2120/2502] eta: 0:04:53 lr: 0.000011 loss_cls: 3.8750 (3.9024) grad_norm: 2.3129 (2.3718) time: 0.7689 data: 0.0002 max mem: 8426 +[2024-12-10 20:24:22 root] (utils.py 283): INFO Epoch: [16] [2130/2502] eta: 0:04:45 lr: 0.000011 loss_cls: 3.7953 (3.9015) grad_norm: 2.3387 (2.3717) time: 0.7662 data: 0.0002 max mem: 8426 +[2024-12-10 20:24:30 root] (utils.py 283): INFO Epoch: [16] [2140/2502] eta: 0:04:37 lr: 0.000011 loss_cls: 3.8512 (3.9012) grad_norm: 2.3672 (2.3718) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 20:24:38 root] (utils.py 283): INFO Epoch: [16] [2150/2502] eta: 0:04:30 lr: 0.000011 loss_cls: 4.2198 (3.9033) grad_norm: 2.4248 (2.3719) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 20:24:45 root] (utils.py 283): INFO Epoch: [16] [2160/2502] eta: 0:04:22 lr: 0.000011 loss_cls: 4.2198 (3.9029) grad_norm: 2.3648 (2.3723) time: 0.7713 data: 0.0002 max mem: 8426 +[2024-12-10 20:24:53 root] (utils.py 283): INFO Epoch: [16] [2170/2502] eta: 0:04:14 lr: 0.000011 loss_cls: 4.1405 (3.9042) grad_norm: 2.3443 (2.3720) time: 0.7744 data: 0.0002 max mem: 8426 +[2024-12-10 20:25:01 root] (utils.py 283): INFO Epoch: [16] [2180/2502] eta: 0:04:07 lr: 0.000011 loss_cls: 3.9473 (3.9036) grad_norm: 2.3616 (2.3722) time: 0.7778 data: 0.0002 max mem: 8426 +[2024-12-10 20:25:09 root] (utils.py 283): INFO Epoch: [16] [2190/2502] eta: 0:03:59 lr: 0.000011 loss_cls: 3.8591 (3.9039) grad_norm: 2.3464 (2.3723) time: 0.7764 data: 0.0002 max mem: 8426 +[2024-12-10 20:25:16 root] (utils.py 283): INFO Epoch: [16] [2200/2502] eta: 0:03:51 lr: 0.000011 loss_cls: 3.9918 (3.9043) grad_norm: 2.3464 (2.3721) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-10 20:25:24 root] (utils.py 283): INFO Epoch: [16] [2210/2502] eta: 0:03:44 lr: 0.000011 loss_cls: 4.0770 (3.9044) grad_norm: 2.3561 (2.3721) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-10 20:25:32 root] (utils.py 283): INFO Epoch: [16] [2220/2502] eta: 0:03:36 lr: 0.000011 loss_cls: 3.9700 (3.9042) grad_norm: 2.3693 (2.3722) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-10 20:25:39 root] (utils.py 283): INFO Epoch: [16] [2230/2502] eta: 0:03:28 lr: 0.000011 loss_cls: 4.0875 (3.9055) grad_norm: 2.3693 (2.3721) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-10 20:25:47 root] (utils.py 283): INFO Epoch: [16] [2240/2502] eta: 0:03:21 lr: 0.000011 loss_cls: 3.8666 (3.9030) grad_norm: 2.3081 (2.3718) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 20:25:55 root] (utils.py 283): INFO Epoch: [16] [2250/2502] eta: 0:03:13 lr: 0.000011 loss_cls: 3.4786 (3.9032) grad_norm: 2.3557 (2.3719) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 20:26:02 root] (utils.py 283): INFO Epoch: [16] [2260/2502] eta: 0:03:05 lr: 0.000011 loss_cls: 4.0366 (3.9021) grad_norm: 2.3557 (2.3719) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 20:26:10 root] (utils.py 283): INFO Epoch: [16] [2270/2502] eta: 0:02:57 lr: 0.000011 loss_cls: 4.1277 (3.9028) grad_norm: 2.4215 (2.3722) time: 0.7625 data: 0.0002 max mem: 8426 +[2024-12-10 20:26:17 root] (utils.py 283): INFO Epoch: [16] [2280/2502] eta: 0:02:50 lr: 0.000011 loss_cls: 4.2292 (3.9032) grad_norm: 2.4220 (2.3725) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 20:26:25 root] (utils.py 283): INFO Epoch: [16] [2290/2502] eta: 0:02:42 lr: 0.000011 loss_cls: 4.0558 (3.9041) grad_norm: 2.3989 (2.3726) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 20:26:33 root] (utils.py 283): INFO Epoch: [16] [2300/2502] eta: 0:02:34 lr: 0.000011 loss_cls: 4.1268 (3.9051) grad_norm: 2.3996 (2.3730) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 20:26:40 root] (utils.py 283): INFO Epoch: [16] [2310/2502] eta: 0:02:27 lr: 0.000011 loss_cls: 4.1084 (3.9057) grad_norm: 2.3486 (2.3728) time: 0.7602 data: 0.0002 max mem: 8426 +[2024-12-10 20:26:48 root] (utils.py 283): INFO Epoch: [16] [2320/2502] eta: 0:02:19 lr: 0.000011 loss_cls: 4.0360 (3.9052) grad_norm: 2.3385 (2.3729) time: 0.7656 data: 0.0003 max mem: 8426 +[2024-12-10 20:26:56 root] (utils.py 283): INFO Epoch: [16] [2330/2502] eta: 0:02:11 lr: 0.000011 loss_cls: 3.9195 (3.9052) grad_norm: 2.3617 (2.3729) time: 0.7673 data: 0.0002 max mem: 8426 +[2024-12-10 20:27:03 root] (utils.py 283): INFO Epoch: [16] [2340/2502] eta: 0:02:04 lr: 0.000011 loss_cls: 3.9769 (3.9050) grad_norm: 2.3587 (2.3728) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 20:27:11 root] (utils.py 283): INFO Epoch: [16] [2350/2502] eta: 0:01:56 lr: 0.000011 loss_cls: 3.9530 (3.9037) grad_norm: 2.3341 (2.3727) time: 0.7623 data: 0.0002 max mem: 8426 +[2024-12-10 20:27:19 root] (utils.py 283): INFO Epoch: [16] [2360/2502] eta: 0:01:48 lr: 0.000011 loss_cls: 3.9794 (3.9041) grad_norm: 2.3345 (2.3727) time: 0.7725 data: 0.0002 max mem: 8426 +[2024-12-10 20:27:26 root] (utils.py 283): INFO Epoch: [16] [2370/2502] eta: 0:01:41 lr: 0.000011 loss_cls: 4.2411 (3.9050) grad_norm: 2.3600 (2.3727) time: 0.7789 data: 0.0002 max mem: 8426 +[2024-12-10 20:27:34 root] (utils.py 283): INFO Epoch: [16] [2380/2502] eta: 0:01:33 lr: 0.000011 loss_cls: 4.2849 (3.9055) grad_norm: 2.4306 (2.3728) time: 0.7762 data: 0.0002 max mem: 8426 +[2024-12-10 20:27:42 root] (utils.py 283): INFO Epoch: [16] [2390/2502] eta: 0:01:25 lr: 0.000011 loss_cls: 4.1242 (3.9047) grad_norm: 2.3914 (2.3731) time: 0.7824 data: 0.0002 max mem: 8426 +[2024-12-10 20:27:50 root] (utils.py 283): INFO Epoch: [16] [2400/2502] eta: 0:01:18 lr: 0.000011 loss_cls: 4.0654 (3.9052) grad_norm: 2.3899 (2.3732) time: 0.7722 data: 0.0002 max mem: 8426 +[2024-12-10 20:27:57 root] (utils.py 283): INFO Epoch: [16] [2410/2502] eta: 0:01:10 lr: 0.000011 loss_cls: 4.0549 (3.9042) grad_norm: 2.3765 (2.3736) time: 0.7672 data: 0.0002 max mem: 8426 +[2024-12-10 20:28:05 root] (utils.py 283): INFO Epoch: [16] [2420/2502] eta: 0:01:02 lr: 0.000011 loss_cls: 3.8483 (3.9046) grad_norm: 2.3765 (2.3737) time: 0.7703 data: 0.0002 max mem: 8426 +[2024-12-10 20:28:13 root] (utils.py 283): INFO Epoch: [16] [2430/2502] eta: 0:00:55 lr: 0.000011 loss_cls: 4.0585 (3.9049) grad_norm: 2.3969 (2.3740) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 20:28:20 root] (utils.py 283): INFO Epoch: [16] [2440/2502] eta: 0:00:47 lr: 0.000011 loss_cls: 4.0971 (3.9049) grad_norm: 2.3368 (2.3739) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 20:28:28 root] (utils.py 283): INFO Epoch: [16] [2450/2502] eta: 0:00:39 lr: 0.000011 loss_cls: 4.0971 (3.9055) grad_norm: 2.3327 (2.3739) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-10 20:28:36 root] (utils.py 283): INFO Epoch: [16] [2460/2502] eta: 0:00:32 lr: 0.000011 loss_cls: 3.9452 (3.9059) grad_norm: 2.4007 (2.3740) time: 0.7665 data: 0.0002 max mem: 8426 +[2024-12-10 20:28:43 root] (utils.py 283): INFO Epoch: [16] [2470/2502] eta: 0:00:24 lr: 0.000011 loss_cls: 4.2913 (3.9069) grad_norm: 2.4318 (2.3742) time: 0.7711 data: 0.0003 max mem: 8426 +[2024-12-10 20:28:51 root] (utils.py 283): INFO Epoch: [16] [2480/2502] eta: 0:00:16 lr: 0.000011 loss_cls: 4.2275 (3.9070) grad_norm: 2.3588 (2.3741) time: 0.7682 data: 0.0003 max mem: 8426 +[2024-12-10 20:28:59 root] (utils.py 283): INFO Epoch: [16] [2490/2502] eta: 0:00:09 lr: 0.000011 loss_cls: 4.1238 (3.9073) grad_norm: 2.3053 (2.3738) time: 0.7880 data: 0.0240 max mem: 8426 +[2024-12-10 20:29:07 root] (utils.py 283): INFO Epoch: [16] [2500/2502] eta: 0:00:01 lr: 0.000011 loss_cls: 3.8651 (3.9069) grad_norm: 2.3053 (2.3737) time: 0.7891 data: 0.0240 max mem: 8426 +[2024-12-10 20:29:08 root] (utils.py 283): INFO Epoch: [16] [2501/2502] eta: 0:00:00 lr: 0.000011 loss_cls: 4.0354 (3.9071) grad_norm: 2.3170 (2.3737) time: 0.7891 data: 0.0240 max mem: 8426 +[2024-12-10 20:29:08 root] (utils.py 297): INFO Epoch: [16] Total time: 0:32:00 (0.7675 s / it) +[2024-12-10 20:29:08 root] (engine.py 179): INFO Averaged stats:lr: 0.000011 loss_cls: 4.0354 (3.9084) grad_norm: 2.3170 (2.3737) +[2024-12-10 20:29:08 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.6008 (0.6008) acc1: 87.5000 (87.5000) acc3: 96.0938 (96.0938) acc5: 99.2188 (99.2188) time: 0.1278 data: 0.0004 max mem: 8426 +[2024-12-10 20:29:09 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7226 (0.7930) acc1: 85.9375 (82.8125) acc3: 95.3125 (93.8210) acc5: 97.6562 (97.0170) time: 0.1279 data: 0.0003 max mem: 8426 +[2024-12-10 20:29:11 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8573 (0.8521) acc1: 80.4688 (81.4360) acc3: 92.1875 (92.9315) acc5: 95.3125 (95.7961) time: 0.1279 data: 0.0003 max mem: 8426 +[2024-12-10 20:29:12 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9591 (0.8681) acc1: 78.9062 (80.4688) acc3: 92.9688 (93.0948) acc5: 95.3125 (95.9173) time: 0.1279 data: 0.0004 max mem: 8426 +[2024-12-10 20:29:13 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8381 (0.8586) acc1: 79.6875 (80.8498) acc3: 93.7500 (93.1974) acc5: 96.0938 (95.8651) time: 0.1349 data: 0.0073 max mem: 8426 +[2024-12-10 20:29:15 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0424 (0.9454) acc1: 75.7812 (78.8297) acc3: 89.0625 (91.8352) acc5: 93.7500 (94.7917) time: 0.1492 data: 0.0206 max mem: 8426 +[2024-12-10 20:29:16 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2725 (0.9917) acc1: 72.6562 (78.0738) acc3: 85.9375 (90.9324) acc5: 89.0625 (93.9677) time: 0.1534 data: 0.0248 max mem: 8426 +[2024-12-10 20:29:18 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2102 (1.0312) acc1: 73.4375 (77.2337) acc3: 86.7188 (90.4159) acc5: 89.0625 (93.5079) time: 0.1394 data: 0.0114 max mem: 8426 +[2024-12-10 20:29:19 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2098 (1.0640) acc1: 73.4375 (76.4757) acc3: 86.7188 (89.8438) acc5: 89.8438 (93.0266) time: 0.1338 data: 0.0055 max mem: 8426 +[2024-12-10 20:29:21 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2313 (1.0910) acc1: 70.3125 (75.8070) acc3: 85.9375 (89.4660) acc5: 89.8438 (92.7541) time: 0.1573 data: 0.0291 max mem: 8426 +[2024-12-10 20:29:22 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1270 (1.0798) acc1: 73.4375 (75.9680) acc3: 89.0625 (89.6240) acc5: 91.4062 (92.8880) time: 0.1547 data: 0.0272 max mem: 8426 +[2024-12-10 20:29:22 root] (utils.py 297): INFO Test: Total time: 0:00:13 (0.1407 s / it) +[2024-12-10 20:29:22 root] (engine.py 264): INFO * Acc@1 75.722 Acc@3 89.656 Acc@5 92.922 loss 1.081 flops 1.285 layer_flops 1.251 +[2024-12-10 20:29:22 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 75.7% +[2024-12-10 20:29:22 root] (main.py 576): INFO Max accuracy: 75.72% +[2024-12-10 20:29:23 root] (utils.py 283): INFO Epoch: [17] [ 0/2502] eta: 0:33:18 lr: 0.000010 loss_cls: 4.2110 (4.2110) grad_norm: 2.2057 (2.2057) time: 0.7988 data: 0.0005 max mem: 8426 +[2024-12-10 20:29:30 root] (utils.py 283): INFO Epoch: [17] [ 10/2502] eta: 0:31:51 lr: 0.000010 loss_cls: 4.2110 (4.0574) grad_norm: 2.3224 (2.3229) time: 0.7672 data: 0.0002 max mem: 8426 +[2024-12-10 20:29:38 root] (utils.py 283): INFO Epoch: [17] [ 20/2502] eta: 0:31:40 lr: 0.000010 loss_cls: 4.1127 (3.9136) grad_norm: 2.3107 (2.3193) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-10 20:29:46 root] (utils.py 283): INFO Epoch: [17] [ 30/2502] eta: 0:31:31 lr: 0.000010 loss_cls: 3.3681 (3.7175) grad_norm: 2.3126 (2.3546) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-10 20:29:53 root] (utils.py 283): INFO Epoch: [17] [ 40/2502] eta: 0:31:21 lr: 0.000010 loss_cls: 3.8241 (3.7977) grad_norm: 2.3813 (2.3723) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 20:30:01 root] (utils.py 283): INFO Epoch: [17] [ 50/2502] eta: 0:31:12 lr: 0.000010 loss_cls: 4.0727 (3.8146) grad_norm: 2.3378 (2.3502) time: 0.7611 data: 0.0002 max mem: 8426 +[2024-12-10 20:30:09 root] (utils.py 283): INFO Epoch: [17] [ 60/2502] eta: 0:31:05 lr: 0.000010 loss_cls: 4.0181 (3.8329) grad_norm: 2.2851 (2.3454) time: 0.7633 data: 0.0002 max mem: 8426 +[2024-12-10 20:30:16 root] (utils.py 283): INFO Epoch: [17] [ 70/2502] eta: 0:30:57 lr: 0.000010 loss_cls: 3.8763 (3.7911) grad_norm: 2.3537 (2.3474) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 20:30:24 root] (utils.py 283): INFO Epoch: [17] [ 80/2502] eta: 0:30:50 lr: 0.000010 loss_cls: 3.5095 (3.7786) grad_norm: 2.3366 (2.3402) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-10 20:30:32 root] (utils.py 283): INFO Epoch: [17] [ 90/2502] eta: 0:30:43 lr: 0.000010 loss_cls: 3.7400 (3.7927) grad_norm: 2.2768 (2.3366) time: 0.7668 data: 0.0002 max mem: 8426 +[2024-12-10 20:30:39 root] (utils.py 283): INFO Epoch: [17] [ 100/2502] eta: 0:30:36 lr: 0.000010 loss_cls: 4.2660 (3.8230) grad_norm: 2.3216 (2.3427) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-10 20:30:47 root] (utils.py 283): INFO Epoch: [17] [ 110/2502] eta: 0:30:28 lr: 0.000010 loss_cls: 4.2887 (3.8485) grad_norm: 2.3495 (2.3521) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 20:30:54 root] (utils.py 283): INFO Epoch: [17] [ 120/2502] eta: 0:30:19 lr: 0.000010 loss_cls: 4.2887 (3.8646) grad_norm: 2.3673 (2.3524) time: 0.7617 data: 0.0002 max mem: 8426 +[2024-12-10 20:31:02 root] (utils.py 283): INFO Epoch: [17] [ 130/2502] eta: 0:30:13 lr: 0.000010 loss_cls: 4.1042 (3.8529) grad_norm: 2.3431 (2.3547) time: 0.7659 data: 0.0002 max mem: 8426 +[2024-12-10 20:31:10 root] (utils.py 283): INFO Epoch: [17] [ 140/2502] eta: 0:30:04 lr: 0.000010 loss_cls: 3.9258 (3.8540) grad_norm: 2.3431 (2.3568) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-10 20:31:17 root] (utils.py 283): INFO Epoch: [17] [ 150/2502] eta: 0:29:55 lr: 0.000010 loss_cls: 3.9258 (3.8466) grad_norm: 2.3663 (2.3615) time: 0.7563 data: 0.0002 max mem: 8426 +[2024-12-10 20:31:25 root] (utils.py 283): INFO Epoch: [17] [ 160/2502] eta: 0:29:47 lr: 0.000010 loss_cls: 3.3703 (3.8325) grad_norm: 2.3396 (2.3582) time: 0.7555 data: 0.0002 max mem: 8426 +[2024-12-10 20:31:33 root] (utils.py 283): INFO Epoch: [17] [ 170/2502] eta: 0:29:39 lr: 0.000010 loss_cls: 3.4998 (3.8350) grad_norm: 2.2825 (2.3618) time: 0.7582 data: 0.0002 max mem: 8426 +[2024-12-10 20:31:40 root] (utils.py 283): INFO Epoch: [17] [ 180/2502] eta: 0:29:30 lr: 0.000010 loss_cls: 3.8821 (3.8418) grad_norm: 2.3250 (2.3600) time: 0.7599 data: 0.0002 max mem: 8426 +[2024-12-10 20:31:48 root] (utils.py 283): INFO Epoch: [17] [ 190/2502] eta: 0:29:22 lr: 0.000010 loss_cls: 3.8033 (3.8315) grad_norm: 2.3613 (2.3648) time: 0.7580 data: 0.0002 max mem: 8426 +[2024-12-10 20:31:55 root] (utils.py 283): INFO Epoch: [17] [ 200/2502] eta: 0:29:14 lr: 0.000010 loss_cls: 4.3199 (3.8602) grad_norm: 2.4183 (2.3688) time: 0.7568 data: 0.0002 max mem: 8426 +[2024-12-10 20:32:03 root] (utils.py 283): INFO Epoch: [17] [ 210/2502] eta: 0:29:05 lr: 0.000010 loss_cls: 4.3208 (3.8693) grad_norm: 2.3956 (2.3671) time: 0.7555 data: 0.0002 max mem: 8426 +[2024-12-10 20:32:10 root] (utils.py 283): INFO Epoch: [17] [ 220/2502] eta: 0:28:59 lr: 0.000010 loss_cls: 4.0984 (3.8655) grad_norm: 2.3888 (2.3695) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 20:32:18 root] (utils.py 283): INFO Epoch: [17] [ 230/2502] eta: 0:28:51 lr: 0.000010 loss_cls: 3.9588 (3.8678) grad_norm: 2.3921 (2.3707) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 20:32:26 root] (utils.py 283): INFO Epoch: [17] [ 240/2502] eta: 0:28:44 lr: 0.000010 loss_cls: 3.9651 (3.8678) grad_norm: 2.4061 (2.3782) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 20:32:33 root] (utils.py 283): INFO Epoch: [17] [ 250/2502] eta: 0:28:37 lr: 0.000010 loss_cls: 3.5608 (3.8623) grad_norm: 2.4061 (2.3793) time: 0.7676 data: 0.0002 max mem: 8426 +[2024-12-10 20:32:41 root] (utils.py 283): INFO Epoch: [17] [ 260/2502] eta: 0:28:29 lr: 0.000010 loss_cls: 3.5608 (3.8602) grad_norm: 2.3724 (2.3801) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 20:32:49 root] (utils.py 283): INFO Epoch: [17] [ 270/2502] eta: 0:28:21 lr: 0.000010 loss_cls: 3.9809 (3.8718) grad_norm: 2.3724 (2.3811) time: 0.7578 data: 0.0003 max mem: 8426 +[2024-12-10 20:32:56 root] (utils.py 283): INFO Epoch: [17] [ 280/2502] eta: 0:28:13 lr: 0.000010 loss_cls: 3.8817 (3.8632) grad_norm: 2.3532 (2.3805) time: 0.7562 data: 0.0002 max mem: 8426 +[2024-12-10 20:33:04 root] (utils.py 283): INFO Epoch: [17] [ 290/2502] eta: 0:28:05 lr: 0.000010 loss_cls: 4.0764 (3.8718) grad_norm: 2.3055 (2.3793) time: 0.7576 data: 0.0003 max mem: 8426 +[2024-12-10 20:33:11 root] (utils.py 283): INFO Epoch: [17] [ 300/2502] eta: 0:27:57 lr: 0.000010 loss_cls: 4.1561 (3.8718) grad_norm: 2.3584 (2.3802) time: 0.7560 data: 0.0002 max mem: 8426 +[2024-12-10 20:33:19 root] (utils.py 283): INFO Epoch: [17] [ 310/2502] eta: 0:27:49 lr: 0.000010 loss_cls: 4.0399 (3.8734) grad_norm: 2.3769 (2.3794) time: 0.7563 data: 0.0003 max mem: 8426 +[2024-12-10 20:33:27 root] (utils.py 283): INFO Epoch: [17] [ 320/2502] eta: 0:27:41 lr: 0.000010 loss_cls: 4.0274 (3.8735) grad_norm: 2.3098 (2.3789) time: 0.7615 data: 0.0003 max mem: 8426 +[2024-12-10 20:33:34 root] (utils.py 283): INFO Epoch: [17] [ 330/2502] eta: 0:27:34 lr: 0.000010 loss_cls: 4.1472 (3.8800) grad_norm: 2.3552 (2.3797) time: 0.7643 data: 0.0003 max mem: 8426 +[2024-12-10 20:33:42 root] (utils.py 283): INFO Epoch: [17] [ 340/2502] eta: 0:27:26 lr: 0.000010 loss_cls: 4.1592 (3.8861) grad_norm: 2.3863 (2.3803) time: 0.7622 data: 0.0003 max mem: 8426 +[2024-12-10 20:33:50 root] (utils.py 283): INFO Epoch: [17] [ 350/2502] eta: 0:27:20 lr: 0.000010 loss_cls: 4.1320 (3.8841) grad_norm: 2.3142 (2.3795) time: 0.7713 data: 0.0003 max mem: 8426 +[2024-12-10 20:33:57 root] (utils.py 283): INFO Epoch: [17] [ 360/2502] eta: 0:27:12 lr: 0.000010 loss_cls: 3.4504 (3.8744) grad_norm: 2.3612 (2.3798) time: 0.7682 data: 0.0002 max mem: 8426 +[2024-12-10 20:34:05 root] (utils.py 283): INFO Epoch: [17] [ 370/2502] eta: 0:27:04 lr: 0.000010 loss_cls: 3.8098 (3.8789) grad_norm: 2.4098 (2.3806) time: 0.7569 data: 0.0002 max mem: 8426 +[2024-12-10 20:34:12 root] (utils.py 283): INFO Epoch: [17] [ 380/2502] eta: 0:26:56 lr: 0.000010 loss_cls: 4.0089 (3.8806) grad_norm: 2.3657 (2.3792) time: 0.7583 data: 0.0003 max mem: 8426 +[2024-12-10 20:34:20 root] (utils.py 283): INFO Epoch: [17] [ 390/2502] eta: 0:26:48 lr: 0.000010 loss_cls: 3.7678 (3.8760) grad_norm: 2.3237 (2.3804) time: 0.7599 data: 0.0003 max mem: 8426 +[2024-12-10 20:34:28 root] (utils.py 283): INFO Epoch: [17] [ 400/2502] eta: 0:26:41 lr: 0.000010 loss_cls: 3.5872 (3.8723) grad_norm: 2.3571 (2.3805) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 20:34:35 root] (utils.py 283): INFO Epoch: [17] [ 410/2502] eta: 0:26:33 lr: 0.000010 loss_cls: 3.4644 (3.8637) grad_norm: 2.3571 (2.3802) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 20:34:43 root] (utils.py 283): INFO Epoch: [17] [ 420/2502] eta: 0:26:26 lr: 0.000010 loss_cls: 3.7041 (3.8673) grad_norm: 2.3945 (2.3811) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 20:34:51 root] (utils.py 283): INFO Epoch: [17] [ 430/2502] eta: 0:26:18 lr: 0.000010 loss_cls: 3.9823 (3.8639) grad_norm: 2.4189 (2.3816) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-10 20:34:58 root] (utils.py 283): INFO Epoch: [17] [ 440/2502] eta: 0:26:11 lr: 0.000010 loss_cls: 3.7715 (3.8653) grad_norm: 2.3477 (2.3807) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 20:35:06 root] (utils.py 283): INFO Epoch: [17] [ 450/2502] eta: 0:26:03 lr: 0.000010 loss_cls: 4.0552 (3.8693) grad_norm: 2.3094 (2.3795) time: 0.7595 data: 0.0002 max mem: 8426 +[2024-12-10 20:35:13 root] (utils.py 283): INFO Epoch: [17] [ 460/2502] eta: 0:25:55 lr: 0.000010 loss_cls: 4.0338 (3.8623) grad_norm: 2.3325 (2.3787) time: 0.7573 data: 0.0002 max mem: 8426 +[2024-12-10 20:35:21 root] (utils.py 283): INFO Epoch: [17] [ 470/2502] eta: 0:25:48 lr: 0.000010 loss_cls: 3.7573 (3.8626) grad_norm: 2.3224 (2.3776) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-10 20:35:29 root] (utils.py 283): INFO Epoch: [17] [ 480/2502] eta: 0:25:40 lr: 0.000010 loss_cls: 3.8673 (3.8653) grad_norm: 2.3102 (2.3772) time: 0.7590 data: 0.0003 max mem: 8426 +[2024-12-10 20:35:36 root] (utils.py 283): INFO Epoch: [17] [ 490/2502] eta: 0:25:32 lr: 0.000010 loss_cls: 3.7564 (3.8624) grad_norm: 2.3054 (2.3755) time: 0.7556 data: 0.0003 max mem: 8426 +[2024-12-10 20:35:44 root] (utils.py 283): INFO Epoch: [17] [ 500/2502] eta: 0:25:24 lr: 0.000010 loss_cls: 3.9434 (3.8683) grad_norm: 2.3103 (2.3758) time: 0.7550 data: 0.0003 max mem: 8426 +[2024-12-10 20:35:51 root] (utils.py 283): INFO Epoch: [17] [ 510/2502] eta: 0:25:17 lr: 0.000010 loss_cls: 4.0401 (3.8691) grad_norm: 2.3117 (2.3747) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 20:35:59 root] (utils.py 283): INFO Epoch: [17] [ 520/2502] eta: 0:25:09 lr: 0.000010 loss_cls: 3.9507 (3.8709) grad_norm: 2.3357 (2.3752) time: 0.7681 data: 0.0002 max mem: 8426 +[2024-12-10 20:36:07 root] (utils.py 283): INFO Epoch: [17] [ 530/2502] eta: 0:25:02 lr: 0.000010 loss_cls: 4.1221 (3.8751) grad_norm: 2.3626 (2.3753) time: 0.7656 data: 0.0003 max mem: 8426 +[2024-12-10 20:36:14 root] (utils.py 283): INFO Epoch: [17] [ 540/2502] eta: 0:24:54 lr: 0.000010 loss_cls: 4.1221 (3.8764) grad_norm: 2.3120 (2.3741) time: 0.7636 data: 0.0003 max mem: 8426 +[2024-12-10 20:36:22 root] (utils.py 283): INFO Epoch: [17] [ 550/2502] eta: 0:24:47 lr: 0.000010 loss_cls: 4.1491 (3.8806) grad_norm: 2.3120 (2.3737) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 20:36:30 root] (utils.py 283): INFO Epoch: [17] [ 560/2502] eta: 0:24:39 lr: 0.000010 loss_cls: 4.1249 (3.8837) grad_norm: 2.3757 (2.3744) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 20:36:37 root] (utils.py 283): INFO Epoch: [17] [ 570/2502] eta: 0:24:32 lr: 0.000010 loss_cls: 3.9574 (3.8820) grad_norm: 2.2909 (2.3728) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-10 20:36:45 root] (utils.py 283): INFO Epoch: [17] [ 580/2502] eta: 0:24:24 lr: 0.000010 loss_cls: 4.0378 (3.8839) grad_norm: 2.2898 (2.3727) time: 0.7634 data: 0.0003 max mem: 8426 +[2024-12-10 20:36:52 root] (utils.py 283): INFO Epoch: [17] [ 590/2502] eta: 0:24:16 lr: 0.000010 loss_cls: 3.9253 (3.8843) grad_norm: 2.3534 (2.3724) time: 0.7616 data: 0.0003 max mem: 8426 +[2024-12-10 20:37:00 root] (utils.py 283): INFO Epoch: [17] [ 600/2502] eta: 0:24:09 lr: 0.000010 loss_cls: 3.8214 (3.8780) grad_norm: 2.3534 (2.3716) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 20:37:08 root] (utils.py 283): INFO Epoch: [17] [ 610/2502] eta: 0:24:01 lr: 0.000010 loss_cls: 3.8555 (3.8774) grad_norm: 2.2743 (2.3704) time: 0.7657 data: 0.0003 max mem: 8426 +[2024-12-10 20:37:15 root] (utils.py 283): INFO Epoch: [17] [ 620/2502] eta: 0:23:54 lr: 0.000010 loss_cls: 3.8929 (3.8755) grad_norm: 2.2971 (2.3710) time: 0.7701 data: 0.0002 max mem: 8426 +[2024-12-10 20:37:23 root] (utils.py 283): INFO Epoch: [17] [ 630/2502] eta: 0:23:47 lr: 0.000010 loss_cls: 3.8929 (3.8756) grad_norm: 2.3473 (2.3703) time: 0.7708 data: 0.0002 max mem: 8426 +[2024-12-10 20:37:31 root] (utils.py 283): INFO Epoch: [17] [ 640/2502] eta: 0:23:39 lr: 0.000010 loss_cls: 4.0860 (3.8779) grad_norm: 2.3192 (2.3706) time: 0.7715 data: 0.0002 max mem: 8426 +[2024-12-10 20:37:38 root] (utils.py 283): INFO Epoch: [17] [ 650/2502] eta: 0:23:32 lr: 0.000010 loss_cls: 4.2499 (3.8831) grad_norm: 2.3577 (2.3703) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 20:37:46 root] (utils.py 283): INFO Epoch: [17] [ 660/2502] eta: 0:23:24 lr: 0.000010 loss_cls: 4.2095 (3.8832) grad_norm: 2.3647 (2.3698) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 20:37:54 root] (utils.py 283): INFO Epoch: [17] [ 670/2502] eta: 0:23:17 lr: 0.000010 loss_cls: 4.1398 (3.8845) grad_norm: 2.3734 (2.3704) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-10 20:38:02 root] (utils.py 283): INFO Epoch: [17] [ 680/2502] eta: 0:23:09 lr: 0.000010 loss_cls: 4.0787 (3.8868) grad_norm: 2.4085 (2.3701) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-10 20:38:09 root] (utils.py 283): INFO Epoch: [17] [ 690/2502] eta: 0:23:02 lr: 0.000010 loss_cls: 4.0594 (3.8871) grad_norm: 2.3454 (2.3704) time: 0.7683 data: 0.0002 max mem: 8426 +[2024-12-10 20:38:17 root] (utils.py 283): INFO Epoch: [17] [ 700/2502] eta: 0:22:54 lr: 0.000010 loss_cls: 3.9608 (3.8891) grad_norm: 2.3892 (2.3712) time: 0.7615 data: 0.0002 max mem: 8426 +[2024-12-10 20:38:24 root] (utils.py 283): INFO Epoch: [17] [ 710/2502] eta: 0:22:46 lr: 0.000010 loss_cls: 4.0931 (3.8936) grad_norm: 2.3892 (2.3726) time: 0.7592 data: 0.0002 max mem: 8426 +[2024-12-10 20:38:32 root] (utils.py 283): INFO Epoch: [17] [ 720/2502] eta: 0:22:38 lr: 0.000010 loss_cls: 4.3831 (3.8969) grad_norm: 2.3531 (2.3725) time: 0.7604 data: 0.0002 max mem: 8426 +[2024-12-10 20:38:40 root] (utils.py 283): INFO Epoch: [17] [ 730/2502] eta: 0:22:31 lr: 0.000010 loss_cls: 4.1732 (3.8964) grad_norm: 2.3531 (2.3727) time: 0.7610 data: 0.0003 max mem: 8426 +[2024-12-10 20:38:47 root] (utils.py 283): INFO Epoch: [17] [ 740/2502] eta: 0:22:23 lr: 0.000010 loss_cls: 4.1059 (3.8978) grad_norm: 2.3542 (2.3725) time: 0.7613 data: 0.0003 max mem: 8426 +[2024-12-10 20:38:55 root] (utils.py 283): INFO Epoch: [17] [ 750/2502] eta: 0:22:16 lr: 0.000010 loss_cls: 4.1059 (3.9004) grad_norm: 2.3265 (2.3720) time: 0.7608 data: 0.0003 max mem: 8426 +[2024-12-10 20:39:02 root] (utils.py 283): INFO Epoch: [17] [ 760/2502] eta: 0:22:08 lr: 0.000010 loss_cls: 4.2000 (3.9043) grad_norm: 2.3402 (2.3718) time: 0.7640 data: 0.0003 max mem: 8426 +[2024-12-10 20:39:10 root] (utils.py 283): INFO Epoch: [17] [ 770/2502] eta: 0:22:00 lr: 0.000010 loss_cls: 4.0457 (3.9001) grad_norm: 2.3664 (2.3715) time: 0.7662 data: 0.0002 max mem: 8426 +[2024-12-10 20:39:18 root] (utils.py 283): INFO Epoch: [17] [ 780/2502] eta: 0:21:53 lr: 0.000010 loss_cls: 4.1180 (3.9026) grad_norm: 2.3176 (2.3712) time: 0.7659 data: 0.0003 max mem: 8426 +[2024-12-10 20:39:25 root] (utils.py 283): INFO Epoch: [17] [ 790/2502] eta: 0:21:45 lr: 0.000010 loss_cls: 4.2135 (3.9016) grad_norm: 2.3738 (2.3714) time: 0.7658 data: 0.0003 max mem: 8426 +[2024-12-10 20:39:33 root] (utils.py 283): INFO Epoch: [17] [ 800/2502] eta: 0:21:38 lr: 0.000010 loss_cls: 3.9146 (3.9030) grad_norm: 2.4036 (2.3722) time: 0.7662 data: 0.0002 max mem: 8426 +[2024-12-10 20:39:41 root] (utils.py 283): INFO Epoch: [17] [ 810/2502] eta: 0:21:31 lr: 0.000010 loss_cls: 3.9716 (3.9050) grad_norm: 2.3841 (2.3722) time: 0.7769 data: 0.0002 max mem: 8426 +[2024-12-10 20:39:49 root] (utils.py 283): INFO Epoch: [17] [ 820/2502] eta: 0:21:24 lr: 0.000010 loss_cls: 3.9331 (3.9022) grad_norm: 2.3297 (2.3716) time: 0.7866 data: 0.0002 max mem: 8426 +[2024-12-10 20:39:56 root] (utils.py 283): INFO Epoch: [17] [ 830/2502] eta: 0:21:16 lr: 0.000010 loss_cls: 3.8087 (3.9016) grad_norm: 2.3173 (2.3714) time: 0.7739 data: 0.0002 max mem: 8426 +[2024-12-10 20:40:04 root] (utils.py 283): INFO Epoch: [17] [ 840/2502] eta: 0:21:08 lr: 0.000010 loss_cls: 3.9720 (3.9034) grad_norm: 2.3417 (2.3716) time: 0.7609 data: 0.0002 max mem: 8426 +[2024-12-10 20:40:12 root] (utils.py 283): INFO Epoch: [17] [ 850/2502] eta: 0:21:01 lr: 0.000010 loss_cls: 4.1547 (3.9060) grad_norm: 2.3559 (2.3717) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 20:40:19 root] (utils.py 283): INFO Epoch: [17] [ 860/2502] eta: 0:20:53 lr: 0.000010 loss_cls: 4.1547 (3.9046) grad_norm: 2.3511 (2.3711) time: 0.7661 data: 0.0002 max mem: 8426 +[2024-12-10 20:40:27 root] (utils.py 283): INFO Epoch: [17] [ 870/2502] eta: 0:20:45 lr: 0.000010 loss_cls: 4.0824 (3.9063) grad_norm: 2.3538 (2.3713) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-10 20:40:35 root] (utils.py 283): INFO Epoch: [17] [ 880/2502] eta: 0:20:38 lr: 0.000010 loss_cls: 4.0884 (3.9065) grad_norm: 2.3894 (2.3721) time: 0.7672 data: 0.0002 max mem: 8426 +[2024-12-10 20:40:42 root] (utils.py 283): INFO Epoch: [17] [ 890/2502] eta: 0:20:30 lr: 0.000010 loss_cls: 4.0932 (3.9092) grad_norm: 2.3832 (2.3722) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-10 20:40:50 root] (utils.py 283): INFO Epoch: [17] [ 900/2502] eta: 0:20:23 lr: 0.000010 loss_cls: 4.1915 (3.9112) grad_norm: 2.3664 (2.3727) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 20:40:58 root] (utils.py 283): INFO Epoch: [17] [ 910/2502] eta: 0:20:15 lr: 0.000010 loss_cls: 3.8908 (3.9106) grad_norm: 2.3661 (2.3722) time: 0.7684 data: 0.0002 max mem: 8426 +[2024-12-10 20:41:05 root] (utils.py 283): INFO Epoch: [17] [ 920/2502] eta: 0:20:08 lr: 0.000010 loss_cls: 3.8908 (3.9127) grad_norm: 2.3531 (2.3725) time: 0.7688 data: 0.0002 max mem: 8426 +[2024-12-10 20:41:13 root] (utils.py 283): INFO Epoch: [17] [ 930/2502] eta: 0:20:00 lr: 0.000010 loss_cls: 4.1207 (3.9129) grad_norm: 2.3752 (2.3733) time: 0.7640 data: 0.0003 max mem: 8426 +[2024-12-10 20:41:21 root] (utils.py 283): INFO Epoch: [17] [ 940/2502] eta: 0:19:53 lr: 0.000010 loss_cls: 3.9236 (3.9114) grad_norm: 2.4064 (2.3735) time: 0.7737 data: 0.0002 max mem: 8426 +[2024-12-10 20:41:29 root] (utils.py 283): INFO Epoch: [17] [ 950/2502] eta: 0:19:45 lr: 0.000010 loss_cls: 3.7708 (3.9121) grad_norm: 2.3858 (2.3740) time: 0.7868 data: 0.0002 max mem: 8426 +[2024-12-10 20:41:37 root] (utils.py 283): INFO Epoch: [17] [ 960/2502] eta: 0:19:38 lr: 0.000010 loss_cls: 3.7824 (3.9084) grad_norm: 2.3655 (2.3740) time: 0.7870 data: 0.0002 max mem: 8426 +[2024-12-10 20:41:44 root] (utils.py 283): INFO Epoch: [17] [ 970/2502] eta: 0:19:31 lr: 0.000010 loss_cls: 3.7050 (3.9068) grad_norm: 2.3655 (2.3740) time: 0.7831 data: 0.0002 max mem: 8426 +[2024-12-10 20:41:52 root] (utils.py 283): INFO Epoch: [17] [ 980/2502] eta: 0:19:23 lr: 0.000010 loss_cls: 3.8340 (3.9077) grad_norm: 2.3650 (2.3743) time: 0.7795 data: 0.0002 max mem: 8426 +[2024-12-10 20:42:00 root] (utils.py 283): INFO Epoch: [17] [ 990/2502] eta: 0:19:16 lr: 0.000010 loss_cls: 4.0427 (3.9087) grad_norm: 2.3652 (2.3747) time: 0.7792 data: 0.0002 max mem: 8426 +[2024-12-10 20:42:08 root] (utils.py 283): INFO Epoch: [17] [1000/2502] eta: 0:19:08 lr: 0.000010 loss_cls: 3.8599 (3.9060) grad_norm: 2.3709 (2.3745) time: 0.7795 data: 0.0002 max mem: 8426 +[2024-12-10 20:42:16 root] (utils.py 283): INFO Epoch: [17] [1010/2502] eta: 0:19:01 lr: 0.000010 loss_cls: 3.6397 (3.9057) grad_norm: 2.4131 (2.3749) time: 0.7800 data: 0.0002 max mem: 8426 +[2024-12-10 20:42:23 root] (utils.py 283): INFO Epoch: [17] [1020/2502] eta: 0:18:54 lr: 0.000010 loss_cls: 3.9840 (3.9069) grad_norm: 2.4152 (2.3750) time: 0.7807 data: 0.0002 max mem: 8426 +[2024-12-10 20:42:31 root] (utils.py 283): INFO Epoch: [17] [1030/2502] eta: 0:18:46 lr: 0.000010 loss_cls: 3.9904 (3.9066) grad_norm: 2.4278 (2.3761) time: 0.7817 data: 0.0002 max mem: 8426 +[2024-12-10 20:42:39 root] (utils.py 283): INFO Epoch: [17] [1040/2502] eta: 0:18:39 lr: 0.000010 loss_cls: 3.7675 (3.9045) grad_norm: 2.4067 (2.3758) time: 0.7830 data: 0.0002 max mem: 8426 +[2024-12-10 20:42:47 root] (utils.py 283): INFO Epoch: [17] [1050/2502] eta: 0:18:31 lr: 0.000010 loss_cls: 3.6439 (3.9023) grad_norm: 2.3437 (2.3752) time: 0.7824 data: 0.0002 max mem: 8426 +[2024-12-10 20:42:55 root] (utils.py 283): INFO Epoch: [17] [1060/2502] eta: 0:18:24 lr: 0.000010 loss_cls: 3.9583 (3.9037) grad_norm: 2.3129 (2.3751) time: 0.7814 data: 0.0002 max mem: 8426 +[2024-12-10 20:43:03 root] (utils.py 283): INFO Epoch: [17] [1070/2502] eta: 0:18:16 lr: 0.000010 loss_cls: 4.2147 (3.9064) grad_norm: 2.3042 (2.3743) time: 0.7823 data: 0.0002 max mem: 8426 +[2024-12-10 20:43:10 root] (utils.py 283): INFO Epoch: [17] [1080/2502] eta: 0:18:09 lr: 0.000010 loss_cls: 4.1001 (3.9056) grad_norm: 2.2953 (2.3739) time: 0.7807 data: 0.0002 max mem: 8426 +[2024-12-10 20:43:18 root] (utils.py 283): INFO Epoch: [17] [1090/2502] eta: 0:18:01 lr: 0.000010 loss_cls: 3.6065 (3.9028) grad_norm: 2.3665 (2.3753) time: 0.7698 data: 0.0002 max mem: 8426 +[2024-12-10 20:43:26 root] (utils.py 283): INFO Epoch: [17] [1100/2502] eta: 0:17:54 lr: 0.000010 loss_cls: 3.7476 (3.9040) grad_norm: 2.4381 (2.3757) time: 0.7630 data: 0.0002 max mem: 8426 +[2024-12-10 20:43:33 root] (utils.py 283): INFO Epoch: [17] [1110/2502] eta: 0:17:46 lr: 0.000010 loss_cls: 4.0192 (3.9023) grad_norm: 2.3660 (2.3753) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 20:43:41 root] (utils.py 283): INFO Epoch: [17] [1120/2502] eta: 0:17:38 lr: 0.000010 loss_cls: 4.0184 (3.9041) grad_norm: 2.3356 (2.3756) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-10 20:43:49 root] (utils.py 283): INFO Epoch: [17] [1130/2502] eta: 0:17:31 lr: 0.000010 loss_cls: 3.8982 (3.9037) grad_norm: 2.3836 (2.3756) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-10 20:43:56 root] (utils.py 283): INFO Epoch: [17] [1140/2502] eta: 0:17:23 lr: 0.000010 loss_cls: 4.0515 (3.9050) grad_norm: 2.3444 (2.3756) time: 0.7661 data: 0.0002 max mem: 8426 +[2024-12-10 20:44:04 root] (utils.py 283): INFO Epoch: [17] [1150/2502] eta: 0:17:15 lr: 0.000010 loss_cls: 4.1708 (3.9061) grad_norm: 2.3317 (2.3749) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 20:44:12 root] (utils.py 283): INFO Epoch: [17] [1160/2502] eta: 0:17:08 lr: 0.000010 loss_cls: 4.1926 (3.9087) grad_norm: 2.3638 (2.3759) time: 0.7666 data: 0.0002 max mem: 8426 +[2024-12-10 20:44:19 root] (utils.py 283): INFO Epoch: [17] [1170/2502] eta: 0:17:00 lr: 0.000010 loss_cls: 4.1314 (3.9078) grad_norm: 2.3981 (2.3762) time: 0.7670 data: 0.0003 max mem: 8426 +[2024-12-10 20:44:27 root] (utils.py 283): INFO Epoch: [17] [1180/2502] eta: 0:16:52 lr: 0.000010 loss_cls: 4.0618 (3.9089) grad_norm: 2.3579 (2.3760) time: 0.7679 data: 0.0002 max mem: 8426 +[2024-12-10 20:44:35 root] (utils.py 283): INFO Epoch: [17] [1190/2502] eta: 0:16:45 lr: 0.000010 loss_cls: 4.1484 (3.9097) grad_norm: 2.3437 (2.3757) time: 0.7751 data: 0.0002 max mem: 8426 +[2024-12-10 20:44:42 root] (utils.py 283): INFO Epoch: [17] [1200/2502] eta: 0:16:37 lr: 0.000010 loss_cls: 3.8998 (3.9068) grad_norm: 2.3552 (2.3759) time: 0.7731 data: 0.0002 max mem: 8426 +[2024-12-10 20:44:50 root] (utils.py 283): INFO Epoch: [17] [1210/2502] eta: 0:16:30 lr: 0.000010 loss_cls: 3.8210 (3.9089) grad_norm: 2.3723 (2.3761) time: 0.7692 data: 0.0002 max mem: 8426 +[2024-12-10 20:44:58 root] (utils.py 283): INFO Epoch: [17] [1220/2502] eta: 0:16:22 lr: 0.000010 loss_cls: 4.1026 (3.9094) grad_norm: 2.3390 (2.3755) time: 0.7693 data: 0.0002 max mem: 8426 +[2024-12-10 20:45:05 root] (utils.py 283): INFO Epoch: [17] [1230/2502] eta: 0:16:14 lr: 0.000010 loss_cls: 3.9546 (3.9083) grad_norm: 2.3283 (2.3755) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 20:45:13 root] (utils.py 283): INFO Epoch: [17] [1240/2502] eta: 0:16:06 lr: 0.000010 loss_cls: 3.7435 (3.9061) grad_norm: 2.3374 (2.3753) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 20:45:21 root] (utils.py 283): INFO Epoch: [17] [1250/2502] eta: 0:15:59 lr: 0.000010 loss_cls: 3.9443 (3.9066) grad_norm: 2.3651 (2.3752) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-10 20:45:28 root] (utils.py 283): INFO Epoch: [17] [1260/2502] eta: 0:15:51 lr: 0.000010 loss_cls: 4.1017 (3.9064) grad_norm: 2.3682 (2.3757) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 20:45:36 root] (utils.py 283): INFO Epoch: [17] [1270/2502] eta: 0:15:43 lr: 0.000010 loss_cls: 4.0753 (3.9038) grad_norm: 2.4509 (2.3759) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 20:45:44 root] (utils.py 283): INFO Epoch: [17] [1280/2502] eta: 0:15:36 lr: 0.000010 loss_cls: 3.5402 (3.9036) grad_norm: 2.3674 (2.3761) time: 0.7710 data: 0.0002 max mem: 8426 +[2024-12-10 20:45:51 root] (utils.py 283): INFO Epoch: [17] [1290/2502] eta: 0:15:28 lr: 0.000010 loss_cls: 4.0368 (3.9048) grad_norm: 2.3564 (2.3759) time: 0.7739 data: 0.0002 max mem: 8426 +[2024-12-10 20:45:59 root] (utils.py 283): INFO Epoch: [17] [1300/2502] eta: 0:15:21 lr: 0.000010 loss_cls: 4.0967 (3.9047) grad_norm: 2.3495 (2.3763) time: 0.7693 data: 0.0002 max mem: 8426 +[2024-12-10 20:46:07 root] (utils.py 283): INFO Epoch: [17] [1310/2502] eta: 0:15:13 lr: 0.000010 loss_cls: 3.9234 (3.9040) grad_norm: 2.3939 (2.3767) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-10 20:46:14 root] (utils.py 283): INFO Epoch: [17] [1320/2502] eta: 0:15:05 lr: 0.000010 loss_cls: 4.0924 (3.9052) grad_norm: 2.4237 (2.3765) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 20:46:22 root] (utils.py 283): INFO Epoch: [17] [1330/2502] eta: 0:14:58 lr: 0.000010 loss_cls: 4.1055 (3.9038) grad_norm: 2.2886 (2.3760) time: 0.7625 data: 0.0002 max mem: 8426 +[2024-12-10 20:46:30 root] (utils.py 283): INFO Epoch: [17] [1340/2502] eta: 0:14:50 lr: 0.000010 loss_cls: 3.8079 (3.9038) grad_norm: 2.3949 (2.3764) time: 0.7677 data: 0.0002 max mem: 8426 +[2024-12-10 20:46:37 root] (utils.py 283): INFO Epoch: [17] [1350/2502] eta: 0:14:42 lr: 0.000010 loss_cls: 3.9725 (3.9044) grad_norm: 2.4112 (2.3767) time: 0.7703 data: 0.0002 max mem: 8426 +[2024-12-10 20:46:45 root] (utils.py 283): INFO Epoch: [17] [1360/2502] eta: 0:14:35 lr: 0.000010 loss_cls: 3.8667 (3.9022) grad_norm: 2.3907 (2.3764) time: 0.7672 data: 0.0002 max mem: 8426 +[2024-12-10 20:46:53 root] (utils.py 283): INFO Epoch: [17] [1370/2502] eta: 0:14:27 lr: 0.000010 loss_cls: 4.0207 (3.9025) grad_norm: 2.3813 (2.3769) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-10 20:47:00 root] (utils.py 283): INFO Epoch: [17] [1380/2502] eta: 0:14:19 lr: 0.000010 loss_cls: 4.1018 (3.9033) grad_norm: 2.3841 (2.3769) time: 0.7679 data: 0.0002 max mem: 8426 +[2024-12-10 20:47:08 root] (utils.py 283): INFO Epoch: [17] [1390/2502] eta: 0:14:12 lr: 0.000010 loss_cls: 4.1018 (3.9052) grad_norm: 2.3853 (2.3775) time: 0.7747 data: 0.0003 max mem: 8426 +[2024-12-10 20:47:16 root] (utils.py 283): INFO Epoch: [17] [1400/2502] eta: 0:14:04 lr: 0.000010 loss_cls: 4.1455 (3.9056) grad_norm: 2.3373 (2.3773) time: 0.7740 data: 0.0003 max mem: 8426 +[2024-12-10 20:47:24 root] (utils.py 283): INFO Epoch: [17] [1410/2502] eta: 0:13:56 lr: 0.000010 loss_cls: 4.0959 (3.9054) grad_norm: 2.3303 (2.3773) time: 0.7679 data: 0.0003 max mem: 8426 +[2024-12-10 20:47:31 root] (utils.py 283): INFO Epoch: [17] [1420/2502] eta: 0:13:49 lr: 0.000010 loss_cls: 4.0228 (3.9050) grad_norm: 2.3751 (2.3773) time: 0.7737 data: 0.0002 max mem: 8426 +[2024-12-10 20:47:39 root] (utils.py 283): INFO Epoch: [17] [1430/2502] eta: 0:13:41 lr: 0.000010 loss_cls: 4.0228 (3.9050) grad_norm: 2.3294 (2.3774) time: 0.7727 data: 0.0002 max mem: 8426 +[2024-12-10 20:47:47 root] (utils.py 283): INFO Epoch: [17] [1440/2502] eta: 0:13:33 lr: 0.000010 loss_cls: 4.2902 (3.9063) grad_norm: 2.3294 (2.3774) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 20:47:54 root] (utils.py 283): INFO Epoch: [17] [1450/2502] eta: 0:13:26 lr: 0.000010 loss_cls: 4.0294 (3.9065) grad_norm: 2.3428 (2.3775) time: 0.7601 data: 0.0002 max mem: 8426 +[2024-12-10 20:48:02 root] (utils.py 283): INFO Epoch: [17] [1460/2502] eta: 0:13:18 lr: 0.000010 loss_cls: 4.0095 (3.9065) grad_norm: 2.3821 (2.3777) time: 0.7606 data: 0.0002 max mem: 8426 +[2024-12-10 20:48:10 root] (utils.py 283): INFO Epoch: [17] [1470/2502] eta: 0:13:10 lr: 0.000010 loss_cls: 4.1384 (3.9073) grad_norm: 2.3907 (2.3783) time: 0.7624 data: 0.0003 max mem: 8426 +[2024-12-10 20:48:17 root] (utils.py 283): INFO Epoch: [17] [1480/2502] eta: 0:13:03 lr: 0.000010 loss_cls: 3.8145 (3.9063) grad_norm: 2.3677 (2.3778) time: 0.7615 data: 0.0002 max mem: 8426 +[2024-12-10 20:48:25 root] (utils.py 283): INFO Epoch: [17] [1490/2502] eta: 0:12:55 lr: 0.000010 loss_cls: 3.7729 (3.9051) grad_norm: 2.3446 (2.3778) time: 0.7624 data: 0.0003 max mem: 8426 +[2024-12-10 20:48:33 root] (utils.py 283): INFO Epoch: [17] [1500/2502] eta: 0:12:47 lr: 0.000010 loss_cls: 4.1192 (3.9075) grad_norm: 2.3594 (2.3776) time: 0.7741 data: 0.0003 max mem: 8426 +[2024-12-10 20:48:40 root] (utils.py 283): INFO Epoch: [17] [1510/2502] eta: 0:12:40 lr: 0.000010 loss_cls: 4.1785 (3.9080) grad_norm: 2.3594 (2.3779) time: 0.7774 data: 0.0002 max mem: 8426 +[2024-12-10 20:48:48 root] (utils.py 283): INFO Epoch: [17] [1520/2502] eta: 0:12:32 lr: 0.000010 loss_cls: 4.1145 (3.9095) grad_norm: 2.3283 (2.3778) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 20:48:56 root] (utils.py 283): INFO Epoch: [17] [1530/2502] eta: 0:12:24 lr: 0.000010 loss_cls: 3.9818 (3.9084) grad_norm: 2.3514 (2.3779) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 20:49:03 root] (utils.py 283): INFO Epoch: [17] [1540/2502] eta: 0:12:17 lr: 0.000010 loss_cls: 3.9243 (3.9087) grad_norm: 2.3981 (2.3784) time: 0.7706 data: 0.0003 max mem: 8426 +[2024-12-10 20:49:11 root] (utils.py 283): INFO Epoch: [17] [1550/2502] eta: 0:12:09 lr: 0.000010 loss_cls: 3.9412 (3.9079) grad_norm: 2.3790 (2.3783) time: 0.7677 data: 0.0003 max mem: 8426 +[2024-12-10 20:49:19 root] (utils.py 283): INFO Epoch: [17] [1560/2502] eta: 0:12:01 lr: 0.000010 loss_cls: 3.9412 (3.9073) grad_norm: 2.3185 (2.3778) time: 0.7645 data: 0.0003 max mem: 8426 +[2024-12-10 20:49:26 root] (utils.py 283): INFO Epoch: [17] [1570/2502] eta: 0:11:54 lr: 0.000010 loss_cls: 3.9921 (3.9081) grad_norm: 2.3007 (2.3775) time: 0.7754 data: 0.0003 max mem: 8426 +[2024-12-10 20:49:34 root] (utils.py 283): INFO Epoch: [17] [1580/2502] eta: 0:11:46 lr: 0.000010 loss_cls: 4.2238 (3.9101) grad_norm: 2.3128 (2.3772) time: 0.7843 data: 0.0003 max mem: 8426 +[2024-12-10 20:49:42 root] (utils.py 283): INFO Epoch: [17] [1590/2502] eta: 0:11:39 lr: 0.000010 loss_cls: 4.2029 (3.9111) grad_norm: 2.3103 (2.3767) time: 0.7844 data: 0.0003 max mem: 8426 +[2024-12-10 20:49:50 root] (utils.py 283): INFO Epoch: [17] [1600/2502] eta: 0:11:31 lr: 0.000010 loss_cls: 4.1633 (3.9110) grad_norm: 2.3346 (2.3768) time: 0.7840 data: 0.0003 max mem: 8426 +[2024-12-10 20:49:58 root] (utils.py 283): INFO Epoch: [17] [1610/2502] eta: 0:11:24 lr: 0.000010 loss_cls: 3.9235 (3.9108) grad_norm: 2.3961 (2.3766) time: 0.7833 data: 0.0003 max mem: 8426 +[2024-12-10 20:50:06 root] (utils.py 283): INFO Epoch: [17] [1620/2502] eta: 0:11:16 lr: 0.000010 loss_cls: 4.0906 (3.9114) grad_norm: 2.3327 (2.3762) time: 0.7828 data: 0.0003 max mem: 8426 +[2024-12-10 20:50:13 root] (utils.py 283): INFO Epoch: [17] [1630/2502] eta: 0:11:08 lr: 0.000010 loss_cls: 4.0906 (3.9124) grad_norm: 2.3327 (2.3759) time: 0.7820 data: 0.0003 max mem: 8426 +[2024-12-10 20:50:21 root] (utils.py 283): INFO Epoch: [17] [1640/2502] eta: 0:11:01 lr: 0.000010 loss_cls: 4.1093 (3.9132) grad_norm: 2.3539 (2.3762) time: 0.7779 data: 0.0002 max mem: 8426 +[2024-12-10 20:50:29 root] (utils.py 283): INFO Epoch: [17] [1650/2502] eta: 0:10:53 lr: 0.000010 loss_cls: 4.2817 (3.9139) grad_norm: 2.3810 (2.3763) time: 0.7713 data: 0.0003 max mem: 8426 +[2024-12-10 20:50:37 root] (utils.py 283): INFO Epoch: [17] [1660/2502] eta: 0:10:45 lr: 0.000010 loss_cls: 4.3427 (3.9153) grad_norm: 2.3012 (2.3761) time: 0.7664 data: 0.0003 max mem: 8426 +[2024-12-10 20:50:44 root] (utils.py 283): INFO Epoch: [17] [1670/2502] eta: 0:10:38 lr: 0.000010 loss_cls: 4.2625 (3.9162) grad_norm: 2.4133 (2.3764) time: 0.7613 data: 0.0002 max mem: 8426 +[2024-12-10 20:50:52 root] (utils.py 283): INFO Epoch: [17] [1680/2502] eta: 0:10:30 lr: 0.000010 loss_cls: 3.8650 (3.9145) grad_norm: 2.4133 (2.3764) time: 0.7587 data: 0.0002 max mem: 8426 +[2024-12-10 20:50:59 root] (utils.py 283): INFO Epoch: [17] [1690/2502] eta: 0:10:22 lr: 0.000010 loss_cls: 3.7912 (3.9148) grad_norm: 2.3804 (2.3768) time: 0.7608 data: 0.0003 max mem: 8426 +[2024-12-10 20:51:07 root] (utils.py 283): INFO Epoch: [17] [1700/2502] eta: 0:10:15 lr: 0.000010 loss_cls: 4.1326 (3.9159) grad_norm: 2.4189 (2.3771) time: 0.7678 data: 0.0003 max mem: 8426 +[2024-12-10 20:51:15 root] (utils.py 283): INFO Epoch: [17] [1710/2502] eta: 0:10:07 lr: 0.000010 loss_cls: 4.1326 (3.9167) grad_norm: 2.3433 (2.3770) time: 0.7775 data: 0.0003 max mem: 8426 +[2024-12-10 20:51:23 root] (utils.py 283): INFO Epoch: [17] [1720/2502] eta: 0:10:00 lr: 0.000010 loss_cls: 4.1226 (3.9178) grad_norm: 2.3524 (2.3770) time: 0.7817 data: 0.0002 max mem: 8426 +[2024-12-10 20:51:31 root] (utils.py 283): INFO Epoch: [17] [1730/2502] eta: 0:09:52 lr: 0.000010 loss_cls: 3.9178 (3.9163) grad_norm: 2.3466 (2.3768) time: 0.7811 data: 0.0002 max mem: 8426 +[2024-12-10 20:51:38 root] (utils.py 283): INFO Epoch: [17] [1740/2502] eta: 0:09:44 lr: 0.000010 loss_cls: 3.8449 (3.9174) grad_norm: 2.3391 (2.3768) time: 0.7815 data: 0.0003 max mem: 8426 +[2024-12-10 20:51:46 root] (utils.py 283): INFO Epoch: [17] [1750/2502] eta: 0:09:37 lr: 0.000010 loss_cls: 3.9797 (3.9157) grad_norm: 2.3608 (2.3768) time: 0.7851 data: 0.0003 max mem: 8426 +[2024-12-10 20:51:54 root] (utils.py 283): INFO Epoch: [17] [1760/2502] eta: 0:09:29 lr: 0.000010 loss_cls: 3.7910 (3.9150) grad_norm: 2.4159 (2.3770) time: 0.7846 data: 0.0002 max mem: 8426 +[2024-12-10 20:52:02 root] (utils.py 283): INFO Epoch: [17] [1770/2502] eta: 0:09:22 lr: 0.000010 loss_cls: 4.0005 (3.9157) grad_norm: 2.3801 (2.3771) time: 0.7886 data: 0.0002 max mem: 8426 +[2024-12-10 20:52:10 root] (utils.py 283): INFO Epoch: [17] [1780/2502] eta: 0:09:14 lr: 0.000010 loss_cls: 4.2675 (3.9163) grad_norm: 2.3396 (2.3771) time: 0.7883 data: 0.0002 max mem: 8426 +[2024-12-10 20:52:18 root] (utils.py 283): INFO Epoch: [17] [1790/2502] eta: 0:09:06 lr: 0.000010 loss_cls: 3.8120 (3.9153) grad_norm: 2.3352 (2.3770) time: 0.7806 data: 0.0002 max mem: 8426 +[2024-12-10 20:52:25 root] (utils.py 283): INFO Epoch: [17] [1800/2502] eta: 0:08:59 lr: 0.000010 loss_cls: 3.8394 (3.9165) grad_norm: 2.3470 (2.3770) time: 0.7806 data: 0.0002 max mem: 8426 +[2024-12-10 20:52:33 root] (utils.py 283): INFO Epoch: [17] [1810/2502] eta: 0:08:51 lr: 0.000010 loss_cls: 4.1892 (3.9167) grad_norm: 2.3981 (2.3773) time: 0.7803 data: 0.0002 max mem: 8426 +[2024-12-10 20:52:41 root] (utils.py 283): INFO Epoch: [17] [1820/2502] eta: 0:08:43 lr: 0.000010 loss_cls: 4.2575 (3.9182) grad_norm: 2.3518 (2.3770) time: 0.7815 data: 0.0003 max mem: 8426 +[2024-12-10 20:52:49 root] (utils.py 283): INFO Epoch: [17] [1830/2502] eta: 0:08:36 lr: 0.000010 loss_cls: 4.2285 (3.9189) grad_norm: 2.3086 (2.3771) time: 0.7827 data: 0.0003 max mem: 8426 +[2024-12-10 20:52:57 root] (utils.py 283): INFO Epoch: [17] [1840/2502] eta: 0:08:28 lr: 0.000010 loss_cls: 4.0525 (3.9186) grad_norm: 2.3545 (2.3771) time: 0.7824 data: 0.0002 max mem: 8426 +[2024-12-10 20:53:05 root] (utils.py 283): INFO Epoch: [17] [1850/2502] eta: 0:08:21 lr: 0.000010 loss_cls: 4.0525 (3.9201) grad_norm: 2.3569 (2.3771) time: 0.7830 data: 0.0003 max mem: 8426 +[2024-12-10 20:53:12 root] (utils.py 283): INFO Epoch: [17] [1860/2502] eta: 0:08:13 lr: 0.000010 loss_cls: 4.1340 (3.9203) grad_norm: 2.3731 (2.3771) time: 0.7828 data: 0.0003 max mem: 8426 +[2024-12-10 20:53:20 root] (utils.py 283): INFO Epoch: [17] [1870/2502] eta: 0:08:05 lr: 0.000010 loss_cls: 4.1340 (3.9203) grad_norm: 2.3542 (2.3775) time: 0.7810 data: 0.0002 max mem: 8426 +[2024-12-10 20:53:28 root] (utils.py 283): INFO Epoch: [17] [1880/2502] eta: 0:07:58 lr: 0.000010 loss_cls: 3.8844 (3.9188) grad_norm: 2.3262 (2.3776) time: 0.7812 data: 0.0002 max mem: 8426 +[2024-12-10 20:53:36 root] (utils.py 283): INFO Epoch: [17] [1890/2502] eta: 0:07:50 lr: 0.000010 loss_cls: 3.7456 (3.9176) grad_norm: 2.3136 (2.3774) time: 0.7742 data: 0.0003 max mem: 8426 +[2024-12-10 20:53:43 root] (utils.py 283): INFO Epoch: [17] [1900/2502] eta: 0:07:42 lr: 0.000010 loss_cls: 4.0860 (3.9177) grad_norm: 2.3895 (2.3779) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 20:53:51 root] (utils.py 283): INFO Epoch: [17] [1910/2502] eta: 0:07:35 lr: 0.000010 loss_cls: 4.0860 (3.9174) grad_norm: 2.4272 (2.3781) time: 0.7730 data: 0.0002 max mem: 8426 +[2024-12-10 20:53:59 root] (utils.py 283): INFO Epoch: [17] [1920/2502] eta: 0:07:27 lr: 0.000010 loss_cls: 3.8592 (3.9154) grad_norm: 2.3867 (2.3779) time: 0.7756 data: 0.0002 max mem: 8426 +[2024-12-10 20:54:06 root] (utils.py 283): INFO Epoch: [17] [1930/2502] eta: 0:07:19 lr: 0.000010 loss_cls: 3.9098 (3.9161) grad_norm: 2.3385 (2.3777) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 20:54:14 root] (utils.py 283): INFO Epoch: [17] [1940/2502] eta: 0:07:11 lr: 0.000010 loss_cls: 4.0249 (3.9156) grad_norm: 2.3782 (2.3778) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 20:54:22 root] (utils.py 283): INFO Epoch: [17] [1950/2502] eta: 0:07:04 lr: 0.000010 loss_cls: 3.9014 (3.9147) grad_norm: 2.3922 (2.3778) time: 0.7733 data: 0.0002 max mem: 8426 +[2024-12-10 20:54:30 root] (utils.py 283): INFO Epoch: [17] [1960/2502] eta: 0:06:56 lr: 0.000010 loss_cls: 4.0280 (3.9161) grad_norm: 2.3922 (2.3778) time: 0.7732 data: 0.0002 max mem: 8426 +[2024-12-10 20:54:37 root] (utils.py 283): INFO Epoch: [17] [1970/2502] eta: 0:06:48 lr: 0.000010 loss_cls: 3.9834 (3.9156) grad_norm: 2.3533 (2.3777) time: 0.7659 data: 0.0002 max mem: 8426 +[2024-12-10 20:54:45 root] (utils.py 283): INFO Epoch: [17] [1980/2502] eta: 0:06:41 lr: 0.000010 loss_cls: 3.8735 (3.9152) grad_norm: 2.4136 (2.3781) time: 0.7679 data: 0.0002 max mem: 8426 +[2024-12-10 20:54:53 root] (utils.py 283): INFO Epoch: [17] [1990/2502] eta: 0:06:33 lr: 0.000010 loss_cls: 3.7749 (3.9142) grad_norm: 2.4516 (2.3785) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 20:55:00 root] (utils.py 283): INFO Epoch: [17] [2000/2502] eta: 0:06:25 lr: 0.000010 loss_cls: 3.4767 (3.9128) grad_norm: 2.4287 (2.3788) time: 0.7671 data: 0.0002 max mem: 8426 +[2024-12-10 20:55:08 root] (utils.py 283): INFO Epoch: [17] [2010/2502] eta: 0:06:18 lr: 0.000010 loss_cls: 4.1085 (3.9143) grad_norm: 2.4913 (2.3796) time: 0.7711 data: 0.0002 max mem: 8426 +[2024-12-10 20:55:16 root] (utils.py 283): INFO Epoch: [17] [2020/2502] eta: 0:06:10 lr: 0.000010 loss_cls: 4.1085 (3.9134) grad_norm: 2.5533 (2.3801) time: 0.7674 data: 0.0002 max mem: 8426 +[2024-12-10 20:55:23 root] (utils.py 283): INFO Epoch: [17] [2030/2502] eta: 0:06:02 lr: 0.000010 loss_cls: 3.9052 (3.9131) grad_norm: 2.4304 (2.3801) time: 0.7674 data: 0.0002 max mem: 8426 +[2024-12-10 20:55:31 root] (utils.py 283): INFO Epoch: [17] [2040/2502] eta: 0:05:55 lr: 0.000010 loss_cls: 3.9778 (3.9127) grad_norm: 2.4313 (2.3805) time: 0.7674 data: 0.0002 max mem: 8426 +[2024-12-10 20:55:39 root] (utils.py 283): INFO Epoch: [17] [2050/2502] eta: 0:05:47 lr: 0.000010 loss_cls: 4.1131 (3.9130) grad_norm: 2.4229 (2.3804) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 20:55:46 root] (utils.py 283): INFO Epoch: [17] [2060/2502] eta: 0:05:39 lr: 0.000010 loss_cls: 4.0192 (3.9132) grad_norm: 2.4408 (2.3809) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-10 20:55:54 root] (utils.py 283): INFO Epoch: [17] [2070/2502] eta: 0:05:32 lr: 0.000010 loss_cls: 4.0409 (3.9138) grad_norm: 2.4590 (2.3811) time: 0.7668 data: 0.0002 max mem: 8426 +[2024-12-10 20:56:02 root] (utils.py 283): INFO Epoch: [17] [2080/2502] eta: 0:05:24 lr: 0.000010 loss_cls: 4.2272 (3.9144) grad_norm: 2.3305 (2.3809) time: 0.7677 data: 0.0003 max mem: 8426 +[2024-12-10 20:56:09 root] (utils.py 283): INFO Epoch: [17] [2090/2502] eta: 0:05:16 lr: 0.000010 loss_cls: 4.0465 (3.9133) grad_norm: 2.3273 (2.3808) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 20:56:17 root] (utils.py 283): INFO Epoch: [17] [2100/2502] eta: 0:05:08 lr: 0.000010 loss_cls: 3.8460 (3.9134) grad_norm: 2.3954 (2.3812) time: 0.7652 data: 0.0003 max mem: 8426 +[2024-12-10 20:56:25 root] (utils.py 283): INFO Epoch: [17] [2110/2502] eta: 0:05:01 lr: 0.000010 loss_cls: 4.1411 (3.9136) grad_norm: 2.4065 (2.3812) time: 0.7643 data: 0.0003 max mem: 8426 +[2024-12-10 20:56:32 root] (utils.py 283): INFO Epoch: [17] [2120/2502] eta: 0:04:53 lr: 0.000010 loss_cls: 4.1919 (3.9141) grad_norm: 2.3605 (2.3810) time: 0.7622 data: 0.0003 max mem: 8426 +[2024-12-10 20:56:40 root] (utils.py 283): INFO Epoch: [17] [2130/2502] eta: 0:04:45 lr: 0.000010 loss_cls: 4.1005 (3.9143) grad_norm: 2.3854 (2.3813) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 20:56:47 root] (utils.py 283): INFO Epoch: [17] [2140/2502] eta: 0:04:38 lr: 0.000010 loss_cls: 3.8936 (3.9136) grad_norm: 2.3870 (2.3811) time: 0.7662 data: 0.0002 max mem: 8426 +[2024-12-10 20:56:55 root] (utils.py 283): INFO Epoch: [17] [2150/2502] eta: 0:04:30 lr: 0.000010 loss_cls: 3.6927 (3.9133) grad_norm: 2.3858 (2.3812) time: 0.7663 data: 0.0003 max mem: 8426 +[2024-12-10 20:57:03 root] (utils.py 283): INFO Epoch: [17] [2160/2502] eta: 0:04:22 lr: 0.000010 loss_cls: 3.7614 (3.9130) grad_norm: 2.3858 (2.3812) time: 0.7639 data: 0.0003 max mem: 8426 +[2024-12-10 20:57:10 root] (utils.py 283): INFO Epoch: [17] [2170/2502] eta: 0:04:15 lr: 0.000010 loss_cls: 3.7614 (3.9130) grad_norm: 2.3768 (2.3811) time: 0.7643 data: 0.0003 max mem: 8426 +[2024-12-10 20:57:18 root] (utils.py 283): INFO Epoch: [17] [2180/2502] eta: 0:04:07 lr: 0.000010 loss_cls: 4.1518 (3.9137) grad_norm: 2.3943 (2.3812) time: 0.7645 data: 0.0003 max mem: 8426 +[2024-12-10 20:57:26 root] (utils.py 283): INFO Epoch: [17] [2190/2502] eta: 0:03:59 lr: 0.000010 loss_cls: 4.2067 (3.9152) grad_norm: 2.3819 (2.3813) time: 0.7641 data: 0.0003 max mem: 8426 +[2024-12-10 20:57:33 root] (utils.py 283): INFO Epoch: [17] [2200/2502] eta: 0:03:52 lr: 0.000010 loss_cls: 4.1991 (3.9157) grad_norm: 2.3481 (2.3811) time: 0.7646 data: 0.0003 max mem: 8426 +[2024-12-10 20:57:41 root] (utils.py 283): INFO Epoch: [17] [2210/2502] eta: 0:03:44 lr: 0.000010 loss_cls: 4.0645 (3.9156) grad_norm: 2.3831 (2.3811) time: 0.7645 data: 0.0003 max mem: 8426 +[2024-12-10 20:57:49 root] (utils.py 283): INFO Epoch: [17] [2220/2502] eta: 0:03:36 lr: 0.000010 loss_cls: 4.0113 (3.9158) grad_norm: 2.4004 (2.3814) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 20:57:56 root] (utils.py 283): INFO Epoch: [17] [2230/2502] eta: 0:03:28 lr: 0.000010 loss_cls: 4.0113 (3.9157) grad_norm: 2.3879 (2.3813) time: 0.7666 data: 0.0002 max mem: 8426 +[2024-12-10 20:58:04 root] (utils.py 283): INFO Epoch: [17] [2240/2502] eta: 0:03:21 lr: 0.000010 loss_cls: 4.0117 (3.9151) grad_norm: 2.3330 (2.3809) time: 0.7692 data: 0.0002 max mem: 8426 +[2024-12-10 20:58:12 root] (utils.py 283): INFO Epoch: [17] [2250/2502] eta: 0:03:13 lr: 0.000010 loss_cls: 4.0179 (3.9157) grad_norm: 2.3049 (2.3806) time: 0.7655 data: 0.0003 max mem: 8426 +[2024-12-10 20:58:19 root] (utils.py 283): INFO Epoch: [17] [2260/2502] eta: 0:03:05 lr: 0.000010 loss_cls: 4.1329 (3.9157) grad_norm: 2.3292 (2.3806) time: 0.7662 data: 0.0002 max mem: 8426 +[2024-12-10 20:58:27 root] (utils.py 283): INFO Epoch: [17] [2270/2502] eta: 0:02:58 lr: 0.000010 loss_cls: 4.1402 (3.9168) grad_norm: 2.3590 (2.3807) time: 0.7662 data: 0.0002 max mem: 8426 +[2024-12-10 20:58:35 root] (utils.py 283): INFO Epoch: [17] [2280/2502] eta: 0:02:50 lr: 0.000010 loss_cls: 4.1750 (3.9156) grad_norm: 2.4436 (2.3811) time: 0.7634 data: 0.0003 max mem: 8426 +[2024-12-10 20:58:42 root] (utils.py 283): INFO Epoch: [17] [2290/2502] eta: 0:02:42 lr: 0.000010 loss_cls: 3.7729 (3.9150) grad_norm: 2.4346 (2.3812) time: 0.7609 data: 0.0003 max mem: 8426 +[2024-12-10 20:58:50 root] (utils.py 283): INFO Epoch: [17] [2300/2502] eta: 0:02:35 lr: 0.000010 loss_cls: 3.9403 (3.9154) grad_norm: 2.4119 (2.3814) time: 0.7673 data: 0.0002 max mem: 8426 +[2024-12-10 20:58:58 root] (utils.py 283): INFO Epoch: [17] [2310/2502] eta: 0:02:27 lr: 0.000010 loss_cls: 3.9274 (3.9141) grad_norm: 2.3925 (2.3816) time: 0.7713 data: 0.0002 max mem: 8426 +[2024-12-10 20:59:05 root] (utils.py 283): INFO Epoch: [17] [2320/2502] eta: 0:02:19 lr: 0.000010 loss_cls: 3.7877 (3.9140) grad_norm: 2.3856 (2.3816) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 20:59:13 root] (utils.py 283): INFO Epoch: [17] [2330/2502] eta: 0:02:12 lr: 0.000010 loss_cls: 4.0058 (3.9135) grad_norm: 2.3550 (2.3813) time: 0.7688 data: 0.0003 max mem: 8426 +[2024-12-10 20:59:21 root] (utils.py 283): INFO Epoch: [17] [2340/2502] eta: 0:02:04 lr: 0.000010 loss_cls: 3.6838 (3.9127) grad_norm: 2.3411 (2.3812) time: 0.7708 data: 0.0003 max mem: 8426 +[2024-12-10 20:59:28 root] (utils.py 283): INFO Epoch: [17] [2350/2502] eta: 0:01:56 lr: 0.000010 loss_cls: 3.8716 (3.9132) grad_norm: 2.3633 (2.3812) time: 0.7679 data: 0.0002 max mem: 8426 +[2024-12-10 20:59:36 root] (utils.py 283): INFO Epoch: [17] [2360/2502] eta: 0:01:49 lr: 0.000010 loss_cls: 3.9548 (3.9132) grad_norm: 2.3089 (2.3810) time: 0.7662 data: 0.0002 max mem: 8426 +[2024-12-10 20:59:44 root] (utils.py 283): INFO Epoch: [17] [2370/2502] eta: 0:01:41 lr: 0.000010 loss_cls: 3.9548 (3.9128) grad_norm: 2.2958 (2.3806) time: 0.7634 data: 0.0003 max mem: 8426 +[2024-12-10 20:59:51 root] (utils.py 283): INFO Epoch: [17] [2380/2502] eta: 0:01:33 lr: 0.000010 loss_cls: 3.9403 (3.9126) grad_norm: 2.3356 (2.3807) time: 0.7630 data: 0.0003 max mem: 8426 +[2024-12-10 20:59:59 root] (utils.py 283): INFO Epoch: [17] [2390/2502] eta: 0:01:26 lr: 0.000010 loss_cls: 3.7961 (3.9123) grad_norm: 2.3458 (2.3805) time: 0.7672 data: 0.0003 max mem: 8426 +[2024-12-10 21:00:07 root] (utils.py 283): INFO Epoch: [17] [2400/2502] eta: 0:01:18 lr: 0.000010 loss_cls: 3.7961 (3.9113) grad_norm: 2.3571 (2.3808) time: 0.7678 data: 0.0003 max mem: 8426 +[2024-12-10 21:00:14 root] (utils.py 283): INFO Epoch: [17] [2410/2502] eta: 0:01:10 lr: 0.000010 loss_cls: 4.0361 (3.9119) grad_norm: 2.3666 (2.3806) time: 0.7692 data: 0.0003 max mem: 8426 +[2024-12-10 21:00:22 root] (utils.py 283): INFO Epoch: [17] [2420/2502] eta: 0:01:02 lr: 0.000010 loss_cls: 4.2148 (3.9123) grad_norm: 2.3477 (2.3805) time: 0.7673 data: 0.0002 max mem: 8426 +[2024-12-10 21:00:30 root] (utils.py 283): INFO Epoch: [17] [2430/2502] eta: 0:00:55 lr: 0.000010 loss_cls: 4.1383 (3.9125) grad_norm: 2.3145 (2.3804) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-10 21:00:37 root] (utils.py 283): INFO Epoch: [17] [2440/2502] eta: 0:00:47 lr: 0.000010 loss_cls: 4.1307 (3.9131) grad_norm: 2.3447 (2.3802) time: 0.7629 data: 0.0003 max mem: 8426 +[2024-12-10 21:00:45 root] (utils.py 283): INFO Epoch: [17] [2450/2502] eta: 0:00:39 lr: 0.000010 loss_cls: 3.9839 (3.9120) grad_norm: 2.3543 (2.3802) time: 0.7639 data: 0.0003 max mem: 8426 +[2024-12-10 21:00:53 root] (utils.py 283): INFO Epoch: [17] [2460/2502] eta: 0:00:32 lr: 0.000010 loss_cls: 3.8089 (3.9115) grad_norm: 2.3478 (2.3801) time: 0.7670 data: 0.0003 max mem: 8426 +[2024-12-10 21:01:00 root] (utils.py 283): INFO Epoch: [17] [2470/2502] eta: 0:00:24 lr: 0.000010 loss_cls: 4.0965 (3.9126) grad_norm: 2.3256 (2.3801) time: 0.7661 data: 0.0003 max mem: 8426 +[2024-12-10 21:01:08 root] (utils.py 283): INFO Epoch: [17] [2480/2502] eta: 0:00:16 lr: 0.000010 loss_cls: 4.1498 (3.9134) grad_norm: 2.3256 (2.3800) time: 0.7727 data: 0.0002 max mem: 8426 +[2024-12-10 21:01:16 root] (utils.py 283): INFO Epoch: [17] [2490/2502] eta: 0:00:09 lr: 0.000010 loss_cls: 4.0101 (3.9129) grad_norm: 2.3629 (2.3800) time: 0.7962 data: 0.0244 max mem: 8426 +[2024-12-10 21:01:24 root] (utils.py 283): INFO Epoch: [17] [2500/2502] eta: 0:00:01 lr: 0.000010 loss_cls: 4.0101 (3.9131) grad_norm: 2.3700 (2.3798) time: 0.7915 data: 0.0244 max mem: 8426 +[2024-12-10 21:01:25 root] (utils.py 283): INFO Epoch: [17] [2501/2502] eta: 0:00:00 lr: 0.000010 loss_cls: 4.0101 (3.9130) grad_norm: 2.2996 (2.3798) time: 0.7906 data: 0.0244 max mem: 8426 +[2024-12-10 21:01:25 root] (utils.py 297): INFO Epoch: [17] Total time: 0:32:02 (0.7684 s / it) +[2024-12-10 21:01:25 root] (engine.py 179): INFO Averaged stats:lr: 0.000010 loss_cls: 4.0101 (3.9131) grad_norm: 2.2996 (2.3798) +[2024-12-10 21:01:25 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.6345 (0.6345) acc1: 85.9375 (85.9375) acc3: 96.0938 (96.0938) acc5: 98.4375 (98.4375) time: 0.1275 data: 0.0003 max mem: 8426 +[2024-12-10 21:01:26 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7399 (0.8076) acc1: 85.9375 (82.6705) acc3: 95.3125 (93.1818) acc5: 97.6562 (96.6619) time: 0.1276 data: 0.0003 max mem: 8426 +[2024-12-10 21:01:28 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8627 (0.8599) acc1: 78.9062 (81.3616) acc3: 92.1875 (92.7083) acc5: 96.0938 (95.9077) time: 0.1278 data: 0.0004 max mem: 8426 +[2024-12-10 21:01:29 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9379 (0.8770) acc1: 78.9062 (80.5192) acc3: 92.1875 (92.8679) acc5: 96.0938 (95.8669) time: 0.1281 data: 0.0004 max mem: 8426 +[2024-12-10 21:01:30 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8193 (0.8644) acc1: 79.6875 (80.9451) acc3: 94.5312 (92.9688) acc5: 96.0938 (95.7889) time: 0.1309 data: 0.0031 max mem: 8426 +[2024-12-10 21:01:32 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0229 (0.9508) acc1: 75.0000 (78.9216) acc3: 87.5000 (91.7126) acc5: 92.1875 (94.6998) time: 0.1642 data: 0.0344 max mem: 8426 +[2024-12-10 21:01:34 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2541 (0.9970) acc1: 71.8750 (78.1762) acc3: 85.9375 (90.7915) acc5: 89.0625 (93.8397) time: 0.1619 data: 0.0323 max mem: 8426 +[2024-12-10 21:01:35 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1904 (1.0355) acc1: 74.2188 (77.2337) acc3: 86.7188 (90.2289) acc5: 89.8438 (93.3539) time: 0.1304 data: 0.0009 max mem: 8426 +[2024-12-10 21:01:37 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1904 (1.0687) acc1: 72.6562 (76.4082) acc3: 86.7188 (89.7473) acc5: 89.8438 (92.8434) time: 0.1572 data: 0.0260 max mem: 8426 +[2024-12-10 21:01:38 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2595 (1.0960) acc1: 71.0938 (75.6954) acc3: 86.7188 (89.3973) acc5: 89.8438 (92.6168) time: 0.1561 data: 0.0260 max mem: 8426 +[2024-12-10 21:01:39 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1534 (1.0843) acc1: 74.2188 (75.8560) acc3: 89.0625 (89.5520) acc5: 92.1875 (92.8080) time: 0.1289 data: 0.0007 max mem: 8426 +[2024-12-10 21:01:39 root] (utils.py 297): INFO Test: Total time: 0:00:13 (0.1417 s / it) +[2024-12-10 21:01:39 root] (engine.py 264): INFO * Acc@1 75.570 Acc@3 89.570 Acc@5 92.818 loss 1.085 flops 1.285 layer_flops 1.251 +[2024-12-10 21:01:39 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 75.6% +[2024-12-10 21:01:39 root] (main.py 576): INFO Max accuracy: 75.72% +[2024-12-10 21:01:40 root] (utils.py 283): INFO Epoch: [18] [ 0/2502] eta: 0:33:06 lr: 0.000009 loss_cls: 4.1520 (4.1520) grad_norm: 2.2733 (2.2733) time: 0.7941 data: 0.0002 max mem: 8426 +[2024-12-10 21:01:47 root] (utils.py 283): INFO Epoch: [18] [ 10/2502] eta: 0:32:13 lr: 0.000009 loss_cls: 4.0398 (3.8805) grad_norm: 2.3559 (2.3412) time: 0.7758 data: 0.0003 max mem: 8426 +[2024-12-10 21:01:55 root] (utils.py 283): INFO Epoch: [18] [ 20/2502] eta: 0:31:53 lr: 0.000009 loss_cls: 3.9877 (3.8852) grad_norm: 2.3567 (2.3506) time: 0.7700 data: 0.0003 max mem: 8426 +[2024-12-10 21:02:03 root] (utils.py 283): INFO Epoch: [18] [ 30/2502] eta: 0:31:41 lr: 0.000009 loss_cls: 3.9547 (3.9055) grad_norm: 2.3567 (2.3708) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 21:02:10 root] (utils.py 283): INFO Epoch: [18] [ 40/2502] eta: 0:31:30 lr: 0.000009 loss_cls: 3.9877 (3.9519) grad_norm: 2.3093 (2.3527) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 21:02:18 root] (utils.py 283): INFO Epoch: [18] [ 50/2502] eta: 0:31:21 lr: 0.000009 loss_cls: 4.2008 (4.0164) grad_norm: 2.3093 (2.3556) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 21:02:26 root] (utils.py 283): INFO Epoch: [18] [ 60/2502] eta: 0:31:11 lr: 0.000009 loss_cls: 4.2008 (4.0179) grad_norm: 2.3868 (2.3644) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 21:02:33 root] (utils.py 283): INFO Epoch: [18] [ 70/2502] eta: 0:31:00 lr: 0.000009 loss_cls: 4.0149 (4.0033) grad_norm: 2.3976 (2.3691) time: 0.7592 data: 0.0002 max mem: 8426 +[2024-12-10 21:02:41 root] (utils.py 283): INFO Epoch: [18] [ 80/2502] eta: 0:30:49 lr: 0.000009 loss_cls: 4.1156 (3.9950) grad_norm: 2.3976 (2.3797) time: 0.7556 data: 0.0002 max mem: 8426 +[2024-12-10 21:02:48 root] (utils.py 283): INFO Epoch: [18] [ 90/2502] eta: 0:30:41 lr: 0.000009 loss_cls: 4.0587 (3.9812) grad_norm: 2.4051 (2.3844) time: 0.7591 data: 0.0002 max mem: 8426 +[2024-12-10 21:02:56 root] (utils.py 283): INFO Epoch: [18] [ 100/2502] eta: 0:30:32 lr: 0.000009 loss_cls: 4.1350 (3.9953) grad_norm: 2.4119 (2.3843) time: 0.7593 data: 0.0002 max mem: 8426 +[2024-12-10 21:03:04 root] (utils.py 283): INFO Epoch: [18] [ 110/2502] eta: 0:30:24 lr: 0.000009 loss_cls: 4.2038 (3.9876) grad_norm: 2.3713 (2.3847) time: 0.7589 data: 0.0002 max mem: 8426 +[2024-12-10 21:03:11 root] (utils.py 283): INFO Epoch: [18] [ 120/2502] eta: 0:30:15 lr: 0.000009 loss_cls: 3.9888 (3.9755) grad_norm: 2.3418 (2.3812) time: 0.7593 data: 0.0002 max mem: 8426 +[2024-12-10 21:03:19 root] (utils.py 283): INFO Epoch: [18] [ 130/2502] eta: 0:30:07 lr: 0.000009 loss_cls: 4.0522 (3.9737) grad_norm: 2.3390 (2.3808) time: 0.7575 data: 0.0002 max mem: 8426 +[2024-12-10 21:03:27 root] (utils.py 283): INFO Epoch: [18] [ 140/2502] eta: 0:30:03 lr: 0.000009 loss_cls: 4.0403 (3.9682) grad_norm: 2.3798 (2.3785) time: 0.7717 data: 0.0002 max mem: 8426 +[2024-12-10 21:03:34 root] (utils.py 283): INFO Epoch: [18] [ 150/2502] eta: 0:29:55 lr: 0.000009 loss_cls: 3.9587 (3.9822) grad_norm: 2.3795 (2.3768) time: 0.7721 data: 0.0002 max mem: 8426 +[2024-12-10 21:03:42 root] (utils.py 283): INFO Epoch: [18] [ 160/2502] eta: 0:29:46 lr: 0.000009 loss_cls: 3.9377 (3.9575) grad_norm: 2.3853 (2.3812) time: 0.7584 data: 0.0002 max mem: 8426 +[2024-12-10 21:03:49 root] (utils.py 283): INFO Epoch: [18] [ 170/2502] eta: 0:29:38 lr: 0.000009 loss_cls: 3.8449 (3.9579) grad_norm: 2.4111 (2.3789) time: 0.7589 data: 0.0002 max mem: 8426 +[2024-12-10 21:03:57 root] (utils.py 283): INFO Epoch: [18] [ 180/2502] eta: 0:29:31 lr: 0.000009 loss_cls: 3.9296 (3.9552) grad_norm: 2.3384 (2.3799) time: 0.7615 data: 0.0002 max mem: 8426 +[2024-12-10 21:04:05 root] (utils.py 283): INFO Epoch: [18] [ 190/2502] eta: 0:29:23 lr: 0.000009 loss_cls: 3.8532 (3.9507) grad_norm: 2.3834 (2.3814) time: 0.7602 data: 0.0002 max mem: 8426 +[2024-12-10 21:04:12 root] (utils.py 283): INFO Epoch: [18] [ 200/2502] eta: 0:29:14 lr: 0.000009 loss_cls: 3.9417 (3.9498) grad_norm: 2.3834 (2.3799) time: 0.7585 data: 0.0003 max mem: 8426 +[2024-12-10 21:04:20 root] (utils.py 283): INFO Epoch: [18] [ 210/2502] eta: 0:29:06 lr: 0.000009 loss_cls: 3.9698 (3.9424) grad_norm: 2.3189 (2.3794) time: 0.7580 data: 0.0002 max mem: 8426 +[2024-12-10 21:04:27 root] (utils.py 283): INFO Epoch: [18] [ 220/2502] eta: 0:28:58 lr: 0.000009 loss_cls: 3.9565 (3.9357) grad_norm: 2.3494 (2.3775) time: 0.7572 data: 0.0002 max mem: 8426 +[2024-12-10 21:04:35 root] (utils.py 283): INFO Epoch: [18] [ 230/2502] eta: 0:28:50 lr: 0.000009 loss_cls: 4.0621 (3.9444) grad_norm: 2.4102 (2.3830) time: 0.7578 data: 0.0002 max mem: 8426 +[2024-12-10 21:04:43 root] (utils.py 283): INFO Epoch: [18] [ 240/2502] eta: 0:28:43 lr: 0.000009 loss_cls: 4.1007 (3.9416) grad_norm: 2.4386 (2.3840) time: 0.7613 data: 0.0003 max mem: 8426 +[2024-12-10 21:04:50 root] (utils.py 283): INFO Epoch: [18] [ 250/2502] eta: 0:28:36 lr: 0.000009 loss_cls: 4.0266 (3.9477) grad_norm: 2.3846 (2.3820) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-10 21:04:58 root] (utils.py 283): INFO Epoch: [18] [ 260/2502] eta: 0:28:30 lr: 0.000009 loss_cls: 4.1012 (3.9506) grad_norm: 2.3541 (2.3821) time: 0.7740 data: 0.0002 max mem: 8426 +[2024-12-10 21:05:06 root] (utils.py 283): INFO Epoch: [18] [ 270/2502] eta: 0:28:24 lr: 0.000009 loss_cls: 4.2098 (3.9596) grad_norm: 2.3918 (2.3812) time: 0.7837 data: 0.0002 max mem: 8426 +[2024-12-10 21:05:14 root] (utils.py 283): INFO Epoch: [18] [ 280/2502] eta: 0:28:16 lr: 0.000009 loss_cls: 4.2776 (3.9652) grad_norm: 2.3652 (2.3806) time: 0.7761 data: 0.0002 max mem: 8426 +[2024-12-10 21:05:21 root] (utils.py 283): INFO Epoch: [18] [ 290/2502] eta: 0:28:08 lr: 0.000009 loss_cls: 4.0190 (3.9528) grad_norm: 2.3312 (2.3799) time: 0.7601 data: 0.0002 max mem: 8426 +[2024-12-10 21:05:29 root] (utils.py 283): INFO Epoch: [18] [ 300/2502] eta: 0:28:00 lr: 0.000009 loss_cls: 3.6420 (3.9467) grad_norm: 2.3447 (2.3802) time: 0.7579 data: 0.0002 max mem: 8426 +[2024-12-10 21:05:36 root] (utils.py 283): INFO Epoch: [18] [ 310/2502] eta: 0:27:53 lr: 0.000009 loss_cls: 4.0687 (3.9468) grad_norm: 2.2991 (2.3759) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 21:05:44 root] (utils.py 283): INFO Epoch: [18] [ 320/2502] eta: 0:27:45 lr: 0.000009 loss_cls: 3.4687 (3.9296) grad_norm: 2.3103 (2.3787) time: 0.7602 data: 0.0002 max mem: 8426 +[2024-12-10 21:05:52 root] (utils.py 283): INFO Epoch: [18] [ 330/2502] eta: 0:27:37 lr: 0.000009 loss_cls: 3.4687 (3.9298) grad_norm: 2.4218 (2.3799) time: 0.7602 data: 0.0002 max mem: 8426 +[2024-12-10 21:05:59 root] (utils.py 283): INFO Epoch: [18] [ 340/2502] eta: 0:27:30 lr: 0.000009 loss_cls: 3.6608 (3.9270) grad_norm: 2.4011 (2.3810) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-10 21:06:07 root] (utils.py 283): INFO Epoch: [18] [ 350/2502] eta: 0:27:21 lr: 0.000009 loss_cls: 4.0589 (3.9267) grad_norm: 2.3774 (2.3815) time: 0.7600 data: 0.0002 max mem: 8426 +[2024-12-10 21:06:14 root] (utils.py 283): INFO Epoch: [18] [ 360/2502] eta: 0:27:13 lr: 0.000009 loss_cls: 4.2596 (3.9317) grad_norm: 2.3845 (2.3808) time: 0.7558 data: 0.0002 max mem: 8426 +[2024-12-10 21:06:22 root] (utils.py 283): INFO Epoch: [18] [ 370/2502] eta: 0:27:06 lr: 0.000009 loss_cls: 4.1104 (3.9301) grad_norm: 2.3906 (2.3801) time: 0.7579 data: 0.0002 max mem: 8426 +[2024-12-10 21:06:30 root] (utils.py 283): INFO Epoch: [18] [ 380/2502] eta: 0:26:58 lr: 0.000009 loss_cls: 3.9815 (3.9326) grad_norm: 2.4470 (2.3824) time: 0.7597 data: 0.0002 max mem: 8426 +[2024-12-10 21:06:37 root] (utils.py 283): INFO Epoch: [18] [ 390/2502] eta: 0:26:50 lr: 0.000009 loss_cls: 4.0950 (3.9343) grad_norm: 2.4410 (2.3826) time: 0.7585 data: 0.0002 max mem: 8426 +[2024-12-10 21:06:45 root] (utils.py 283): INFO Epoch: [18] [ 400/2502] eta: 0:26:43 lr: 0.000009 loss_cls: 4.0950 (3.9327) grad_norm: 2.3599 (2.3815) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 21:06:52 root] (utils.py 283): INFO Epoch: [18] [ 410/2502] eta: 0:26:35 lr: 0.000009 loss_cls: 4.0448 (3.9322) grad_norm: 2.3737 (2.3823) time: 0.7654 data: 0.0002 max mem: 8426 +[2024-12-10 21:07:00 root] (utils.py 283): INFO Epoch: [18] [ 420/2502] eta: 0:26:27 lr: 0.000009 loss_cls: 3.8811 (3.9322) grad_norm: 2.3359 (2.3816) time: 0.7590 data: 0.0003 max mem: 8426 +[2024-12-10 21:07:08 root] (utils.py 283): INFO Epoch: [18] [ 430/2502] eta: 0:26:19 lr: 0.000009 loss_cls: 3.8811 (3.9317) grad_norm: 2.3052 (2.3797) time: 0.7552 data: 0.0002 max mem: 8426 +[2024-12-10 21:07:15 root] (utils.py 283): INFO Epoch: [18] [ 440/2502] eta: 0:26:11 lr: 0.000009 loss_cls: 3.6727 (3.9256) grad_norm: 2.3397 (2.3799) time: 0.7582 data: 0.0002 max mem: 8426 +[2024-12-10 21:07:23 root] (utils.py 283): INFO Epoch: [18] [ 450/2502] eta: 0:26:03 lr: 0.000009 loss_cls: 4.1653 (3.9328) grad_norm: 2.4119 (2.3815) time: 0.7581 data: 0.0002 max mem: 8426 +[2024-12-10 21:07:30 root] (utils.py 283): INFO Epoch: [18] [ 460/2502] eta: 0:25:56 lr: 0.000009 loss_cls: 4.1653 (3.9270) grad_norm: 2.3848 (2.3811) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-10 21:07:38 root] (utils.py 283): INFO Epoch: [18] [ 470/2502] eta: 0:25:48 lr: 0.000009 loss_cls: 3.8045 (3.9269) grad_norm: 2.3196 (2.3805) time: 0.7633 data: 0.0003 max mem: 8426 +[2024-12-10 21:07:46 root] (utils.py 283): INFO Epoch: [18] [ 480/2502] eta: 0:25:41 lr: 0.000009 loss_cls: 3.6923 (3.9206) grad_norm: 2.3845 (2.3815) time: 0.7577 data: 0.0003 max mem: 8426 +[2024-12-10 21:07:53 root] (utils.py 283): INFO Epoch: [18] [ 490/2502] eta: 0:25:33 lr: 0.000009 loss_cls: 3.3893 (3.9183) grad_norm: 2.3863 (2.3814) time: 0.7621 data: 0.0003 max mem: 8426 +[2024-12-10 21:08:01 root] (utils.py 283): INFO Epoch: [18] [ 500/2502] eta: 0:25:25 lr: 0.000009 loss_cls: 3.3707 (3.9066) grad_norm: 2.3176 (2.3804) time: 0.7602 data: 0.0002 max mem: 8426 +[2024-12-10 21:08:08 root] (utils.py 283): INFO Epoch: [18] [ 510/2502] eta: 0:25:17 lr: 0.000009 loss_cls: 3.6749 (3.9105) grad_norm: 2.2880 (2.3789) time: 0.7571 data: 0.0002 max mem: 8426 +[2024-12-10 21:08:16 root] (utils.py 283): INFO Epoch: [18] [ 520/2502] eta: 0:25:09 lr: 0.000009 loss_cls: 4.1507 (3.9124) grad_norm: 2.2795 (2.3778) time: 0.7566 data: 0.0002 max mem: 8426 +[2024-12-10 21:08:24 root] (utils.py 283): INFO Epoch: [18] [ 530/2502] eta: 0:25:02 lr: 0.000009 loss_cls: 4.2115 (3.9110) grad_norm: 2.3256 (2.3767) time: 0.7580 data: 0.0003 max mem: 8426 +[2024-12-10 21:08:31 root] (utils.py 283): INFO Epoch: [18] [ 540/2502] eta: 0:24:54 lr: 0.000009 loss_cls: 3.7099 (3.9077) grad_norm: 2.2897 (2.3751) time: 0.7588 data: 0.0003 max mem: 8426 +[2024-12-10 21:08:39 root] (utils.py 283): INFO Epoch: [18] [ 550/2502] eta: 0:24:46 lr: 0.000009 loss_cls: 3.6433 (3.9043) grad_norm: 2.3536 (2.3762) time: 0.7575 data: 0.0003 max mem: 8426 +[2024-12-10 21:08:46 root] (utils.py 283): INFO Epoch: [18] [ 560/2502] eta: 0:24:39 lr: 0.000009 loss_cls: 4.0096 (3.9075) grad_norm: 2.4309 (2.3763) time: 0.7646 data: 0.0003 max mem: 8426 +[2024-12-10 21:08:54 root] (utils.py 283): INFO Epoch: [18] [ 570/2502] eta: 0:24:31 lr: 0.000009 loss_cls: 4.1048 (3.9079) grad_norm: 2.3316 (2.3765) time: 0.7637 data: 0.0003 max mem: 8426 +[2024-12-10 21:09:02 root] (utils.py 283): INFO Epoch: [18] [ 580/2502] eta: 0:24:23 lr: 0.000009 loss_cls: 4.0123 (3.9083) grad_norm: 2.3606 (2.3766) time: 0.7570 data: 0.0003 max mem: 8426 +[2024-12-10 21:09:09 root] (utils.py 283): INFO Epoch: [18] [ 590/2502] eta: 0:24:16 lr: 0.000009 loss_cls: 3.6677 (3.9054) grad_norm: 2.3755 (2.3767) time: 0.7567 data: 0.0002 max mem: 8426 +[2024-12-10 21:09:17 root] (utils.py 283): INFO Epoch: [18] [ 600/2502] eta: 0:24:08 lr: 0.000009 loss_cls: 3.6622 (3.9045) grad_norm: 2.3371 (2.3761) time: 0.7549 data: 0.0002 max mem: 8426 +[2024-12-10 21:09:24 root] (utils.py 283): INFO Epoch: [18] [ 610/2502] eta: 0:24:00 lr: 0.000009 loss_cls: 3.5897 (3.8993) grad_norm: 2.2946 (2.3755) time: 0.7567 data: 0.0002 max mem: 8426 +[2024-12-10 21:09:32 root] (utils.py 283): INFO Epoch: [18] [ 620/2502] eta: 0:23:52 lr: 0.000009 loss_cls: 3.8523 (3.9000) grad_norm: 2.3994 (2.3775) time: 0.7589 data: 0.0003 max mem: 8426 +[2024-12-10 21:09:39 root] (utils.py 283): INFO Epoch: [18] [ 630/2502] eta: 0:23:45 lr: 0.000009 loss_cls: 3.9689 (3.8995) grad_norm: 2.4743 (2.3798) time: 0.7577 data: 0.0003 max mem: 8426 +[2024-12-10 21:09:47 root] (utils.py 283): INFO Epoch: [18] [ 640/2502] eta: 0:23:37 lr: 0.000009 loss_cls: 4.1960 (3.9056) grad_norm: 2.4567 (2.3805) time: 0.7555 data: 0.0003 max mem: 8426 +[2024-12-10 21:09:55 root] (utils.py 283): INFO Epoch: [18] [ 650/2502] eta: 0:23:29 lr: 0.000009 loss_cls: 4.1960 (3.9079) grad_norm: 2.4102 (2.3799) time: 0.7593 data: 0.0003 max mem: 8426 +[2024-12-10 21:10:02 root] (utils.py 283): INFO Epoch: [18] [ 660/2502] eta: 0:23:22 lr: 0.000009 loss_cls: 4.1857 (3.9097) grad_norm: 2.3321 (2.3797) time: 0.7627 data: 0.0003 max mem: 8426 +[2024-12-10 21:10:10 root] (utils.py 283): INFO Epoch: [18] [ 670/2502] eta: 0:23:14 lr: 0.000009 loss_cls: 4.0351 (3.9085) grad_norm: 2.2842 (2.3783) time: 0.7612 data: 0.0003 max mem: 8426 +[2024-12-10 21:10:17 root] (utils.py 283): INFO Epoch: [18] [ 680/2502] eta: 0:23:06 lr: 0.000009 loss_cls: 3.8884 (3.9071) grad_norm: 2.3545 (2.3785) time: 0.7577 data: 0.0002 max mem: 8426 +[2024-12-10 21:10:25 root] (utils.py 283): INFO Epoch: [18] [ 690/2502] eta: 0:22:59 lr: 0.000009 loss_cls: 4.1739 (3.9114) grad_norm: 2.3672 (2.3783) time: 0.7557 data: 0.0002 max mem: 8426 +[2024-12-10 21:10:33 root] (utils.py 283): INFO Epoch: [18] [ 700/2502] eta: 0:22:51 lr: 0.000009 loss_cls: 4.0379 (3.9082) grad_norm: 2.3424 (2.3783) time: 0.7578 data: 0.0002 max mem: 8426 +[2024-12-10 21:10:40 root] (utils.py 283): INFO Epoch: [18] [ 710/2502] eta: 0:22:43 lr: 0.000009 loss_cls: 3.9788 (3.9089) grad_norm: 2.3190 (2.3778) time: 0.7588 data: 0.0002 max mem: 8426 +[2024-12-10 21:10:48 root] (utils.py 283): INFO Epoch: [18] [ 720/2502] eta: 0:22:36 lr: 0.000009 loss_cls: 3.8203 (3.9058) grad_norm: 2.3384 (2.3781) time: 0.7579 data: 0.0002 max mem: 8426 +[2024-12-10 21:10:55 root] (utils.py 283): INFO Epoch: [18] [ 730/2502] eta: 0:22:28 lr: 0.000009 loss_cls: 3.8505 (3.9068) grad_norm: 2.3403 (2.3776) time: 0.7584 data: 0.0002 max mem: 8426 +[2024-12-10 21:11:03 root] (utils.py 283): INFO Epoch: [18] [ 740/2502] eta: 0:22:20 lr: 0.000009 loss_cls: 4.0416 (3.9080) grad_norm: 2.3568 (2.3779) time: 0.7575 data: 0.0002 max mem: 8426 +[2024-12-10 21:11:10 root] (utils.py 283): INFO Epoch: [18] [ 750/2502] eta: 0:22:12 lr: 0.000009 loss_cls: 4.0196 (3.9084) grad_norm: 2.4053 (2.3783) time: 0.7560 data: 0.0002 max mem: 8426 +[2024-12-10 21:11:18 root] (utils.py 283): INFO Epoch: [18] [ 760/2502] eta: 0:22:05 lr: 0.000009 loss_cls: 3.9957 (3.9075) grad_norm: 2.4340 (2.3789) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-10 21:11:26 root] (utils.py 283): INFO Epoch: [18] [ 770/2502] eta: 0:21:58 lr: 0.000009 loss_cls: 3.5030 (3.9009) grad_norm: 2.4756 (2.3808) time: 0.7792 data: 0.0003 max mem: 8426 +[2024-12-10 21:11:34 root] (utils.py 283): INFO Epoch: [18] [ 780/2502] eta: 0:21:51 lr: 0.000009 loss_cls: 3.5120 (3.9014) grad_norm: 2.4100 (2.3806) time: 0.7792 data: 0.0003 max mem: 8426 +[2024-12-10 21:11:41 root] (utils.py 283): INFO Epoch: [18] [ 790/2502] eta: 0:21:43 lr: 0.000009 loss_cls: 4.1447 (3.9044) grad_norm: 2.3388 (2.3805) time: 0.7688 data: 0.0002 max mem: 8426 +[2024-12-10 21:11:49 root] (utils.py 283): INFO Epoch: [18] [ 800/2502] eta: 0:21:36 lr: 0.000009 loss_cls: 4.1616 (3.9057) grad_norm: 2.3717 (2.3809) time: 0.7614 data: 0.0002 max mem: 8426 +[2024-12-10 21:11:57 root] (utils.py 283): INFO Epoch: [18] [ 810/2502] eta: 0:21:28 lr: 0.000009 loss_cls: 3.6669 (3.9025) grad_norm: 2.3805 (2.3809) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 21:12:04 root] (utils.py 283): INFO Epoch: [18] [ 820/2502] eta: 0:21:20 lr: 0.000009 loss_cls: 3.4377 (3.8992) grad_norm: 2.3688 (2.3803) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-10 21:12:12 root] (utils.py 283): INFO Epoch: [18] [ 830/2502] eta: 0:21:13 lr: 0.000009 loss_cls: 3.5569 (3.8951) grad_norm: 2.3931 (2.3810) time: 0.7602 data: 0.0003 max mem: 8426 +[2024-12-10 21:12:19 root] (utils.py 283): INFO Epoch: [18] [ 840/2502] eta: 0:21:05 lr: 0.000009 loss_cls: 3.5573 (3.8933) grad_norm: 2.4007 (2.3816) time: 0.7572 data: 0.0003 max mem: 8426 +[2024-12-10 21:12:27 root] (utils.py 283): INFO Epoch: [18] [ 850/2502] eta: 0:20:57 lr: 0.000009 loss_cls: 3.8339 (3.8936) grad_norm: 2.3839 (2.3821) time: 0.7587 data: 0.0002 max mem: 8426 +[2024-12-10 21:12:35 root] (utils.py 283): INFO Epoch: [18] [ 860/2502] eta: 0:20:50 lr: 0.000009 loss_cls: 4.0521 (3.8926) grad_norm: 2.3823 (2.3827) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 21:12:42 root] (utils.py 283): INFO Epoch: [18] [ 870/2502] eta: 0:20:42 lr: 0.000009 loss_cls: 3.6913 (3.8917) grad_norm: 2.3823 (2.3825) time: 0.7609 data: 0.0002 max mem: 8426 +[2024-12-10 21:12:50 root] (utils.py 283): INFO Epoch: [18] [ 880/2502] eta: 0:20:34 lr: 0.000009 loss_cls: 3.8221 (3.8911) grad_norm: 2.3827 (2.3822) time: 0.7560 data: 0.0002 max mem: 8426 +[2024-12-10 21:12:57 root] (utils.py 283): INFO Epoch: [18] [ 890/2502] eta: 0:20:27 lr: 0.000009 loss_cls: 4.1024 (3.8908) grad_norm: 2.3571 (2.3821) time: 0.7563 data: 0.0002 max mem: 8426 +[2024-12-10 21:13:05 root] (utils.py 283): INFO Epoch: [18] [ 900/2502] eta: 0:20:19 lr: 0.000009 loss_cls: 3.9395 (3.8911) grad_norm: 2.3004 (2.3819) time: 0.7572 data: 0.0003 max mem: 8426 +[2024-12-10 21:13:13 root] (utils.py 283): INFO Epoch: [18] [ 910/2502] eta: 0:20:11 lr: 0.000009 loss_cls: 4.2436 (3.8957) grad_norm: 2.3085 (2.3811) time: 0.7576 data: 0.0002 max mem: 8426 +[2024-12-10 21:13:20 root] (utils.py 283): INFO Epoch: [18] [ 920/2502] eta: 0:20:04 lr: 0.000009 loss_cls: 4.2622 (3.8978) grad_norm: 2.3191 (2.3814) time: 0.7567 data: 0.0002 max mem: 8426 +[2024-12-10 21:13:28 root] (utils.py 283): INFO Epoch: [18] [ 930/2502] eta: 0:19:56 lr: 0.000009 loss_cls: 4.2316 (3.8984) grad_norm: 2.3678 (2.3809) time: 0.7559 data: 0.0002 max mem: 8426 +[2024-12-10 21:13:35 root] (utils.py 283): INFO Epoch: [18] [ 940/2502] eta: 0:19:49 lr: 0.000009 loss_cls: 4.1732 (3.8986) grad_norm: 2.3896 (2.3817) time: 0.7693 data: 0.0002 max mem: 8426 +[2024-12-10 21:13:43 root] (utils.py 283): INFO Epoch: [18] [ 950/2502] eta: 0:19:41 lr: 0.000009 loss_cls: 4.1363 (3.8980) grad_norm: 2.3896 (2.3817) time: 0.7796 data: 0.0002 max mem: 8426 +[2024-12-10 21:13:51 root] (utils.py 283): INFO Epoch: [18] [ 960/2502] eta: 0:19:34 lr: 0.000009 loss_cls: 4.0496 (3.8983) grad_norm: 2.3797 (2.3820) time: 0.7671 data: 0.0003 max mem: 8426 +[2024-12-10 21:13:58 root] (utils.py 283): INFO Epoch: [18] [ 970/2502] eta: 0:19:26 lr: 0.000009 loss_cls: 4.0650 (3.9001) grad_norm: 2.4032 (2.3823) time: 0.7617 data: 0.0003 max mem: 8426 +[2024-12-10 21:14:06 root] (utils.py 283): INFO Epoch: [18] [ 980/2502] eta: 0:19:18 lr: 0.000009 loss_cls: 4.1393 (3.8998) grad_norm: 2.3891 (2.3823) time: 0.7609 data: 0.0002 max mem: 8426 +[2024-12-10 21:14:14 root] (utils.py 283): INFO Epoch: [18] [ 990/2502] eta: 0:19:11 lr: 0.000009 loss_cls: 4.1026 (3.9009) grad_norm: 2.3502 (2.3819) time: 0.7578 data: 0.0002 max mem: 8426 +[2024-12-10 21:14:21 root] (utils.py 283): INFO Epoch: [18] [1000/2502] eta: 0:19:03 lr: 0.000009 loss_cls: 4.0242 (3.9014) grad_norm: 2.3552 (2.3820) time: 0.7607 data: 0.0002 max mem: 8426 +[2024-12-10 21:14:29 root] (utils.py 283): INFO Epoch: [18] [1010/2502] eta: 0:18:56 lr: 0.000009 loss_cls: 3.9117 (3.9005) grad_norm: 2.3752 (2.3819) time: 0.7682 data: 0.0002 max mem: 8426 +[2024-12-10 21:14:37 root] (utils.py 283): INFO Epoch: [18] [1020/2502] eta: 0:18:48 lr: 0.000009 loss_cls: 3.9117 (3.9001) grad_norm: 2.4234 (2.3819) time: 0.7683 data: 0.0003 max mem: 8426 +[2024-12-10 21:14:44 root] (utils.py 283): INFO Epoch: [18] [1030/2502] eta: 0:18:40 lr: 0.000009 loss_cls: 3.9618 (3.9010) grad_norm: 2.4234 (2.3832) time: 0.7604 data: 0.0002 max mem: 8426 +[2024-12-10 21:14:52 root] (utils.py 283): INFO Epoch: [18] [1040/2502] eta: 0:18:33 lr: 0.000009 loss_cls: 3.9157 (3.8995) grad_norm: 2.2964 (2.3824) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 21:14:59 root] (utils.py 283): INFO Epoch: [18] [1050/2502] eta: 0:18:25 lr: 0.000009 loss_cls: 3.9157 (3.8993) grad_norm: 2.3626 (2.3827) time: 0.7638 data: 0.0003 max mem: 8426 +[2024-12-10 21:15:07 root] (utils.py 283): INFO Epoch: [18] [1060/2502] eta: 0:18:18 lr: 0.000009 loss_cls: 4.1797 (3.8992) grad_norm: 2.3811 (2.3818) time: 0.7626 data: 0.0003 max mem: 8426 +[2024-12-10 21:15:15 root] (utils.py 283): INFO Epoch: [18] [1070/2502] eta: 0:18:10 lr: 0.000009 loss_cls: 4.0928 (3.8993) grad_norm: 2.2908 (2.3812) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 21:15:22 root] (utils.py 283): INFO Epoch: [18] [1080/2502] eta: 0:18:03 lr: 0.000009 loss_cls: 4.0939 (3.9006) grad_norm: 2.3484 (2.3816) time: 0.7639 data: 0.0003 max mem: 8426 +[2024-12-10 21:15:30 root] (utils.py 283): INFO Epoch: [18] [1090/2502] eta: 0:17:55 lr: 0.000009 loss_cls: 4.1789 (3.9011) grad_norm: 2.3825 (2.3816) time: 0.7627 data: 0.0003 max mem: 8426 +[2024-12-10 21:15:38 root] (utils.py 283): INFO Epoch: [18] [1100/2502] eta: 0:17:47 lr: 0.000009 loss_cls: 3.9503 (3.9001) grad_norm: 2.4546 (2.3824) time: 0.7659 data: 0.0003 max mem: 8426 +[2024-12-10 21:15:45 root] (utils.py 283): INFO Epoch: [18] [1110/2502] eta: 0:17:40 lr: 0.000009 loss_cls: 4.0117 (3.9026) grad_norm: 2.3841 (2.3822) time: 0.7669 data: 0.0003 max mem: 8426 +[2024-12-10 21:15:53 root] (utils.py 283): INFO Epoch: [18] [1120/2502] eta: 0:17:32 lr: 0.000009 loss_cls: 4.0117 (3.9029) grad_norm: 2.3368 (2.3820) time: 0.7626 data: 0.0003 max mem: 8426 +[2024-12-10 21:16:01 root] (utils.py 283): INFO Epoch: [18] [1130/2502] eta: 0:17:25 lr: 0.000009 loss_cls: 3.8971 (3.9031) grad_norm: 2.3472 (2.3822) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-10 21:16:08 root] (utils.py 283): INFO Epoch: [18] [1140/2502] eta: 0:17:17 lr: 0.000009 loss_cls: 4.1782 (3.9053) grad_norm: 2.3593 (2.3820) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 21:16:16 root] (utils.py 283): INFO Epoch: [18] [1150/2502] eta: 0:17:09 lr: 0.000009 loss_cls: 4.0579 (3.9051) grad_norm: 2.3694 (2.3824) time: 0.7635 data: 0.0003 max mem: 8426 +[2024-12-10 21:16:24 root] (utils.py 283): INFO Epoch: [18] [1160/2502] eta: 0:17:02 lr: 0.000009 loss_cls: 4.0579 (3.9080) grad_norm: 2.3800 (2.3829) time: 0.7626 data: 0.0003 max mem: 8426 +[2024-12-10 21:16:31 root] (utils.py 283): INFO Epoch: [18] [1170/2502] eta: 0:16:54 lr: 0.000009 loss_cls: 3.9936 (3.9060) grad_norm: 2.3930 (2.3831) time: 0.7659 data: 0.0003 max mem: 8426 +[2024-12-10 21:16:39 root] (utils.py 283): INFO Epoch: [18] [1180/2502] eta: 0:16:47 lr: 0.000009 loss_cls: 3.9220 (3.9065) grad_norm: 2.4334 (2.3840) time: 0.7672 data: 0.0003 max mem: 8426 +[2024-12-10 21:16:47 root] (utils.py 283): INFO Epoch: [18] [1190/2502] eta: 0:16:39 lr: 0.000009 loss_cls: 3.9852 (3.9064) grad_norm: 2.4461 (2.3841) time: 0.7675 data: 0.0003 max mem: 8426 +[2024-12-10 21:16:54 root] (utils.py 283): INFO Epoch: [18] [1200/2502] eta: 0:16:32 lr: 0.000009 loss_cls: 4.1026 (3.9051) grad_norm: 2.3265 (2.3832) time: 0.7721 data: 0.0002 max mem: 8426 +[2024-12-10 21:17:02 root] (utils.py 283): INFO Epoch: [18] [1210/2502] eta: 0:16:24 lr: 0.000009 loss_cls: 3.9485 (3.9031) grad_norm: 2.2854 (2.3831) time: 0.7708 data: 0.0002 max mem: 8426 +[2024-12-10 21:17:10 root] (utils.py 283): INFO Epoch: [18] [1220/2502] eta: 0:16:16 lr: 0.000009 loss_cls: 3.7094 (3.9030) grad_norm: 2.3920 (2.3829) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 21:17:17 root] (utils.py 283): INFO Epoch: [18] [1230/2502] eta: 0:16:09 lr: 0.000009 loss_cls: 3.8669 (3.9033) grad_norm: 2.3778 (2.3827) time: 0.7630 data: 0.0003 max mem: 8426 +[2024-12-10 21:17:25 root] (utils.py 283): INFO Epoch: [18] [1240/2502] eta: 0:16:01 lr: 0.000009 loss_cls: 3.8869 (3.9023) grad_norm: 2.3444 (2.3825) time: 0.7653 data: 0.0003 max mem: 8426 +[2024-12-10 21:17:33 root] (utils.py 283): INFO Epoch: [18] [1250/2502] eta: 0:15:54 lr: 0.000009 loss_cls: 4.1398 (3.9043) grad_norm: 2.3835 (2.3828) time: 0.7658 data: 0.0003 max mem: 8426 +[2024-12-10 21:17:40 root] (utils.py 283): INFO Epoch: [18] [1260/2502] eta: 0:15:46 lr: 0.000009 loss_cls: 4.2111 (3.9056) grad_norm: 2.3739 (2.3827) time: 0.7675 data: 0.0003 max mem: 8426 +[2024-12-10 21:17:48 root] (utils.py 283): INFO Epoch: [18] [1270/2502] eta: 0:15:39 lr: 0.000009 loss_cls: 3.7653 (3.9036) grad_norm: 2.3739 (2.3831) time: 0.7654 data: 0.0002 max mem: 8426 +[2024-12-10 21:17:56 root] (utils.py 283): INFO Epoch: [18] [1280/2502] eta: 0:15:31 lr: 0.000009 loss_cls: 3.8360 (3.9052) grad_norm: 2.3900 (2.3831) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 21:18:03 root] (utils.py 283): INFO Epoch: [18] [1290/2502] eta: 0:15:23 lr: 0.000009 loss_cls: 4.0569 (3.9054) grad_norm: 2.3680 (2.3830) time: 0.7652 data: 0.0003 max mem: 8426 +[2024-12-10 21:18:11 root] (utils.py 283): INFO Epoch: [18] [1300/2502] eta: 0:15:16 lr: 0.000009 loss_cls: 4.0569 (3.9069) grad_norm: 2.3736 (2.3831) time: 0.7636 data: 0.0003 max mem: 8426 +[2024-12-10 21:18:18 root] (utils.py 283): INFO Epoch: [18] [1310/2502] eta: 0:15:08 lr: 0.000009 loss_cls: 4.0156 (3.9066) grad_norm: 2.3866 (2.3832) time: 0.7641 data: 0.0003 max mem: 8426 +[2024-12-10 21:18:26 root] (utils.py 283): INFO Epoch: [18] [1320/2502] eta: 0:15:01 lr: 0.000009 loss_cls: 3.9951 (3.9057) grad_norm: 2.3945 (2.3836) time: 0.7668 data: 0.0003 max mem: 8426 +[2024-12-10 21:18:34 root] (utils.py 283): INFO Epoch: [18] [1330/2502] eta: 0:14:53 lr: 0.000009 loss_cls: 4.0551 (3.9070) grad_norm: 2.3814 (2.3835) time: 0.7670 data: 0.0003 max mem: 8426 +[2024-12-10 21:18:41 root] (utils.py 283): INFO Epoch: [18] [1340/2502] eta: 0:14:45 lr: 0.000009 loss_cls: 4.1355 (3.9075) grad_norm: 2.4413 (2.3842) time: 0.7640 data: 0.0003 max mem: 8426 +[2024-12-10 21:18:49 root] (utils.py 283): INFO Epoch: [18] [1350/2502] eta: 0:14:38 lr: 0.000009 loss_cls: 4.0451 (3.9075) grad_norm: 2.4607 (2.3841) time: 0.7631 data: 0.0003 max mem: 8426 +[2024-12-10 21:18:57 root] (utils.py 283): INFO Epoch: [18] [1360/2502] eta: 0:14:30 lr: 0.000009 loss_cls: 4.0446 (3.9084) grad_norm: 2.4223 (2.3845) time: 0.7617 data: 0.0003 max mem: 8426 +[2024-12-10 21:19:04 root] (utils.py 283): INFO Epoch: [18] [1370/2502] eta: 0:14:22 lr: 0.000009 loss_cls: 4.0756 (3.9088) grad_norm: 2.3721 (2.3847) time: 0.7561 data: 0.0003 max mem: 8426 +[2024-12-10 21:19:12 root] (utils.py 283): INFO Epoch: [18] [1380/2502] eta: 0:14:15 lr: 0.000009 loss_cls: 4.1694 (3.9102) grad_norm: 2.3345 (2.3840) time: 0.7556 data: 0.0002 max mem: 8426 +[2024-12-10 21:19:19 root] (utils.py 283): INFO Epoch: [18] [1390/2502] eta: 0:14:07 lr: 0.000009 loss_cls: 4.0043 (3.9094) grad_norm: 2.3033 (2.3838) time: 0.7587 data: 0.0002 max mem: 8426 +[2024-12-10 21:19:27 root] (utils.py 283): INFO Epoch: [18] [1400/2502] eta: 0:13:59 lr: 0.000009 loss_cls: 3.7644 (3.9073) grad_norm: 2.3304 (2.3836) time: 0.7590 data: 0.0003 max mem: 8426 +[2024-12-10 21:19:35 root] (utils.py 283): INFO Epoch: [18] [1410/2502] eta: 0:13:52 lr: 0.000009 loss_cls: 3.6057 (3.9066) grad_norm: 2.3775 (2.3840) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 21:19:42 root] (utils.py 283): INFO Epoch: [18] [1420/2502] eta: 0:13:44 lr: 0.000009 loss_cls: 4.0918 (3.9085) grad_norm: 2.4756 (2.3846) time: 0.7611 data: 0.0002 max mem: 8426 +[2024-12-10 21:19:50 root] (utils.py 283): INFO Epoch: [18] [1430/2502] eta: 0:13:37 lr: 0.000009 loss_cls: 4.2058 (3.9097) grad_norm: 2.4619 (2.3847) time: 0.7569 data: 0.0002 max mem: 8426 +[2024-12-10 21:19:57 root] (utils.py 283): INFO Epoch: [18] [1440/2502] eta: 0:13:29 lr: 0.000009 loss_cls: 3.9010 (3.9090) grad_norm: 2.4413 (2.3844) time: 0.7580 data: 0.0002 max mem: 8426 +[2024-12-10 21:20:05 root] (utils.py 283): INFO Epoch: [18] [1450/2502] eta: 0:13:21 lr: 0.000009 loss_cls: 3.9059 (3.9081) grad_norm: 2.3719 (2.3845) time: 0.7573 data: 0.0003 max mem: 8426 +[2024-12-10 21:20:12 root] (utils.py 283): INFO Epoch: [18] [1460/2502] eta: 0:13:14 lr: 0.000009 loss_cls: 3.8754 (3.9069) grad_norm: 2.3536 (2.3840) time: 0.7579 data: 0.0003 max mem: 8426 +[2024-12-10 21:20:20 root] (utils.py 283): INFO Epoch: [18] [1470/2502] eta: 0:13:06 lr: 0.000009 loss_cls: 3.6622 (3.9057) grad_norm: 2.3536 (2.3842) time: 0.7604 data: 0.0003 max mem: 8426 +[2024-12-10 21:20:28 root] (utils.py 283): INFO Epoch: [18] [1480/2502] eta: 0:12:58 lr: 0.000009 loss_cls: 3.9807 (3.9058) grad_norm: 2.4258 (2.3846) time: 0.7640 data: 0.0003 max mem: 8426 +[2024-12-10 21:20:35 root] (utils.py 283): INFO Epoch: [18] [1490/2502] eta: 0:12:51 lr: 0.000009 loss_cls: 4.1451 (3.9054) grad_norm: 2.3939 (2.3847) time: 0.7669 data: 0.0003 max mem: 8426 +[2024-12-10 21:20:43 root] (utils.py 283): INFO Epoch: [18] [1500/2502] eta: 0:12:43 lr: 0.000009 loss_cls: 4.1378 (3.9062) grad_norm: 2.3696 (2.3844) time: 0.7685 data: 0.0002 max mem: 8426 +[2024-12-10 21:20:51 root] (utils.py 283): INFO Epoch: [18] [1510/2502] eta: 0:12:36 lr: 0.000009 loss_cls: 4.0501 (3.9053) grad_norm: 2.3657 (2.3845) time: 0.7668 data: 0.0002 max mem: 8426 +[2024-12-10 21:20:58 root] (utils.py 283): INFO Epoch: [18] [1520/2502] eta: 0:12:28 lr: 0.000009 loss_cls: 4.0098 (3.9056) grad_norm: 2.4067 (2.3847) time: 0.7591 data: 0.0002 max mem: 8426 +[2024-12-10 21:21:06 root] (utils.py 283): INFO Epoch: [18] [1530/2502] eta: 0:12:20 lr: 0.000009 loss_cls: 4.0439 (3.9060) grad_norm: 2.3914 (2.3849) time: 0.7557 data: 0.0002 max mem: 8426 +[2024-12-10 21:21:13 root] (utils.py 283): INFO Epoch: [18] [1540/2502] eta: 0:12:13 lr: 0.000009 loss_cls: 4.2302 (3.9070) grad_norm: 2.3631 (2.3849) time: 0.7574 data: 0.0002 max mem: 8426 +[2024-12-10 21:21:21 root] (utils.py 283): INFO Epoch: [18] [1550/2502] eta: 0:12:05 lr: 0.000009 loss_cls: 4.1487 (3.9071) grad_norm: 2.3631 (2.3848) time: 0.7571 data: 0.0002 max mem: 8426 +[2024-12-10 21:21:29 root] (utils.py 283): INFO Epoch: [18] [1560/2502] eta: 0:11:57 lr: 0.000009 loss_cls: 4.1378 (3.9072) grad_norm: 2.3548 (2.3849) time: 0.7552 data: 0.0002 max mem: 8426 +[2024-12-10 21:21:36 root] (utils.py 283): INFO Epoch: [18] [1570/2502] eta: 0:11:50 lr: 0.000009 loss_cls: 4.0680 (3.9070) grad_norm: 2.3699 (2.3852) time: 0.7556 data: 0.0003 max mem: 8426 +[2024-12-10 21:21:44 root] (utils.py 283): INFO Epoch: [18] [1580/2502] eta: 0:11:42 lr: 0.000009 loss_cls: 4.0680 (3.9072) grad_norm: 2.3712 (2.3852) time: 0.7583 data: 0.0003 max mem: 8426 +[2024-12-10 21:21:51 root] (utils.py 283): INFO Epoch: [18] [1590/2502] eta: 0:11:34 lr: 0.000009 loss_cls: 4.0072 (3.9057) grad_norm: 2.3308 (2.3847) time: 0.7604 data: 0.0003 max mem: 8426 +[2024-12-10 21:21:59 root] (utils.py 283): INFO Epoch: [18] [1600/2502] eta: 0:11:27 lr: 0.000009 loss_cls: 3.7541 (3.9066) grad_norm: 2.3128 (2.3852) time: 0.7586 data: 0.0003 max mem: 8426 +[2024-12-10 21:22:06 root] (utils.py 283): INFO Epoch: [18] [1610/2502] eta: 0:11:19 lr: 0.000009 loss_cls: 4.3308 (3.9082) grad_norm: 2.3885 (2.3850) time: 0.7559 data: 0.0002 max mem: 8426 +[2024-12-10 21:22:14 root] (utils.py 283): INFO Epoch: [18] [1620/2502] eta: 0:11:11 lr: 0.000009 loss_cls: 4.0766 (3.9069) grad_norm: 2.3561 (2.3850) time: 0.7550 data: 0.0002 max mem: 8426 +[2024-12-10 21:22:22 root] (utils.py 283): INFO Epoch: [18] [1630/2502] eta: 0:11:04 lr: 0.000009 loss_cls: 3.8655 (3.9064) grad_norm: 2.3504 (2.3848) time: 0.7558 data: 0.0002 max mem: 8426 +[2024-12-10 21:22:29 root] (utils.py 283): INFO Epoch: [18] [1640/2502] eta: 0:10:56 lr: 0.000009 loss_cls: 4.0091 (3.9069) grad_norm: 2.4150 (2.3853) time: 0.7584 data: 0.0002 max mem: 8426 +[2024-12-10 21:22:37 root] (utils.py 283): INFO Epoch: [18] [1650/2502] eta: 0:10:48 lr: 0.000009 loss_cls: 3.9261 (3.9053) grad_norm: 2.4387 (2.3853) time: 0.7573 data: 0.0002 max mem: 8426 +[2024-12-10 21:22:44 root] (utils.py 283): INFO Epoch: [18] [1660/2502] eta: 0:10:41 lr: 0.000009 loss_cls: 3.9594 (3.9054) grad_norm: 2.3360 (2.3851) time: 0.7548 data: 0.0003 max mem: 8426 +[2024-12-10 21:22:52 root] (utils.py 283): INFO Epoch: [18] [1670/2502] eta: 0:10:33 lr: 0.000009 loss_cls: 3.7985 (3.9038) grad_norm: 2.3080 (2.3846) time: 0.7562 data: 0.0003 max mem: 8426 +[2024-12-10 21:23:00 root] (utils.py 283): INFO Epoch: [18] [1680/2502] eta: 0:10:26 lr: 0.000009 loss_cls: 3.6851 (3.9037) grad_norm: 2.3535 (2.3851) time: 0.7611 data: 0.0003 max mem: 8426 +[2024-12-10 21:23:07 root] (utils.py 283): INFO Epoch: [18] [1690/2502] eta: 0:10:18 lr: 0.000009 loss_cls: 4.1509 (3.9049) grad_norm: 2.3957 (2.3851) time: 0.7623 data: 0.0002 max mem: 8426 +[2024-12-10 21:23:15 root] (utils.py 283): INFO Epoch: [18] [1700/2502] eta: 0:10:10 lr: 0.000009 loss_cls: 3.9266 (3.9032) grad_norm: 2.3523 (2.3848) time: 0.7573 data: 0.0002 max mem: 8426 +[2024-12-10 21:23:22 root] (utils.py 283): INFO Epoch: [18] [1710/2502] eta: 0:10:03 lr: 0.000009 loss_cls: 3.3059 (3.9010) grad_norm: 2.3523 (2.3848) time: 0.7552 data: 0.0002 max mem: 8426 +[2024-12-10 21:23:30 root] (utils.py 283): INFO Epoch: [18] [1720/2502] eta: 0:09:55 lr: 0.000009 loss_cls: 3.5474 (3.9007) grad_norm: 2.3392 (2.3845) time: 0.7582 data: 0.0002 max mem: 8426 +[2024-12-10 21:23:37 root] (utils.py 283): INFO Epoch: [18] [1730/2502] eta: 0:09:47 lr: 0.000009 loss_cls: 4.1923 (3.9023) grad_norm: 2.3816 (2.3852) time: 0.7574 data: 0.0003 max mem: 8426 +[2024-12-10 21:23:45 root] (utils.py 283): INFO Epoch: [18] [1740/2502] eta: 0:09:40 lr: 0.000009 loss_cls: 4.2578 (3.9038) grad_norm: 2.3816 (2.3849) time: 0.7575 data: 0.0003 max mem: 8426 +[2024-12-10 21:23:53 root] (utils.py 283): INFO Epoch: [18] [1750/2502] eta: 0:09:32 lr: 0.000009 loss_cls: 4.1369 (3.9052) grad_norm: 2.2866 (2.3847) time: 0.7579 data: 0.0002 max mem: 8426 +[2024-12-10 21:24:00 root] (utils.py 283): INFO Epoch: [18] [1760/2502] eta: 0:09:25 lr: 0.000009 loss_cls: 4.2602 (3.9071) grad_norm: 2.4054 (2.3850) time: 0.7579 data: 0.0002 max mem: 8426 +[2024-12-10 21:24:08 root] (utils.py 283): INFO Epoch: [18] [1770/2502] eta: 0:09:17 lr: 0.000009 loss_cls: 4.2440 (3.9061) grad_norm: 2.4205 (2.3853) time: 0.7658 data: 0.0003 max mem: 8426 +[2024-12-10 21:24:15 root] (utils.py 283): INFO Epoch: [18] [1780/2502] eta: 0:09:09 lr: 0.000009 loss_cls: 4.1263 (3.9070) grad_norm: 2.3754 (2.3851) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-10 21:24:23 root] (utils.py 283): INFO Epoch: [18] [1790/2502] eta: 0:09:02 lr: 0.000009 loss_cls: 4.1609 (3.9084) grad_norm: 2.3382 (2.3849) time: 0.7598 data: 0.0002 max mem: 8426 +[2024-12-10 21:24:31 root] (utils.py 283): INFO Epoch: [18] [1800/2502] eta: 0:08:54 lr: 0.000009 loss_cls: 4.1609 (3.9084) grad_norm: 2.3350 (2.3847) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 21:24:38 root] (utils.py 283): INFO Epoch: [18] [1810/2502] eta: 0:08:46 lr: 0.000009 loss_cls: 3.8608 (3.9083) grad_norm: 2.3256 (2.3844) time: 0.7611 data: 0.0002 max mem: 8426 +[2024-12-10 21:24:46 root] (utils.py 283): INFO Epoch: [18] [1820/2502] eta: 0:08:39 lr: 0.000009 loss_cls: 3.8608 (3.9072) grad_norm: 2.3301 (2.3843) time: 0.7555 data: 0.0002 max mem: 8426 +[2024-12-10 21:24:54 root] (utils.py 283): INFO Epoch: [18] [1830/2502] eta: 0:08:31 lr: 0.000009 loss_cls: 3.8898 (3.9075) grad_norm: 2.3425 (2.3842) time: 0.7618 data: 0.0002 max mem: 8426 +[2024-12-10 21:25:01 root] (utils.py 283): INFO Epoch: [18] [1840/2502] eta: 0:08:24 lr: 0.000009 loss_cls: 3.9412 (3.9066) grad_norm: 2.3098 (2.3837) time: 0.7670 data: 0.0002 max mem: 8426 +[2024-12-10 21:25:09 root] (utils.py 283): INFO Epoch: [18] [1850/2502] eta: 0:08:16 lr: 0.000009 loss_cls: 4.0540 (3.9080) grad_norm: 2.3641 (2.3840) time: 0.7671 data: 0.0002 max mem: 8426 +[2024-12-10 21:25:16 root] (utils.py 283): INFO Epoch: [18] [1860/2502] eta: 0:08:08 lr: 0.000009 loss_cls: 4.0725 (3.9082) grad_norm: 2.4078 (2.3843) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 21:25:24 root] (utils.py 283): INFO Epoch: [18] [1870/2502] eta: 0:08:01 lr: 0.000009 loss_cls: 3.7976 (3.9071) grad_norm: 2.3800 (2.3843) time: 0.7557 data: 0.0002 max mem: 8426 +[2024-12-10 21:25:32 root] (utils.py 283): INFO Epoch: [18] [1880/2502] eta: 0:07:53 lr: 0.000009 loss_cls: 3.6348 (3.9048) grad_norm: 2.3662 (2.3842) time: 0.7554 data: 0.0002 max mem: 8426 +[2024-12-10 21:25:39 root] (utils.py 283): INFO Epoch: [18] [1890/2502] eta: 0:07:46 lr: 0.000009 loss_cls: 4.0135 (3.9062) grad_norm: 2.3710 (2.3840) time: 0.7579 data: 0.0003 max mem: 8426 +[2024-12-10 21:25:47 root] (utils.py 283): INFO Epoch: [18] [1900/2502] eta: 0:07:38 lr: 0.000009 loss_cls: 4.2717 (3.9073) grad_norm: 2.3714 (2.3839) time: 0.7589 data: 0.0002 max mem: 8426 +[2024-12-10 21:25:54 root] (utils.py 283): INFO Epoch: [18] [1910/2502] eta: 0:07:30 lr: 0.000009 loss_cls: 4.2385 (3.9090) grad_norm: 2.4251 (2.3842) time: 0.7573 data: 0.0002 max mem: 8426 +[2024-12-10 21:26:02 root] (utils.py 283): INFO Epoch: [18] [1920/2502] eta: 0:07:23 lr: 0.000009 loss_cls: 4.0956 (3.9095) grad_norm: 2.4059 (2.3838) time: 0.7563 data: 0.0002 max mem: 8426 +[2024-12-10 21:26:09 root] (utils.py 283): INFO Epoch: [18] [1930/2502] eta: 0:07:15 lr: 0.000009 loss_cls: 4.0438 (3.9109) grad_norm: 2.3754 (2.3839) time: 0.7599 data: 0.0002 max mem: 8426 +[2024-12-10 21:26:17 root] (utils.py 283): INFO Epoch: [18] [1940/2502] eta: 0:07:07 lr: 0.000009 loss_cls: 3.9004 (3.9099) grad_norm: 2.3582 (2.3836) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 21:26:25 root] (utils.py 283): INFO Epoch: [18] [1950/2502] eta: 0:07:00 lr: 0.000009 loss_cls: 3.6107 (3.9094) grad_norm: 2.3582 (2.3839) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 21:26:32 root] (utils.py 283): INFO Epoch: [18] [1960/2502] eta: 0:06:52 lr: 0.000009 loss_cls: 3.6252 (3.9088) grad_norm: 2.3979 (2.3841) time: 0.7623 data: 0.0002 max mem: 8426 +[2024-12-10 21:26:40 root] (utils.py 283): INFO Epoch: [18] [1970/2502] eta: 0:06:45 lr: 0.000009 loss_cls: 4.2008 (3.9108) grad_norm: 2.3810 (2.3836) time: 0.7707 data: 0.0002 max mem: 8426 +[2024-12-10 21:26:48 root] (utils.py 283): INFO Epoch: [18] [1980/2502] eta: 0:06:37 lr: 0.000009 loss_cls: 4.1595 (3.9111) grad_norm: 2.2789 (2.3835) time: 0.7751 data: 0.0002 max mem: 8426 +[2024-12-10 21:26:56 root] (utils.py 283): INFO Epoch: [18] [1990/2502] eta: 0:06:29 lr: 0.000009 loss_cls: 4.1319 (3.9126) grad_norm: 2.3533 (2.3834) time: 0.7675 data: 0.0002 max mem: 8426 +[2024-12-10 21:27:03 root] (utils.py 283): INFO Epoch: [18] [2000/2502] eta: 0:06:22 lr: 0.000009 loss_cls: 4.1398 (3.9132) grad_norm: 2.3639 (2.3835) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-10 21:27:11 root] (utils.py 283): INFO Epoch: [18] [2010/2502] eta: 0:06:14 lr: 0.000009 loss_cls: 3.9306 (3.9139) grad_norm: 2.4112 (2.3839) time: 0.7659 data: 0.0002 max mem: 8426 +[2024-12-10 21:27:19 root] (utils.py 283): INFO Epoch: [18] [2020/2502] eta: 0:06:07 lr: 0.000009 loss_cls: 3.7911 (3.9131) grad_norm: 2.4112 (2.3839) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 21:27:26 root] (utils.py 283): INFO Epoch: [18] [2030/2502] eta: 0:05:59 lr: 0.000009 loss_cls: 3.6244 (3.9122) grad_norm: 2.3708 (2.3841) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-10 21:27:34 root] (utils.py 283): INFO Epoch: [18] [2040/2502] eta: 0:05:51 lr: 0.000009 loss_cls: 3.8330 (3.9118) grad_norm: 2.3244 (2.3838) time: 0.7681 data: 0.0002 max mem: 8426 +[2024-12-10 21:27:42 root] (utils.py 283): INFO Epoch: [18] [2050/2502] eta: 0:05:44 lr: 0.000009 loss_cls: 3.9683 (3.9122) grad_norm: 2.3244 (2.3837) time: 0.7689 data: 0.0002 max mem: 8426 +[2024-12-10 21:27:49 root] (utils.py 283): INFO Epoch: [18] [2060/2502] eta: 0:05:36 lr: 0.000009 loss_cls: 3.8611 (3.9112) grad_norm: 2.3645 (2.3839) time: 0.7654 data: 0.0002 max mem: 8426 +[2024-12-10 21:27:57 root] (utils.py 283): INFO Epoch: [18] [2070/2502] eta: 0:05:29 lr: 0.000009 loss_cls: 3.8611 (3.9118) grad_norm: 2.3531 (2.3840) time: 0.7686 data: 0.0003 max mem: 8426 +[2024-12-10 21:28:05 root] (utils.py 283): INFO Epoch: [18] [2080/2502] eta: 0:05:21 lr: 0.000009 loss_cls: 4.2814 (3.9128) grad_norm: 2.3531 (2.3841) time: 0.7699 data: 0.0003 max mem: 8426 +[2024-12-10 21:28:12 root] (utils.py 283): INFO Epoch: [18] [2090/2502] eta: 0:05:13 lr: 0.000009 loss_cls: 3.4917 (3.9097) grad_norm: 2.3932 (2.3844) time: 0.7630 data: 0.0002 max mem: 8426 +[2024-12-10 21:28:20 root] (utils.py 283): INFO Epoch: [18] [2100/2502] eta: 0:05:06 lr: 0.000009 loss_cls: 3.6050 (3.9105) grad_norm: 2.4202 (2.3845) time: 0.7595 data: 0.0002 max mem: 8426 +[2024-12-10 21:28:27 root] (utils.py 283): INFO Epoch: [18] [2110/2502] eta: 0:04:58 lr: 0.000009 loss_cls: 4.1128 (3.9106) grad_norm: 2.3626 (2.3844) time: 0.7592 data: 0.0002 max mem: 8426 +[2024-12-10 21:28:35 root] (utils.py 283): INFO Epoch: [18] [2120/2502] eta: 0:04:51 lr: 0.000009 loss_cls: 3.9818 (3.9111) grad_norm: 2.3770 (2.3848) time: 0.7599 data: 0.0002 max mem: 8426 +[2024-12-10 21:28:43 root] (utils.py 283): INFO Epoch: [18] [2130/2502] eta: 0:04:43 lr: 0.000009 loss_cls: 3.9596 (3.9109) grad_norm: 2.4021 (2.3847) time: 0.7617 data: 0.0002 max mem: 8426 +[2024-12-10 21:28:50 root] (utils.py 283): INFO Epoch: [18] [2140/2502] eta: 0:04:35 lr: 0.000009 loss_cls: 3.9596 (3.9109) grad_norm: 2.3777 (2.3847) time: 0.7634 data: 0.0003 max mem: 8426 +[2024-12-10 21:28:58 root] (utils.py 283): INFO Epoch: [18] [2150/2502] eta: 0:04:28 lr: 0.000009 loss_cls: 3.9338 (3.9114) grad_norm: 2.3664 (2.3847) time: 0.7647 data: 0.0003 max mem: 8426 +[2024-12-10 21:29:06 root] (utils.py 283): INFO Epoch: [18] [2160/2502] eta: 0:04:20 lr: 0.000009 loss_cls: 3.6975 (3.9091) grad_norm: 2.3766 (2.3847) time: 0.7645 data: 0.0003 max mem: 8426 +[2024-12-10 21:29:13 root] (utils.py 283): INFO Epoch: [18] [2170/2502] eta: 0:04:12 lr: 0.000009 loss_cls: 3.6503 (3.9090) grad_norm: 2.3590 (2.3845) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 21:29:21 root] (utils.py 283): INFO Epoch: [18] [2180/2502] eta: 0:04:05 lr: 0.000009 loss_cls: 4.0717 (3.9096) grad_norm: 2.3709 (2.3850) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 21:29:28 root] (utils.py 283): INFO Epoch: [18] [2190/2502] eta: 0:03:57 lr: 0.000009 loss_cls: 4.1493 (3.9108) grad_norm: 2.4109 (2.3852) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 21:29:36 root] (utils.py 283): INFO Epoch: [18] [2200/2502] eta: 0:03:50 lr: 0.000009 loss_cls: 4.1493 (3.9114) grad_norm: 2.3758 (2.3851) time: 0.7613 data: 0.0003 max mem: 8426 +[2024-12-10 21:29:44 root] (utils.py 283): INFO Epoch: [18] [2210/2502] eta: 0:03:42 lr: 0.000009 loss_cls: 4.2047 (3.9113) grad_norm: 2.3875 (2.3854) time: 0.7601 data: 0.0003 max mem: 8426 +[2024-12-10 21:29:51 root] (utils.py 283): INFO Epoch: [18] [2220/2502] eta: 0:03:34 lr: 0.000009 loss_cls: 4.1716 (3.9112) grad_norm: 2.3875 (2.3854) time: 0.7632 data: 0.0003 max mem: 8426 +[2024-12-10 21:29:59 root] (utils.py 283): INFO Epoch: [18] [2230/2502] eta: 0:03:27 lr: 0.000009 loss_cls: 4.0648 (3.9122) grad_norm: 2.4047 (2.3858) time: 0.7660 data: 0.0003 max mem: 8426 +[2024-12-10 21:30:07 root] (utils.py 283): INFO Epoch: [18] [2240/2502] eta: 0:03:19 lr: 0.000009 loss_cls: 4.1279 (3.9133) grad_norm: 2.4047 (2.3857) time: 0.7632 data: 0.0003 max mem: 8426 +[2024-12-10 21:30:14 root] (utils.py 283): INFO Epoch: [18] [2250/2502] eta: 0:03:12 lr: 0.000009 loss_cls: 4.0765 (3.9131) grad_norm: 2.4025 (2.3860) time: 0.7616 data: 0.0003 max mem: 8426 +[2024-12-10 21:30:22 root] (utils.py 283): INFO Epoch: [18] [2260/2502] eta: 0:03:04 lr: 0.000009 loss_cls: 3.8842 (3.9121) grad_norm: 2.4028 (2.3861) time: 0.7621 data: 0.0003 max mem: 8426 +[2024-12-10 21:30:30 root] (utils.py 283): INFO Epoch: [18] [2270/2502] eta: 0:02:56 lr: 0.000009 loss_cls: 3.8960 (3.9124) grad_norm: 2.4096 (2.3864) time: 0.7634 data: 0.0003 max mem: 8426 +[2024-12-10 21:30:37 root] (utils.py 283): INFO Epoch: [18] [2280/2502] eta: 0:02:49 lr: 0.000009 loss_cls: 4.1091 (3.9126) grad_norm: 2.4025 (2.3865) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 21:30:45 root] (utils.py 283): INFO Epoch: [18] [2290/2502] eta: 0:02:41 lr: 0.000009 loss_cls: 4.1091 (3.9123) grad_norm: 2.3668 (2.3865) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 21:30:52 root] (utils.py 283): INFO Epoch: [18] [2300/2502] eta: 0:02:33 lr: 0.000009 loss_cls: 3.7726 (3.9116) grad_norm: 2.3988 (2.3867) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 21:31:00 root] (utils.py 283): INFO Epoch: [18] [2310/2502] eta: 0:02:26 lr: 0.000009 loss_cls: 3.8514 (3.9125) grad_norm: 2.3873 (2.3866) time: 0.7614 data: 0.0002 max mem: 8426 +[2024-12-10 21:31:08 root] (utils.py 283): INFO Epoch: [18] [2320/2502] eta: 0:02:18 lr: 0.000009 loss_cls: 4.2930 (3.9132) grad_norm: 2.3433 (2.3865) time: 0.7680 data: 0.0002 max mem: 8426 +[2024-12-10 21:31:15 root] (utils.py 283): INFO Epoch: [18] [2330/2502] eta: 0:02:11 lr: 0.000009 loss_cls: 4.1847 (3.9132) grad_norm: 2.2906 (2.3864) time: 0.7706 data: 0.0003 max mem: 8426 +[2024-12-10 21:31:23 root] (utils.py 283): INFO Epoch: [18] [2340/2502] eta: 0:02:03 lr: 0.000009 loss_cls: 4.0095 (3.9125) grad_norm: 2.3325 (2.3866) time: 0.7655 data: 0.0003 max mem: 8426 +[2024-12-10 21:31:31 root] (utils.py 283): INFO Epoch: [18] [2350/2502] eta: 0:01:55 lr: 0.000009 loss_cls: 3.7976 (3.9124) grad_norm: 2.4084 (2.3868) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 21:31:38 root] (utils.py 283): INFO Epoch: [18] [2360/2502] eta: 0:01:48 lr: 0.000009 loss_cls: 3.7997 (3.9127) grad_norm: 2.3921 (2.3867) time: 0.7623 data: 0.0003 max mem: 8426 +[2024-12-10 21:31:46 root] (utils.py 283): INFO Epoch: [18] [2370/2502] eta: 0:01:40 lr: 0.000009 loss_cls: 4.0142 (3.9129) grad_norm: 2.3349 (2.3867) time: 0.7626 data: 0.0003 max mem: 8426 +[2024-12-10 21:31:54 root] (utils.py 283): INFO Epoch: [18] [2380/2502] eta: 0:01:32 lr: 0.000009 loss_cls: 4.1176 (3.9140) grad_norm: 2.3349 (2.3867) time: 0.7612 data: 0.0003 max mem: 8426 +[2024-12-10 21:32:01 root] (utils.py 283): INFO Epoch: [18] [2390/2502] eta: 0:01:25 lr: 0.000009 loss_cls: 4.1554 (3.9145) grad_norm: 2.2948 (2.3864) time: 0.7665 data: 0.0003 max mem: 8426 +[2024-12-10 21:32:09 root] (utils.py 283): INFO Epoch: [18] [2400/2502] eta: 0:01:17 lr: 0.000009 loss_cls: 4.0288 (3.9150) grad_norm: 2.3494 (2.3865) time: 0.7671 data: 0.0003 max mem: 8426 +[2024-12-10 21:32:17 root] (utils.py 283): INFO Epoch: [18] [2410/2502] eta: 0:01:10 lr: 0.000009 loss_cls: 4.0288 (3.9147) grad_norm: 2.3689 (2.3864) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 21:32:24 root] (utils.py 283): INFO Epoch: [18] [2420/2502] eta: 0:01:02 lr: 0.000009 loss_cls: 3.8099 (3.9141) grad_norm: 2.3764 (2.3867) time: 0.7641 data: 0.0003 max mem: 8426 +[2024-12-10 21:32:32 root] (utils.py 283): INFO Epoch: [18] [2430/2502] eta: 0:00:54 lr: 0.000009 loss_cls: 3.9091 (3.9142) grad_norm: 2.4964 (2.3870) time: 0.7637 data: 0.0003 max mem: 8426 +[2024-12-10 21:32:40 root] (utils.py 283): INFO Epoch: [18] [2440/2502] eta: 0:00:47 lr: 0.000009 loss_cls: 4.3366 (3.9160) grad_norm: 2.4679 (2.3872) time: 0.7691 data: 0.0002 max mem: 8426 +[2024-12-10 21:32:47 root] (utils.py 283): INFO Epoch: [18] [2450/2502] eta: 0:00:39 lr: 0.000009 loss_cls: 4.3299 (3.9153) grad_norm: 2.4128 (2.3872) time: 0.7799 data: 0.0003 max mem: 8426 +[2024-12-10 21:32:55 root] (utils.py 283): INFO Epoch: [18] [2460/2502] eta: 0:00:32 lr: 0.000009 loss_cls: 3.6653 (3.9151) grad_norm: 2.4010 (2.3873) time: 0.7833 data: 0.0002 max mem: 8426 +[2024-12-10 21:33:03 root] (utils.py 283): INFO Epoch: [18] [2470/2502] eta: 0:00:24 lr: 0.000009 loss_cls: 3.6653 (3.9140) grad_norm: 2.3441 (2.3871) time: 0.7802 data: 0.0002 max mem: 8426 +[2024-12-10 21:33:11 root] (utils.py 283): INFO Epoch: [18] [2480/2502] eta: 0:00:16 lr: 0.000009 loss_cls: 3.5469 (3.9132) grad_norm: 2.3483 (2.3872) time: 0.7818 data: 0.0003 max mem: 8426 +[2024-12-10 21:33:19 root] (utils.py 283): INFO Epoch: [18] [2490/2502] eta: 0:00:09 lr: 0.000009 loss_cls: 4.2031 (3.9145) grad_norm: 2.3483 (2.3871) time: 0.7968 data: 0.0238 max mem: 8426 +[2024-12-10 21:33:27 root] (utils.py 283): INFO Epoch: [18] [2500/2502] eta: 0:00:01 lr: 0.000009 loss_cls: 4.3136 (3.9154) grad_norm: 2.3464 (2.3870) time: 0.7846 data: 0.0237 max mem: 8426 +[2024-12-10 21:33:27 root] (utils.py 283): INFO Epoch: [18] [2501/2502] eta: 0:00:00 lr: 0.000009 loss_cls: 4.3044 (3.9154) grad_norm: 2.3464 (2.3870) time: 0.7842 data: 0.0237 max mem: 8426 +[2024-12-10 21:33:27 root] (utils.py 297): INFO Epoch: [18] Total time: 0:31:48 (0.7627 s / it) +[2024-12-10 21:33:27 root] (engine.py 179): INFO Averaged stats:lr: 0.000009 loss_cls: 4.3044 (3.9145) grad_norm: 2.3464 (2.3870) +[2024-12-10 21:33:28 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.6227 (0.6227) acc1: 85.1562 (85.1562) acc3: 96.8750 (96.8750) acc5: 98.4375 (98.4375) time: 0.1280 data: 0.0004 max mem: 8426 +[2024-12-10 21:33:29 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7246 (0.8058) acc1: 85.1562 (82.3153) acc3: 94.5312 (93.3949) acc5: 96.8750 (96.3068) time: 0.1280 data: 0.0004 max mem: 8426 +[2024-12-10 21:33:30 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8750 (0.8652) acc1: 78.1250 (80.9524) acc3: 91.4062 (92.7455) acc5: 95.3125 (95.4985) time: 0.1280 data: 0.0004 max mem: 8426 +[2024-12-10 21:33:32 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9382 (0.8736) acc1: 78.1250 (80.2923) acc3: 92.1875 (92.9435) acc5: 95.3125 (95.6905) time: 0.1282 data: 0.0004 max mem: 8426 +[2024-12-10 21:33:33 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8092 (0.8638) acc1: 79.6875 (80.7355) acc3: 94.5312 (93.1021) acc5: 96.0938 (95.7889) time: 0.1282 data: 0.0004 max mem: 8426 +[2024-12-10 21:33:34 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0513 (0.9483) acc1: 75.0000 (78.7684) acc3: 88.2812 (91.7739) acc5: 92.9688 (94.6844) time: 0.1282 data: 0.0004 max mem: 8426 +[2024-12-10 21:33:36 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:04 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2322 (0.9951) acc1: 73.4375 (78.0994) acc3: 86.7188 (90.8427) acc5: 89.0625 (93.8525) time: 0.1287 data: 0.0005 max mem: 8426 +[2024-12-10 21:33:37 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1875 (1.0355) acc1: 74.2188 (77.2007) acc3: 86.7188 (90.3059) acc5: 90.6250 (93.3759) time: 0.1290 data: 0.0005 max mem: 8426 +[2024-12-10 21:33:38 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2090 (1.0689) acc1: 73.4375 (76.4853) acc3: 86.7188 (89.7569) acc5: 90.6250 (92.8916) time: 0.1438 data: 0.0157 max mem: 8426 +[2024-12-10 21:33:40 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2255 (1.0962) acc1: 71.0938 (75.8499) acc3: 85.9375 (89.4231) acc5: 89.8438 (92.6253) time: 0.1461 data: 0.0182 max mem: 8426 +[2024-12-10 21:33:41 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1330 (1.0846) acc1: 74.2188 (76.0640) acc3: 89.8438 (89.6000) acc5: 92.1875 (92.7840) time: 0.1457 data: 0.0182 max mem: 8426 +[2024-12-10 21:33:41 root] (utils.py 297): INFO Test: Total time: 0:00:12 (0.1321 s / it) +[2024-12-10 21:33:42 root] (engine.py 264): INFO * Acc@1 75.706 Acc@3 89.570 Acc@5 92.916 loss 1.087 flops 1.285 layer_flops 1.251 +[2024-12-10 21:33:42 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 75.7% +[2024-12-10 21:33:42 root] (main.py 576): INFO Max accuracy: 75.72% +[2024-12-10 21:33:43 root] (utils.py 283): INFO Epoch: [19] [ 0/2502] eta: 0:38:36 lr: 0.000008 loss_cls: 3.9357 (3.9357) grad_norm: 2.1049 (2.1049) time: 0.9257 data: 0.0002 max mem: 8426 +[2024-12-10 21:33:51 root] (utils.py 283): INFO Epoch: [19] [ 10/2502] eta: 0:32:12 lr: 0.000008 loss_cls: 3.8838 (3.7977) grad_norm: 2.3390 (2.3168) time: 0.7754 data: 0.0002 max mem: 8426 +[2024-12-10 21:33:59 root] (utils.py 283): INFO Epoch: [19] [ 20/2502] eta: 0:32:00 lr: 0.000008 loss_cls: 3.9717 (3.9086) grad_norm: 2.3091 (2.3096) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-10 21:34:06 root] (utils.py 283): INFO Epoch: [19] [ 30/2502] eta: 0:32:04 lr: 0.000008 loss_cls: 4.0471 (3.9018) grad_norm: 2.3220 (2.3500) time: 0.7800 data: 0.0002 max mem: 8426 +[2024-12-10 21:34:14 root] (utils.py 283): INFO Epoch: [19] [ 40/2502] eta: 0:31:59 lr: 0.000008 loss_cls: 4.0471 (3.8842) grad_norm: 2.3980 (2.3694) time: 0.7855 data: 0.0002 max mem: 8426 +[2024-12-10 21:34:22 root] (utils.py 283): INFO Epoch: [19] [ 50/2502] eta: 0:31:44 lr: 0.000008 loss_cls: 4.0116 (3.8703) grad_norm: 2.4176 (2.3662) time: 0.7742 data: 0.0002 max mem: 8426 +[2024-12-10 21:34:30 root] (utils.py 283): INFO Epoch: [19] [ 60/2502] eta: 0:31:33 lr: 0.000008 loss_cls: 4.1215 (3.9000) grad_norm: 2.3400 (2.3662) time: 0.7665 data: 0.0002 max mem: 8426 +[2024-12-10 21:34:37 root] (utils.py 283): INFO Epoch: [19] [ 70/2502] eta: 0:31:22 lr: 0.000008 loss_cls: 4.1376 (3.9136) grad_norm: 2.3400 (2.3789) time: 0.7678 data: 0.0003 max mem: 8426 +[2024-12-10 21:34:45 root] (utils.py 283): INFO Epoch: [19] [ 80/2502] eta: 0:31:13 lr: 0.000008 loss_cls: 3.8491 (3.8938) grad_norm: 2.3777 (2.3801) time: 0.7684 data: 0.0002 max mem: 8426 +[2024-12-10 21:34:53 root] (utils.py 283): INFO Epoch: [19] [ 90/2502] eta: 0:31:03 lr: 0.000008 loss_cls: 3.7028 (3.8774) grad_norm: 2.3615 (2.3787) time: 0.7673 data: 0.0002 max mem: 8426 +[2024-12-10 21:35:00 root] (utils.py 283): INFO Epoch: [19] [ 100/2502] eta: 0:30:53 lr: 0.000008 loss_cls: 3.7028 (3.8766) grad_norm: 2.3334 (2.3789) time: 0.7635 data: 0.0003 max mem: 8426 +[2024-12-10 21:35:08 root] (utils.py 283): INFO Epoch: [19] [ 110/2502] eta: 0:30:43 lr: 0.000008 loss_cls: 4.1559 (3.8951) grad_norm: 2.3763 (2.3821) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 21:35:16 root] (utils.py 283): INFO Epoch: [19] [ 120/2502] eta: 0:30:36 lr: 0.000008 loss_cls: 4.0969 (3.8987) grad_norm: 2.4101 (2.3817) time: 0.7677 data: 0.0002 max mem: 8426 +[2024-12-10 21:35:23 root] (utils.py 283): INFO Epoch: [19] [ 130/2502] eta: 0:30:28 lr: 0.000008 loss_cls: 4.0254 (3.9043) grad_norm: 2.3592 (2.3791) time: 0.7704 data: 0.0002 max mem: 8426 +[2024-12-10 21:35:31 root] (utils.py 283): INFO Epoch: [19] [ 140/2502] eta: 0:30:19 lr: 0.000008 loss_cls: 4.0511 (3.8996) grad_norm: 2.3393 (2.3784) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 21:35:39 root] (utils.py 283): INFO Epoch: [19] [ 150/2502] eta: 0:30:11 lr: 0.000008 loss_cls: 4.1349 (3.9039) grad_norm: 2.3710 (2.3789) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 21:35:46 root] (utils.py 283): INFO Epoch: [19] [ 160/2502] eta: 0:30:03 lr: 0.000008 loss_cls: 4.1349 (3.9024) grad_norm: 2.4080 (2.3838) time: 0.7683 data: 0.0003 max mem: 8426 +[2024-12-10 21:35:54 root] (utils.py 283): INFO Epoch: [19] [ 170/2502] eta: 0:29:54 lr: 0.000008 loss_cls: 4.0252 (3.9089) grad_norm: 2.4605 (2.3866) time: 0.7659 data: 0.0003 max mem: 8426 +[2024-12-10 21:36:02 root] (utils.py 283): INFO Epoch: [19] [ 180/2502] eta: 0:29:46 lr: 0.000008 loss_cls: 4.2609 (3.9227) grad_norm: 2.3652 (2.3820) time: 0.7661 data: 0.0003 max mem: 8426 +[2024-12-10 21:36:09 root] (utils.py 283): INFO Epoch: [19] [ 190/2502] eta: 0:29:38 lr: 0.000008 loss_cls: 4.2396 (3.9237) grad_norm: 2.2870 (2.3817) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-10 21:36:17 root] (utils.py 283): INFO Epoch: [19] [ 200/2502] eta: 0:29:31 lr: 0.000008 loss_cls: 3.9266 (3.9203) grad_norm: 2.3234 (2.3806) time: 0.7695 data: 0.0002 max mem: 8426 +[2024-12-10 21:36:25 root] (utils.py 283): INFO Epoch: [19] [ 210/2502] eta: 0:29:23 lr: 0.000008 loss_cls: 4.1377 (3.9321) grad_norm: 2.3651 (2.3819) time: 0.7711 data: 0.0002 max mem: 8426 +[2024-12-10 21:36:32 root] (utils.py 283): INFO Epoch: [19] [ 220/2502] eta: 0:29:15 lr: 0.000008 loss_cls: 4.1822 (3.9425) grad_norm: 2.3596 (2.3795) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-10 21:36:40 root] (utils.py 283): INFO Epoch: [19] [ 230/2502] eta: 0:29:07 lr: 0.000008 loss_cls: 4.0672 (3.9421) grad_norm: 2.3437 (2.3798) time: 0.7666 data: 0.0003 max mem: 8426 +[2024-12-10 21:36:48 root] (utils.py 283): INFO Epoch: [19] [ 240/2502] eta: 0:28:59 lr: 0.000008 loss_cls: 4.0367 (3.9455) grad_norm: 2.3325 (2.3787) time: 0.7670 data: 0.0002 max mem: 8426 +[2024-12-10 21:36:55 root] (utils.py 283): INFO Epoch: [19] [ 250/2502] eta: 0:28:51 lr: 0.000008 loss_cls: 4.0367 (3.9510) grad_norm: 2.3457 (2.3801) time: 0.7666 data: 0.0002 max mem: 8426 +[2024-12-10 21:37:03 root] (utils.py 283): INFO Epoch: [19] [ 260/2502] eta: 0:28:44 lr: 0.000008 loss_cls: 4.1546 (3.9586) grad_norm: 2.3572 (2.3793) time: 0.7696 data: 0.0002 max mem: 8426 +[2024-12-10 21:37:11 root] (utils.py 283): INFO Epoch: [19] [ 270/2502] eta: 0:28:36 lr: 0.000008 loss_cls: 4.0012 (3.9507) grad_norm: 2.3398 (2.3780) time: 0.7683 data: 0.0002 max mem: 8426 +[2024-12-10 21:37:18 root] (utils.py 283): INFO Epoch: [19] [ 280/2502] eta: 0:28:28 lr: 0.000008 loss_cls: 3.5593 (3.9410) grad_norm: 2.3797 (2.3799) time: 0.7664 data: 0.0003 max mem: 8426 +[2024-12-10 21:37:26 root] (utils.py 283): INFO Epoch: [19] [ 290/2502] eta: 0:28:20 lr: 0.000008 loss_cls: 3.7760 (3.9467) grad_norm: 2.3814 (2.3801) time: 0.7675 data: 0.0003 max mem: 8426 +[2024-12-10 21:37:34 root] (utils.py 283): INFO Epoch: [19] [ 300/2502] eta: 0:28:14 lr: 0.000008 loss_cls: 3.8352 (3.9412) grad_norm: 2.4148 (2.3828) time: 0.7751 data: 0.0003 max mem: 8426 +[2024-12-10 21:37:42 root] (utils.py 283): INFO Epoch: [19] [ 310/2502] eta: 0:28:06 lr: 0.000008 loss_cls: 3.8373 (3.9398) grad_norm: 2.4148 (2.3816) time: 0.7785 data: 0.0002 max mem: 8426 +[2024-12-10 21:37:49 root] (utils.py 283): INFO Epoch: [19] [ 320/2502] eta: 0:27:58 lr: 0.000008 loss_cls: 4.0245 (3.9365) grad_norm: 2.3804 (2.3827) time: 0.7686 data: 0.0002 max mem: 8426 +[2024-12-10 21:37:57 root] (utils.py 283): INFO Epoch: [19] [ 330/2502] eta: 0:27:51 lr: 0.000008 loss_cls: 4.0245 (3.9456) grad_norm: 2.4109 (2.3843) time: 0.7671 data: 0.0003 max mem: 8426 +[2024-12-10 21:38:05 root] (utils.py 283): INFO Epoch: [19] [ 340/2502] eta: 0:27:42 lr: 0.000008 loss_cls: 4.2272 (3.9468) grad_norm: 2.3605 (2.3841) time: 0.7651 data: 0.0003 max mem: 8426 +[2024-12-10 21:38:12 root] (utils.py 283): INFO Epoch: [19] [ 350/2502] eta: 0:27:34 lr: 0.000008 loss_cls: 4.0443 (3.9449) grad_norm: 2.3479 (2.3837) time: 0.7641 data: 0.0003 max mem: 8426 +[2024-12-10 21:38:20 root] (utils.py 283): INFO Epoch: [19] [ 360/2502] eta: 0:27:26 lr: 0.000008 loss_cls: 3.9381 (3.9489) grad_norm: 2.3558 (2.3828) time: 0.7652 data: 0.0003 max mem: 8426 +[2024-12-10 21:38:28 root] (utils.py 283): INFO Epoch: [19] [ 370/2502] eta: 0:27:18 lr: 0.000008 loss_cls: 4.1578 (3.9495) grad_norm: 2.3492 (2.3809) time: 0.7632 data: 0.0003 max mem: 8426 +[2024-12-10 21:38:35 root] (utils.py 283): INFO Epoch: [19] [ 380/2502] eta: 0:27:10 lr: 0.000008 loss_cls: 4.0012 (3.9488) grad_norm: 2.3492 (2.3809) time: 0.7640 data: 0.0003 max mem: 8426 +[2024-12-10 21:38:43 root] (utils.py 283): INFO Epoch: [19] [ 390/2502] eta: 0:27:02 lr: 0.000008 loss_cls: 3.7846 (3.9444) grad_norm: 2.4306 (2.3820) time: 0.7621 data: 0.0003 max mem: 8426 +[2024-12-10 21:38:50 root] (utils.py 283): INFO Epoch: [19] [ 400/2502] eta: 0:26:55 lr: 0.000008 loss_cls: 3.8024 (3.9436) grad_norm: 2.4135 (2.3835) time: 0.7642 data: 0.0003 max mem: 8426 +[2024-12-10 21:38:58 root] (utils.py 283): INFO Epoch: [19] [ 410/2502] eta: 0:26:47 lr: 0.000008 loss_cls: 4.0378 (3.9422) grad_norm: 2.3410 (2.3834) time: 0.7666 data: 0.0003 max mem: 8426 +[2024-12-10 21:39:06 root] (utils.py 283): INFO Epoch: [19] [ 420/2502] eta: 0:26:39 lr: 0.000008 loss_cls: 4.0132 (3.9411) grad_norm: 2.3863 (2.3848) time: 0.7658 data: 0.0003 max mem: 8426 +[2024-12-10 21:39:13 root] (utils.py 283): INFO Epoch: [19] [ 430/2502] eta: 0:26:31 lr: 0.000008 loss_cls: 3.8407 (3.9369) grad_norm: 2.4080 (2.3864) time: 0.7638 data: 0.0003 max mem: 8426 +[2024-12-10 21:39:21 root] (utils.py 283): INFO Epoch: [19] [ 440/2502] eta: 0:26:24 lr: 0.000008 loss_cls: 3.7801 (3.9302) grad_norm: 2.3650 (2.3853) time: 0.7713 data: 0.0003 max mem: 8426 +[2024-12-10 21:39:29 root] (utils.py 283): INFO Epoch: [19] [ 450/2502] eta: 0:26:17 lr: 0.000008 loss_cls: 3.8858 (3.9347) grad_norm: 2.3736 (2.3875) time: 0.7816 data: 0.0003 max mem: 8426 +[2024-12-10 21:39:37 root] (utils.py 283): INFO Epoch: [19] [ 460/2502] eta: 0:26:09 lr: 0.000008 loss_cls: 4.0850 (3.9321) grad_norm: 2.4639 (2.3882) time: 0.7780 data: 0.0003 max mem: 8426 +[2024-12-10 21:39:44 root] (utils.py 283): INFO Epoch: [19] [ 470/2502] eta: 0:26:02 lr: 0.000008 loss_cls: 4.0040 (3.9364) grad_norm: 2.3978 (2.3881) time: 0.7724 data: 0.0003 max mem: 8426 +[2024-12-10 21:39:52 root] (utils.py 283): INFO Epoch: [19] [ 480/2502] eta: 0:25:55 lr: 0.000008 loss_cls: 4.2177 (3.9416) grad_norm: 2.3727 (2.3882) time: 0.7749 data: 0.0003 max mem: 8426 +[2024-12-10 21:40:00 root] (utils.py 283): INFO Epoch: [19] [ 490/2502] eta: 0:25:47 lr: 0.000008 loss_cls: 4.0469 (3.9376) grad_norm: 2.3682 (2.3874) time: 0.7791 data: 0.0002 max mem: 8426 +[2024-12-10 21:40:08 root] (utils.py 283): INFO Epoch: [19] [ 500/2502] eta: 0:25:40 lr: 0.000008 loss_cls: 3.9724 (3.9427) grad_norm: 2.3825 (2.3886) time: 0.7780 data: 0.0003 max mem: 8426 +[2024-12-10 21:40:16 root] (utils.py 283): INFO Epoch: [19] [ 510/2502] eta: 0:25:33 lr: 0.000008 loss_cls: 4.2234 (3.9435) grad_norm: 2.3595 (2.3885) time: 0.7811 data: 0.0003 max mem: 8426 +[2024-12-10 21:40:23 root] (utils.py 283): INFO Epoch: [19] [ 520/2502] eta: 0:25:25 lr: 0.000008 loss_cls: 4.1833 (3.9425) grad_norm: 2.3306 (2.3879) time: 0.7750 data: 0.0003 max mem: 8426 +[2024-12-10 21:40:31 root] (utils.py 283): INFO Epoch: [19] [ 530/2502] eta: 0:25:17 lr: 0.000008 loss_cls: 4.0811 (3.9430) grad_norm: 2.3306 (2.3878) time: 0.7665 data: 0.0002 max mem: 8426 +[2024-12-10 21:40:39 root] (utils.py 283): INFO Epoch: [19] [ 540/2502] eta: 0:25:09 lr: 0.000008 loss_cls: 3.8497 (3.9387) grad_norm: 2.3256 (2.3864) time: 0.7675 data: 0.0002 max mem: 8426 +[2024-12-10 21:40:46 root] (utils.py 283): INFO Epoch: [19] [ 550/2502] eta: 0:25:02 lr: 0.000008 loss_cls: 3.7199 (3.9368) grad_norm: 2.3117 (2.3867) time: 0.7682 data: 0.0002 max mem: 8426 +[2024-12-10 21:40:54 root] (utils.py 283): INFO Epoch: [19] [ 560/2502] eta: 0:24:54 lr: 0.000008 loss_cls: 3.5968 (3.9279) grad_norm: 2.3582 (2.3861) time: 0.7696 data: 0.0002 max mem: 8426 +[2024-12-10 21:41:02 root] (utils.py 283): INFO Epoch: [19] [ 570/2502] eta: 0:24:46 lr: 0.000008 loss_cls: 3.7848 (3.9277) grad_norm: 2.3703 (2.3870) time: 0.7723 data: 0.0002 max mem: 8426 +[2024-12-10 21:41:10 root] (utils.py 283): INFO Epoch: [19] [ 580/2502] eta: 0:24:39 lr: 0.000008 loss_cls: 3.9787 (3.9256) grad_norm: 2.3720 (2.3866) time: 0.7725 data: 0.0002 max mem: 8426 +[2024-12-10 21:41:17 root] (utils.py 283): INFO Epoch: [19] [ 590/2502] eta: 0:24:31 lr: 0.000008 loss_cls: 3.9359 (3.9281) grad_norm: 2.3338 (2.3862) time: 0.7687 data: 0.0002 max mem: 8426 +[2024-12-10 21:41:25 root] (utils.py 283): INFO Epoch: [19] [ 600/2502] eta: 0:24:23 lr: 0.000008 loss_cls: 4.0242 (3.9265) grad_norm: 2.3562 (2.3868) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-10 21:41:32 root] (utils.py 283): INFO Epoch: [19] [ 610/2502] eta: 0:24:15 lr: 0.000008 loss_cls: 3.9192 (3.9242) grad_norm: 2.3875 (2.3869) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 21:41:40 root] (utils.py 283): INFO Epoch: [19] [ 620/2502] eta: 0:24:07 lr: 0.000008 loss_cls: 4.0774 (3.9288) grad_norm: 2.3908 (2.3865) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 21:41:48 root] (utils.py 283): INFO Epoch: [19] [ 630/2502] eta: 0:24:00 lr: 0.000008 loss_cls: 4.1122 (3.9308) grad_norm: 2.4052 (2.3878) time: 0.7666 data: 0.0002 max mem: 8426 +[2024-12-10 21:41:55 root] (utils.py 283): INFO Epoch: [19] [ 640/2502] eta: 0:23:52 lr: 0.000008 loss_cls: 3.9893 (3.9277) grad_norm: 2.3503 (2.3871) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-10 21:42:03 root] (utils.py 283): INFO Epoch: [19] [ 650/2502] eta: 0:23:44 lr: 0.000008 loss_cls: 4.0358 (3.9307) grad_norm: 2.3418 (2.3871) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 21:42:11 root] (utils.py 283): INFO Epoch: [19] [ 660/2502] eta: 0:23:36 lr: 0.000008 loss_cls: 3.9300 (3.9246) grad_norm: 2.3788 (2.3868) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 21:42:19 root] (utils.py 283): INFO Epoch: [19] [ 670/2502] eta: 0:23:29 lr: 0.000008 loss_cls: 3.8568 (3.9261) grad_norm: 2.3688 (2.3869) time: 0.7699 data: 0.0002 max mem: 8426 +[2024-12-10 21:42:26 root] (utils.py 283): INFO Epoch: [19] [ 680/2502] eta: 0:23:21 lr: 0.000008 loss_cls: 4.0949 (3.9291) grad_norm: 2.3335 (2.3874) time: 0.7711 data: 0.0002 max mem: 8426 +[2024-12-10 21:42:34 root] (utils.py 283): INFO Epoch: [19] [ 690/2502] eta: 0:23:13 lr: 0.000008 loss_cls: 4.1294 (3.9297) grad_norm: 2.3892 (2.3879) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 21:42:41 root] (utils.py 283): INFO Epoch: [19] [ 700/2502] eta: 0:23:05 lr: 0.000008 loss_cls: 4.1294 (3.9350) grad_norm: 2.4209 (2.3878) time: 0.7659 data: 0.0003 max mem: 8426 +[2024-12-10 21:42:49 root] (utils.py 283): INFO Epoch: [19] [ 710/2502] eta: 0:22:58 lr: 0.000008 loss_cls: 4.0997 (3.9373) grad_norm: 2.3668 (2.3874) time: 0.7674 data: 0.0003 max mem: 8426 +[2024-12-10 21:42:57 root] (utils.py 283): INFO Epoch: [19] [ 720/2502] eta: 0:22:50 lr: 0.000008 loss_cls: 4.0313 (3.9351) grad_norm: 2.3981 (2.3877) time: 0.7677 data: 0.0002 max mem: 8426 +[2024-12-10 21:43:05 root] (utils.py 283): INFO Epoch: [19] [ 730/2502] eta: 0:22:42 lr: 0.000008 loss_cls: 4.0313 (3.9342) grad_norm: 2.4311 (2.3893) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-10 21:43:12 root] (utils.py 283): INFO Epoch: [19] [ 740/2502] eta: 0:22:34 lr: 0.000008 loss_cls: 3.8601 (3.9337) grad_norm: 2.4310 (2.3895) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 21:43:20 root] (utils.py 283): INFO Epoch: [19] [ 750/2502] eta: 0:22:26 lr: 0.000008 loss_cls: 4.2896 (3.9392) grad_norm: 2.3518 (2.3895) time: 0.7605 data: 0.0002 max mem: 8426 +[2024-12-10 21:43:27 root] (utils.py 283): INFO Epoch: [19] [ 760/2502] eta: 0:22:19 lr: 0.000008 loss_cls: 4.2266 (3.9372) grad_norm: 2.3548 (2.3896) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-10 21:43:35 root] (utils.py 283): INFO Epoch: [19] [ 770/2502] eta: 0:22:11 lr: 0.000008 loss_cls: 3.6812 (3.9325) grad_norm: 2.3548 (2.3897) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 21:43:43 root] (utils.py 283): INFO Epoch: [19] [ 780/2502] eta: 0:22:03 lr: 0.000008 loss_cls: 3.5658 (3.9296) grad_norm: 2.3400 (2.3897) time: 0.7605 data: 0.0002 max mem: 8426 +[2024-12-10 21:43:50 root] (utils.py 283): INFO Epoch: [19] [ 790/2502] eta: 0:21:55 lr: 0.000008 loss_cls: 3.8390 (3.9288) grad_norm: 2.3208 (2.3896) time: 0.7691 data: 0.0002 max mem: 8426 +[2024-12-10 21:43:58 root] (utils.py 283): INFO Epoch: [19] [ 800/2502] eta: 0:21:48 lr: 0.000008 loss_cls: 4.1453 (3.9335) grad_norm: 2.4224 (2.3903) time: 0.7711 data: 0.0003 max mem: 8426 +[2024-12-10 21:44:06 root] (utils.py 283): INFO Epoch: [19] [ 810/2502] eta: 0:21:40 lr: 0.000008 loss_cls: 4.3060 (3.9327) grad_norm: 2.4502 (2.3905) time: 0.7741 data: 0.0002 max mem: 8426 +[2024-12-10 21:44:14 root] (utils.py 283): INFO Epoch: [19] [ 820/2502] eta: 0:21:33 lr: 0.000008 loss_cls: 4.1118 (3.9326) grad_norm: 2.3452 (2.3911) time: 0.7743 data: 0.0002 max mem: 8426 +[2024-12-10 21:44:21 root] (utils.py 283): INFO Epoch: [19] [ 830/2502] eta: 0:21:25 lr: 0.000008 loss_cls: 4.1868 (3.9349) grad_norm: 2.3799 (2.3911) time: 0.7690 data: 0.0003 max mem: 8426 +[2024-12-10 21:44:29 root] (utils.py 283): INFO Epoch: [19] [ 840/2502] eta: 0:21:17 lr: 0.000008 loss_cls: 4.1331 (3.9364) grad_norm: 2.4271 (2.3920) time: 0.7647 data: 0.0003 max mem: 8426 +[2024-12-10 21:44:37 root] (utils.py 283): INFO Epoch: [19] [ 850/2502] eta: 0:21:09 lr: 0.000008 loss_cls: 4.0960 (3.9366) grad_norm: 2.4274 (2.3928) time: 0.7625 data: 0.0002 max mem: 8426 +[2024-12-10 21:44:44 root] (utils.py 283): INFO Epoch: [19] [ 860/2502] eta: 0:21:02 lr: 0.000008 loss_cls: 4.1473 (3.9399) grad_norm: 2.4222 (2.3925) time: 0.7707 data: 0.0002 max mem: 8426 +[2024-12-10 21:44:52 root] (utils.py 283): INFO Epoch: [19] [ 870/2502] eta: 0:20:54 lr: 0.000008 loss_cls: 4.2944 (3.9395) grad_norm: 2.4678 (2.3935) time: 0.7731 data: 0.0002 max mem: 8426 +[2024-12-10 21:45:00 root] (utils.py 283): INFO Epoch: [19] [ 880/2502] eta: 0:20:47 lr: 0.000008 loss_cls: 4.0515 (3.9391) grad_norm: 2.4523 (2.3934) time: 0.7734 data: 0.0002 max mem: 8426 +[2024-12-10 21:45:07 root] (utils.py 283): INFO Epoch: [19] [ 890/2502] eta: 0:20:39 lr: 0.000008 loss_cls: 3.9011 (3.9361) grad_norm: 2.3694 (2.3929) time: 0.7714 data: 0.0002 max mem: 8426 +[2024-12-10 21:45:15 root] (utils.py 283): INFO Epoch: [19] [ 900/2502] eta: 0:20:31 lr: 0.000008 loss_cls: 4.2513 (3.9413) grad_norm: 2.4236 (2.3936) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 21:45:23 root] (utils.py 283): INFO Epoch: [19] [ 910/2502] eta: 0:20:23 lr: 0.000008 loss_cls: 4.1793 (3.9428) grad_norm: 2.4179 (2.3940) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 21:45:30 root] (utils.py 283): INFO Epoch: [19] [ 920/2502] eta: 0:20:15 lr: 0.000008 loss_cls: 4.0802 (3.9414) grad_norm: 2.3874 (2.3937) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 21:45:38 root] (utils.py 283): INFO Epoch: [19] [ 930/2502] eta: 0:20:08 lr: 0.000008 loss_cls: 3.8772 (3.9407) grad_norm: 2.4505 (2.3945) time: 0.7679 data: 0.0002 max mem: 8426 +[2024-12-10 21:45:46 root] (utils.py 283): INFO Epoch: [19] [ 940/2502] eta: 0:20:00 lr: 0.000008 loss_cls: 4.0589 (3.9394) grad_norm: 2.4602 (2.3944) time: 0.7671 data: 0.0003 max mem: 8426 +[2024-12-10 21:45:53 root] (utils.py 283): INFO Epoch: [19] [ 950/2502] eta: 0:19:52 lr: 0.000008 loss_cls: 4.0866 (3.9398) grad_norm: 2.3199 (2.3944) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 21:46:01 root] (utils.py 283): INFO Epoch: [19] [ 960/2502] eta: 0:19:45 lr: 0.000008 loss_cls: 3.7656 (3.9390) grad_norm: 2.3427 (2.3943) time: 0.7681 data: 0.0002 max mem: 8426 +[2024-12-10 21:46:09 root] (utils.py 283): INFO Epoch: [19] [ 970/2502] eta: 0:19:37 lr: 0.000008 loss_cls: 3.9661 (3.9405) grad_norm: 2.4108 (2.3946) time: 0.7764 data: 0.0002 max mem: 8426 +[2024-12-10 21:46:16 root] (utils.py 283): INFO Epoch: [19] [ 980/2502] eta: 0:19:29 lr: 0.000008 loss_cls: 4.0205 (3.9404) grad_norm: 2.3688 (2.3946) time: 0.7729 data: 0.0002 max mem: 8426 +[2024-12-10 21:46:24 root] (utils.py 283): INFO Epoch: [19] [ 990/2502] eta: 0:19:22 lr: 0.000008 loss_cls: 3.7750 (3.9369) grad_norm: 2.3989 (2.3949) time: 0.7619 data: 0.0003 max mem: 8426 +[2024-12-10 21:46:32 root] (utils.py 283): INFO Epoch: [19] [1000/2502] eta: 0:19:14 lr: 0.000008 loss_cls: 3.6938 (3.9368) grad_norm: 2.3989 (2.3953) time: 0.7684 data: 0.0003 max mem: 8426 +[2024-12-10 21:46:39 root] (utils.py 283): INFO Epoch: [19] [1010/2502] eta: 0:19:06 lr: 0.000008 loss_cls: 3.6938 (3.9330) grad_norm: 2.3433 (2.3952) time: 0.7695 data: 0.0003 max mem: 8426 +[2024-12-10 21:46:47 root] (utils.py 283): INFO Epoch: [19] [1020/2502] eta: 0:18:59 lr: 0.000008 loss_cls: 3.9769 (3.9344) grad_norm: 2.3393 (2.3952) time: 0.7630 data: 0.0003 max mem: 8426 +[2024-12-10 21:46:55 root] (utils.py 283): INFO Epoch: [19] [1030/2502] eta: 0:18:51 lr: 0.000008 loss_cls: 4.0552 (3.9335) grad_norm: 2.3461 (2.3955) time: 0.7664 data: 0.0003 max mem: 8426 +[2024-12-10 21:47:03 root] (utils.py 283): INFO Epoch: [19] [1040/2502] eta: 0:18:43 lr: 0.000008 loss_cls: 3.9827 (3.9331) grad_norm: 2.4052 (2.3955) time: 0.7711 data: 0.0003 max mem: 8426 +[2024-12-10 21:47:10 root] (utils.py 283): INFO Epoch: [19] [1050/2502] eta: 0:18:36 lr: 0.000008 loss_cls: 4.0025 (3.9325) grad_norm: 2.4255 (2.3955) time: 0.7770 data: 0.0002 max mem: 8426 +[2024-12-10 21:47:18 root] (utils.py 283): INFO Epoch: [19] [1060/2502] eta: 0:18:28 lr: 0.000008 loss_cls: 4.0025 (3.9316) grad_norm: 2.3391 (2.3951) time: 0.7841 data: 0.0002 max mem: 8426 +[2024-12-10 21:47:26 root] (utils.py 283): INFO Epoch: [19] [1070/2502] eta: 0:18:20 lr: 0.000008 loss_cls: 4.1035 (3.9311) grad_norm: 2.3389 (2.3946) time: 0.7741 data: 0.0003 max mem: 8426 +[2024-12-10 21:47:34 root] (utils.py 283): INFO Epoch: [19] [1080/2502] eta: 0:18:13 lr: 0.000008 loss_cls: 4.2218 (3.9315) grad_norm: 2.3582 (2.3951) time: 0.7744 data: 0.0003 max mem: 8426 +[2024-12-10 21:47:41 root] (utils.py 283): INFO Epoch: [19] [1090/2502] eta: 0:18:05 lr: 0.000008 loss_cls: 4.2294 (3.9329) grad_norm: 2.3646 (2.3949) time: 0.7756 data: 0.0002 max mem: 8426 +[2024-12-10 21:47:49 root] (utils.py 283): INFO Epoch: [19] [1100/2502] eta: 0:17:58 lr: 0.000008 loss_cls: 3.8485 (3.9308) grad_norm: 2.3534 (2.3947) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 21:47:57 root] (utils.py 283): INFO Epoch: [19] [1110/2502] eta: 0:17:50 lr: 0.000008 loss_cls: 3.8119 (3.9322) grad_norm: 2.3741 (2.3946) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-10 21:48:04 root] (utils.py 283): INFO Epoch: [19] [1120/2502] eta: 0:17:42 lr: 0.000008 loss_cls: 3.9810 (3.9304) grad_norm: 2.3822 (2.3948) time: 0.7663 data: 0.0003 max mem: 8426 +[2024-12-10 21:48:12 root] (utils.py 283): INFO Epoch: [19] [1130/2502] eta: 0:17:34 lr: 0.000008 loss_cls: 3.7068 (3.9287) grad_norm: 2.4118 (2.3955) time: 0.7675 data: 0.0002 max mem: 8426 +[2024-12-10 21:48:20 root] (utils.py 283): INFO Epoch: [19] [1140/2502] eta: 0:17:27 lr: 0.000008 loss_cls: 4.0097 (3.9298) grad_norm: 2.4088 (2.3948) time: 0.7690 data: 0.0002 max mem: 8426 +[2024-12-10 21:48:27 root] (utils.py 283): INFO Epoch: [19] [1150/2502] eta: 0:17:19 lr: 0.000008 loss_cls: 4.1281 (3.9323) grad_norm: 2.4133 (2.3952) time: 0.7670 data: 0.0002 max mem: 8426 +[2024-12-10 21:48:35 root] (utils.py 283): INFO Epoch: [19] [1160/2502] eta: 0:17:11 lr: 0.000008 loss_cls: 3.9295 (3.9302) grad_norm: 2.4594 (2.3957) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-10 21:48:43 root] (utils.py 283): INFO Epoch: [19] [1170/2502] eta: 0:17:03 lr: 0.000008 loss_cls: 3.8493 (3.9299) grad_norm: 2.4342 (2.3958) time: 0.7629 data: 0.0003 max mem: 8426 +[2024-12-10 21:48:50 root] (utils.py 283): INFO Epoch: [19] [1180/2502] eta: 0:16:56 lr: 0.000008 loss_cls: 3.6627 (3.9265) grad_norm: 2.3820 (2.3958) time: 0.7602 data: 0.0002 max mem: 8426 +[2024-12-10 21:48:58 root] (utils.py 283): INFO Epoch: [19] [1190/2502] eta: 0:16:48 lr: 0.000008 loss_cls: 3.7201 (3.9266) grad_norm: 2.3530 (2.3955) time: 0.7607 data: 0.0003 max mem: 8426 +[2024-12-10 21:49:05 root] (utils.py 283): INFO Epoch: [19] [1200/2502] eta: 0:16:40 lr: 0.000008 loss_cls: 3.7644 (3.9261) grad_norm: 2.4040 (2.3959) time: 0.7600 data: 0.0002 max mem: 8426 +[2024-12-10 21:49:13 root] (utils.py 283): INFO Epoch: [19] [1210/2502] eta: 0:16:32 lr: 0.000008 loss_cls: 3.9823 (3.9266) grad_norm: 2.4293 (2.3957) time: 0.7579 data: 0.0002 max mem: 8426 +[2024-12-10 21:49:21 root] (utils.py 283): INFO Epoch: [19] [1220/2502] eta: 0:16:25 lr: 0.000008 loss_cls: 4.0918 (3.9273) grad_norm: 2.3870 (2.3958) time: 0.7618 data: 0.0002 max mem: 8426 +[2024-12-10 21:49:28 root] (utils.py 283): INFO Epoch: [19] [1230/2502] eta: 0:16:17 lr: 0.000008 loss_cls: 4.1646 (3.9273) grad_norm: 2.3879 (2.3957) time: 0.7606 data: 0.0002 max mem: 8426 +[2024-12-10 21:49:36 root] (utils.py 283): INFO Epoch: [19] [1240/2502] eta: 0:16:09 lr: 0.000008 loss_cls: 4.0479 (3.9278) grad_norm: 2.3136 (2.3950) time: 0.7580 data: 0.0003 max mem: 8426 +[2024-12-10 21:49:43 root] (utils.py 283): INFO Epoch: [19] [1250/2502] eta: 0:16:01 lr: 0.000008 loss_cls: 3.7847 (3.9253) grad_norm: 2.3151 (2.3949) time: 0.7587 data: 0.0003 max mem: 8426 +[2024-12-10 21:49:51 root] (utils.py 283): INFO Epoch: [19] [1260/2502] eta: 0:15:53 lr: 0.000008 loss_cls: 4.0323 (3.9259) grad_norm: 2.3674 (2.3947) time: 0.7575 data: 0.0003 max mem: 8426 +[2024-12-10 21:49:59 root] (utils.py 283): INFO Epoch: [19] [1270/2502] eta: 0:15:46 lr: 0.000008 loss_cls: 4.0584 (3.9274) grad_norm: 2.3459 (2.3942) time: 0.7702 data: 0.0002 max mem: 8426 +[2024-12-10 21:50:07 root] (utils.py 283): INFO Epoch: [19] [1280/2502] eta: 0:15:38 lr: 0.000008 loss_cls: 3.9732 (3.9270) grad_norm: 2.3640 (2.3942) time: 0.7825 data: 0.0002 max mem: 8426 +[2024-12-10 21:50:14 root] (utils.py 283): INFO Epoch: [19] [1290/2502] eta: 0:15:31 lr: 0.000008 loss_cls: 3.8947 (3.9258) grad_norm: 2.3663 (2.3941) time: 0.7844 data: 0.0003 max mem: 8426 +[2024-12-10 21:50:22 root] (utils.py 283): INFO Epoch: [19] [1300/2502] eta: 0:15:23 lr: 0.000008 loss_cls: 3.7715 (3.9236) grad_norm: 2.3303 (2.3938) time: 0.7853 data: 0.0003 max mem: 8426 +[2024-12-10 21:50:30 root] (utils.py 283): INFO Epoch: [19] [1310/2502] eta: 0:15:16 lr: 0.000008 loss_cls: 3.7120 (3.9227) grad_norm: 2.3229 (2.3936) time: 0.7816 data: 0.0003 max mem: 8426 +[2024-12-10 21:50:38 root] (utils.py 283): INFO Epoch: [19] [1320/2502] eta: 0:15:08 lr: 0.000008 loss_cls: 3.7876 (3.9221) grad_norm: 2.4021 (2.3940) time: 0.7711 data: 0.0003 max mem: 8426 +[2024-12-10 21:50:45 root] (utils.py 283): INFO Epoch: [19] [1330/2502] eta: 0:15:00 lr: 0.000008 loss_cls: 3.7723 (3.9211) grad_norm: 2.3496 (2.3934) time: 0.7596 data: 0.0002 max mem: 8426 +[2024-12-10 21:50:53 root] (utils.py 283): INFO Epoch: [19] [1340/2502] eta: 0:14:52 lr: 0.000008 loss_cls: 3.8583 (3.9224) grad_norm: 2.3458 (2.3936) time: 0.7556 data: 0.0003 max mem: 8426 +[2024-12-10 21:51:00 root] (utils.py 283): INFO Epoch: [19] [1350/2502] eta: 0:14:45 lr: 0.000008 loss_cls: 4.2067 (3.9242) grad_norm: 2.4213 (2.3936) time: 0.7549 data: 0.0003 max mem: 8426 +[2024-12-10 21:51:08 root] (utils.py 283): INFO Epoch: [19] [1360/2502] eta: 0:14:37 lr: 0.000008 loss_cls: 4.2067 (3.9257) grad_norm: 2.3250 (2.3933) time: 0.7551 data: 0.0002 max mem: 8426 +[2024-12-10 21:51:16 root] (utils.py 283): INFO Epoch: [19] [1370/2502] eta: 0:14:29 lr: 0.000008 loss_cls: 4.1702 (3.9259) grad_norm: 2.3590 (2.3936) time: 0.7572 data: 0.0002 max mem: 8426 +[2024-12-10 21:51:23 root] (utils.py 283): INFO Epoch: [19] [1380/2502] eta: 0:14:21 lr: 0.000008 loss_cls: 4.0805 (3.9284) grad_norm: 2.3964 (2.3935) time: 0.7581 data: 0.0003 max mem: 8426 +[2024-12-10 21:51:31 root] (utils.py 283): INFO Epoch: [19] [1390/2502] eta: 0:14:14 lr: 0.000008 loss_cls: 4.0619 (3.9292) grad_norm: 2.3881 (2.3939) time: 0.7574 data: 0.0003 max mem: 8426 +[2024-12-10 21:51:38 root] (utils.py 283): INFO Epoch: [19] [1400/2502] eta: 0:14:06 lr: 0.000008 loss_cls: 4.0619 (3.9287) grad_norm: 2.3449 (2.3936) time: 0.7557 data: 0.0002 max mem: 8426 +[2024-12-10 21:51:46 root] (utils.py 283): INFO Epoch: [19] [1410/2502] eta: 0:13:58 lr: 0.000008 loss_cls: 3.9477 (3.9278) grad_norm: 2.3341 (2.3938) time: 0.7607 data: 0.0002 max mem: 8426 +[2024-12-10 21:51:54 root] (utils.py 283): INFO Epoch: [19] [1420/2502] eta: 0:13:50 lr: 0.000008 loss_cls: 3.7922 (3.9266) grad_norm: 2.3709 (2.3941) time: 0.7641 data: 0.0003 max mem: 8426 +[2024-12-10 21:52:01 root] (utils.py 283): INFO Epoch: [19] [1430/2502] eta: 0:13:43 lr: 0.000008 loss_cls: 3.8408 (3.9267) grad_norm: 2.4061 (2.3942) time: 0.7571 data: 0.0002 max mem: 8426 +[2024-12-10 21:52:09 root] (utils.py 283): INFO Epoch: [19] [1440/2502] eta: 0:13:35 lr: 0.000008 loss_cls: 4.1376 (3.9282) grad_norm: 2.4003 (2.3943) time: 0.7576 data: 0.0002 max mem: 8426 +[2024-12-10 21:52:16 root] (utils.py 283): INFO Epoch: [19] [1450/2502] eta: 0:13:27 lr: 0.000008 loss_cls: 4.2315 (3.9291) grad_norm: 2.4003 (2.3946) time: 0.7619 data: 0.0003 max mem: 8426 +[2024-12-10 21:52:24 root] (utils.py 283): INFO Epoch: [19] [1460/2502] eta: 0:13:19 lr: 0.000008 loss_cls: 4.0196 (3.9271) grad_norm: 2.4354 (2.3955) time: 0.7589 data: 0.0003 max mem: 8426 +[2024-12-10 21:52:31 root] (utils.py 283): INFO Epoch: [19] [1470/2502] eta: 0:13:12 lr: 0.000008 loss_cls: 3.8822 (3.9279) grad_norm: 2.4207 (2.3962) time: 0.7570 data: 0.0003 max mem: 8426 +[2024-12-10 21:52:39 root] (utils.py 283): INFO Epoch: [19] [1480/2502] eta: 0:13:04 lr: 0.000008 loss_cls: 4.1446 (3.9281) grad_norm: 2.3865 (2.3962) time: 0.7591 data: 0.0003 max mem: 8426 +[2024-12-10 21:52:47 root] (utils.py 283): INFO Epoch: [19] [1490/2502] eta: 0:12:56 lr: 0.000008 loss_cls: 3.9980 (3.9286) grad_norm: 2.3244 (2.3959) time: 0.7609 data: 0.0003 max mem: 8426 +[2024-12-10 21:52:54 root] (utils.py 283): INFO Epoch: [19] [1500/2502] eta: 0:12:48 lr: 0.000008 loss_cls: 3.8164 (3.9262) grad_norm: 2.3842 (2.3961) time: 0.7635 data: 0.0003 max mem: 8426 +[2024-12-10 21:53:02 root] (utils.py 283): INFO Epoch: [19] [1510/2502] eta: 0:12:41 lr: 0.000008 loss_cls: 3.7889 (3.9263) grad_norm: 2.4174 (2.3962) time: 0.7617 data: 0.0003 max mem: 8426 +[2024-12-10 21:53:10 root] (utils.py 283): INFO Epoch: [19] [1520/2502] eta: 0:12:33 lr: 0.000008 loss_cls: 4.1980 (3.9271) grad_norm: 2.4497 (2.3971) time: 0.7618 data: 0.0002 max mem: 8426 +[2024-12-10 21:53:17 root] (utils.py 283): INFO Epoch: [19] [1530/2502] eta: 0:12:25 lr: 0.000008 loss_cls: 4.1812 (3.9273) grad_norm: 2.4048 (2.3970) time: 0.7612 data: 0.0002 max mem: 8426 +[2024-12-10 21:53:25 root] (utils.py 283): INFO Epoch: [19] [1540/2502] eta: 0:12:18 lr: 0.000008 loss_cls: 3.9007 (3.9254) grad_norm: 2.3563 (2.3969) time: 0.7612 data: 0.0002 max mem: 8426 +[2024-12-10 21:53:33 root] (utils.py 283): INFO Epoch: [19] [1550/2502] eta: 0:12:10 lr: 0.000008 loss_cls: 3.8376 (3.9254) grad_norm: 2.3709 (2.3972) time: 0.7735 data: 0.0002 max mem: 8426 +[2024-12-10 21:53:40 root] (utils.py 283): INFO Epoch: [19] [1560/2502] eta: 0:12:02 lr: 0.000008 loss_cls: 3.9306 (3.9258) grad_norm: 2.3364 (2.3974) time: 0.7787 data: 0.0002 max mem: 8426 +[2024-12-10 21:53:48 root] (utils.py 283): INFO Epoch: [19] [1570/2502] eta: 0:11:55 lr: 0.000008 loss_cls: 4.0961 (3.9267) grad_norm: 2.3134 (2.3972) time: 0.7731 data: 0.0002 max mem: 8426 +[2024-12-10 21:53:56 root] (utils.py 283): INFO Epoch: [19] [1580/2502] eta: 0:11:47 lr: 0.000008 loss_cls: 4.0976 (3.9275) grad_norm: 2.3394 (2.3967) time: 0.7732 data: 0.0003 max mem: 8426 +[2024-12-10 21:54:04 root] (utils.py 283): INFO Epoch: [19] [1590/2502] eta: 0:11:40 lr: 0.000008 loss_cls: 4.0347 (3.9264) grad_norm: 2.3389 (2.3967) time: 0.7794 data: 0.0003 max mem: 8426 +[2024-12-10 21:54:12 root] (utils.py 283): INFO Epoch: [19] [1600/2502] eta: 0:11:32 lr: 0.000008 loss_cls: 3.9339 (3.9270) grad_norm: 2.4031 (2.3969) time: 0.7827 data: 0.0002 max mem: 8426 +[2024-12-10 21:54:19 root] (utils.py 283): INFO Epoch: [19] [1610/2502] eta: 0:11:24 lr: 0.000008 loss_cls: 4.0855 (3.9259) grad_norm: 2.4076 (2.3969) time: 0.7822 data: 0.0002 max mem: 8426 +[2024-12-10 21:54:27 root] (utils.py 283): INFO Epoch: [19] [1620/2502] eta: 0:11:17 lr: 0.000008 loss_cls: 4.1804 (3.9263) grad_norm: 2.3840 (2.3968) time: 0.7791 data: 0.0002 max mem: 8426 +[2024-12-10 21:54:35 root] (utils.py 283): INFO Epoch: [19] [1630/2502] eta: 0:11:09 lr: 0.000008 loss_cls: 4.2739 (3.9275) grad_norm: 2.3581 (2.3966) time: 0.7689 data: 0.0002 max mem: 8426 +[2024-12-10 21:54:42 root] (utils.py 283): INFO Epoch: [19] [1640/2502] eta: 0:11:01 lr: 0.000008 loss_cls: 4.1646 (3.9282) grad_norm: 2.3985 (2.3965) time: 0.7680 data: 0.0002 max mem: 8426 +[2024-12-10 21:54:50 root] (utils.py 283): INFO Epoch: [19] [1650/2502] eta: 0:10:54 lr: 0.000008 loss_cls: 4.1195 (3.9288) grad_norm: 2.4014 (2.3967) time: 0.7699 data: 0.0002 max mem: 8426 +[2024-12-10 21:54:58 root] (utils.py 283): INFO Epoch: [19] [1660/2502] eta: 0:10:46 lr: 0.000008 loss_cls: 4.1195 (3.9298) grad_norm: 2.3409 (2.3964) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-10 21:55:05 root] (utils.py 283): INFO Epoch: [19] [1670/2502] eta: 0:10:38 lr: 0.000008 loss_cls: 4.0839 (3.9302) grad_norm: 2.3517 (2.3968) time: 0.7671 data: 0.0003 max mem: 8426 +[2024-12-10 21:55:13 root] (utils.py 283): INFO Epoch: [19] [1680/2502] eta: 0:10:31 lr: 0.000008 loss_cls: 4.2477 (3.9305) grad_norm: 2.4370 (2.3969) time: 0.7672 data: 0.0003 max mem: 8426 +[2024-12-10 21:55:21 root] (utils.py 283): INFO Epoch: [19] [1690/2502] eta: 0:10:23 lr: 0.000008 loss_cls: 4.0388 (3.9313) grad_norm: 2.3301 (2.3964) time: 0.7664 data: 0.0003 max mem: 8426 +[2024-12-10 21:55:28 root] (utils.py 283): INFO Epoch: [19] [1700/2502] eta: 0:10:15 lr: 0.000008 loss_cls: 3.8171 (3.9295) grad_norm: 2.3443 (2.3963) time: 0.7654 data: 0.0002 max mem: 8426 +[2024-12-10 21:55:36 root] (utils.py 283): INFO Epoch: [19] [1710/2502] eta: 0:10:08 lr: 0.000008 loss_cls: 3.6268 (3.9281) grad_norm: 2.3717 (2.3962) time: 0.7685 data: 0.0002 max mem: 8426 +[2024-12-10 21:55:44 root] (utils.py 283): INFO Epoch: [19] [1720/2502] eta: 0:10:00 lr: 0.000008 loss_cls: 3.8698 (3.9284) grad_norm: 2.3681 (2.3961) time: 0.7681 data: 0.0002 max mem: 8426 +[2024-12-10 21:55:51 root] (utils.py 283): INFO Epoch: [19] [1730/2502] eta: 0:09:52 lr: 0.000008 loss_cls: 4.0041 (3.9291) grad_norm: 2.3666 (2.3961) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 21:55:59 root] (utils.py 283): INFO Epoch: [19] [1740/2502] eta: 0:09:45 lr: 0.000008 loss_cls: 4.0041 (3.9297) grad_norm: 2.3707 (2.3963) time: 0.7653 data: 0.0003 max mem: 8426 +[2024-12-10 21:56:07 root] (utils.py 283): INFO Epoch: [19] [1750/2502] eta: 0:09:37 lr: 0.000008 loss_cls: 3.8578 (3.9278) grad_norm: 2.4305 (2.3964) time: 0.7700 data: 0.0003 max mem: 8426 +[2024-12-10 21:56:14 root] (utils.py 283): INFO Epoch: [19] [1760/2502] eta: 0:09:29 lr: 0.000008 loss_cls: 3.9342 (3.9281) grad_norm: 2.3917 (2.3961) time: 0.7672 data: 0.0002 max mem: 8426 +[2024-12-10 21:56:22 root] (utils.py 283): INFO Epoch: [19] [1770/2502] eta: 0:09:21 lr: 0.000008 loss_cls: 3.9879 (3.9287) grad_norm: 2.3959 (2.3963) time: 0.7625 data: 0.0002 max mem: 8426 +[2024-12-10 21:56:30 root] (utils.py 283): INFO Epoch: [19] [1780/2502] eta: 0:09:14 lr: 0.000008 loss_cls: 3.7836 (3.9274) grad_norm: 2.4294 (2.3967) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-10 21:56:37 root] (utils.py 283): INFO Epoch: [19] [1790/2502] eta: 0:09:06 lr: 0.000008 loss_cls: 3.6901 (3.9265) grad_norm: 2.4260 (2.3968) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 21:56:45 root] (utils.py 283): INFO Epoch: [19] [1800/2502] eta: 0:08:58 lr: 0.000008 loss_cls: 3.9959 (3.9271) grad_norm: 2.4198 (2.3971) time: 0.7698 data: 0.0002 max mem: 8426 +[2024-12-10 21:56:53 root] (utils.py 283): INFO Epoch: [19] [1810/2502] eta: 0:08:51 lr: 0.000008 loss_cls: 4.1112 (3.9285) grad_norm: 2.3764 (2.3969) time: 0.7721 data: 0.0002 max mem: 8426 +[2024-12-10 21:57:01 root] (utils.py 283): INFO Epoch: [19] [1820/2502] eta: 0:08:43 lr: 0.000008 loss_cls: 4.1986 (3.9291) grad_norm: 2.3764 (2.3971) time: 0.7673 data: 0.0002 max mem: 8426 +[2024-12-10 21:57:08 root] (utils.py 283): INFO Epoch: [19] [1830/2502] eta: 0:08:35 lr: 0.000008 loss_cls: 4.2672 (3.9300) grad_norm: 2.4212 (2.3971) time: 0.7734 data: 0.0003 max mem: 8426 +[2024-12-10 21:57:16 root] (utils.py 283): INFO Epoch: [19] [1840/2502] eta: 0:08:28 lr: 0.000008 loss_cls: 4.2359 (3.9295) grad_norm: 2.4212 (2.3973) time: 0.7722 data: 0.0003 max mem: 8426 +[2024-12-10 21:57:24 root] (utils.py 283): INFO Epoch: [19] [1850/2502] eta: 0:08:20 lr: 0.000008 loss_cls: 4.1591 (3.9303) grad_norm: 2.4546 (2.3975) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-10 21:57:31 root] (utils.py 283): INFO Epoch: [19] [1860/2502] eta: 0:08:12 lr: 0.000008 loss_cls: 4.2705 (3.9324) grad_norm: 2.4488 (2.3976) time: 0.7700 data: 0.0002 max mem: 8426 +[2024-12-10 21:57:39 root] (utils.py 283): INFO Epoch: [19] [1870/2502] eta: 0:08:05 lr: 0.000008 loss_cls: 4.2705 (3.9317) grad_norm: 2.4349 (2.3975) time: 0.7645 data: 0.0003 max mem: 8426 +[2024-12-10 21:57:47 root] (utils.py 283): INFO Epoch: [19] [1880/2502] eta: 0:07:57 lr: 0.000008 loss_cls: 4.0439 (3.9318) grad_norm: 2.3832 (2.3975) time: 0.7575 data: 0.0003 max mem: 8426 +[2024-12-10 21:57:54 root] (utils.py 283): INFO Epoch: [19] [1890/2502] eta: 0:07:49 lr: 0.000008 loss_cls: 3.8716 (3.9307) grad_norm: 2.3901 (2.3973) time: 0.7615 data: 0.0002 max mem: 8426 +[2024-12-10 21:58:02 root] (utils.py 283): INFO Epoch: [19] [1900/2502] eta: 0:07:42 lr: 0.000008 loss_cls: 3.8836 (3.9305) grad_norm: 2.4221 (2.3974) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 21:58:09 root] (utils.py 283): INFO Epoch: [19] [1910/2502] eta: 0:07:34 lr: 0.000008 loss_cls: 3.9414 (3.9305) grad_norm: 2.3187 (2.3971) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 21:58:17 root] (utils.py 283): INFO Epoch: [19] [1920/2502] eta: 0:07:26 lr: 0.000008 loss_cls: 4.1452 (3.9319) grad_norm: 2.3133 (2.3970) time: 0.7625 data: 0.0003 max mem: 8426 +[2024-12-10 21:58:25 root] (utils.py 283): INFO Epoch: [19] [1930/2502] eta: 0:07:19 lr: 0.000008 loss_cls: 4.0987 (3.9314) grad_norm: 2.3414 (2.3970) time: 0.7693 data: 0.0003 max mem: 8426 +[2024-12-10 21:58:33 root] (utils.py 283): INFO Epoch: [19] [1940/2502] eta: 0:07:11 lr: 0.000008 loss_cls: 3.8213 (3.9300) grad_norm: 2.3414 (2.3967) time: 0.7785 data: 0.0002 max mem: 8426 +[2024-12-10 21:58:40 root] (utils.py 283): INFO Epoch: [19] [1950/2502] eta: 0:07:03 lr: 0.000008 loss_cls: 3.9057 (3.9289) grad_norm: 2.3340 (2.3964) time: 0.7824 data: 0.0002 max mem: 8426 +[2024-12-10 21:58:48 root] (utils.py 283): INFO Epoch: [19] [1960/2502] eta: 0:06:56 lr: 0.000008 loss_cls: 3.9057 (3.9285) grad_norm: 2.3719 (2.3965) time: 0.7738 data: 0.0002 max mem: 8426 +[2024-12-10 21:58:56 root] (utils.py 283): INFO Epoch: [19] [1970/2502] eta: 0:06:48 lr: 0.000008 loss_cls: 4.0939 (3.9284) grad_norm: 2.3856 (2.3969) time: 0.7718 data: 0.0002 max mem: 8426 +[2024-12-10 21:59:04 root] (utils.py 283): INFO Epoch: [19] [1980/2502] eta: 0:06:40 lr: 0.000008 loss_cls: 4.2108 (3.9303) grad_norm: 2.4616 (2.3969) time: 0.7725 data: 0.0003 max mem: 8426 +[2024-12-10 21:59:11 root] (utils.py 283): INFO Epoch: [19] [1990/2502] eta: 0:06:33 lr: 0.000008 loss_cls: 4.1944 (3.9284) grad_norm: 2.3528 (2.3970) time: 0.7646 data: 0.0003 max mem: 8426 +[2024-12-10 21:59:19 root] (utils.py 283): INFO Epoch: [19] [2000/2502] eta: 0:06:25 lr: 0.000008 loss_cls: 3.6424 (3.9281) grad_norm: 2.4099 (2.3972) time: 0.7642 data: 0.0003 max mem: 8426 +[2024-12-10 21:59:26 root] (utils.py 283): INFO Epoch: [19] [2010/2502] eta: 0:06:17 lr: 0.000008 loss_cls: 4.0047 (3.9281) grad_norm: 2.4124 (2.3971) time: 0.7648 data: 0.0003 max mem: 8426 +[2024-12-10 21:59:34 root] (utils.py 283): INFO Epoch: [19] [2020/2502] eta: 0:06:10 lr: 0.000008 loss_cls: 4.0662 (3.9279) grad_norm: 2.4207 (2.3974) time: 0.7650 data: 0.0003 max mem: 8426 +[2024-12-10 21:59:42 root] (utils.py 283): INFO Epoch: [19] [2030/2502] eta: 0:06:02 lr: 0.000008 loss_cls: 3.7574 (3.9263) grad_norm: 2.3534 (2.3973) time: 0.7679 data: 0.0003 max mem: 8426 +[2024-12-10 21:59:49 root] (utils.py 283): INFO Epoch: [19] [2040/2502] eta: 0:05:54 lr: 0.000008 loss_cls: 3.7574 (3.9260) grad_norm: 2.3659 (2.3975) time: 0.7650 data: 0.0003 max mem: 8426 +[2024-12-10 21:59:57 root] (utils.py 283): INFO Epoch: [19] [2050/2502] eta: 0:05:47 lr: 0.000008 loss_cls: 3.9692 (3.9261) grad_norm: 2.3659 (2.3970) time: 0.7605 data: 0.0002 max mem: 8426 +[2024-12-10 22:00:05 root] (utils.py 283): INFO Epoch: [19] [2060/2502] eta: 0:05:39 lr: 0.000008 loss_cls: 3.9504 (3.9262) grad_norm: 2.3138 (2.3968) time: 0.7607 data: 0.0003 max mem: 8426 +[2024-12-10 22:00:12 root] (utils.py 283): INFO Epoch: [19] [2070/2502] eta: 0:05:31 lr: 0.000008 loss_cls: 3.9504 (3.9258) grad_norm: 2.3258 (2.3966) time: 0.7644 data: 0.0003 max mem: 8426 +[2024-12-10 22:00:20 root] (utils.py 283): INFO Epoch: [19] [2080/2502] eta: 0:05:23 lr: 0.000008 loss_cls: 3.8944 (3.9252) grad_norm: 2.4056 (2.3969) time: 0.7658 data: 0.0003 max mem: 8426 +[2024-12-10 22:00:28 root] (utils.py 283): INFO Epoch: [19] [2090/2502] eta: 0:05:16 lr: 0.000008 loss_cls: 3.8944 (3.9249) grad_norm: 2.3597 (2.3965) time: 0.7685 data: 0.0002 max mem: 8426 +[2024-12-10 22:00:35 root] (utils.py 283): INFO Epoch: [19] [2100/2502] eta: 0:05:08 lr: 0.000008 loss_cls: 3.7122 (3.9240) grad_norm: 2.3597 (2.3968) time: 0.7676 data: 0.0003 max mem: 8426 +[2024-12-10 22:00:43 root] (utils.py 283): INFO Epoch: [19] [2110/2502] eta: 0:05:00 lr: 0.000008 loss_cls: 4.0441 (3.9251) grad_norm: 2.4258 (2.3969) time: 0.7616 data: 0.0003 max mem: 8426 +[2024-12-10 22:00:51 root] (utils.py 283): INFO Epoch: [19] [2120/2502] eta: 0:04:53 lr: 0.000008 loss_cls: 4.1632 (3.9247) grad_norm: 2.3693 (2.3969) time: 0.7618 data: 0.0002 max mem: 8426 +[2024-12-10 22:00:58 root] (utils.py 283): INFO Epoch: [19] [2130/2502] eta: 0:04:45 lr: 0.000008 loss_cls: 3.9561 (3.9236) grad_norm: 2.3980 (2.3972) time: 0.7625 data: 0.0002 max mem: 8426 +[2024-12-10 22:01:06 root] (utils.py 283): INFO Epoch: [19] [2140/2502] eta: 0:04:37 lr: 0.000008 loss_cls: 3.9050 (3.9235) grad_norm: 2.4887 (2.3975) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-10 22:01:14 root] (utils.py 283): INFO Epoch: [19] [2150/2502] eta: 0:04:30 lr: 0.000008 loss_cls: 3.9129 (3.9244) grad_norm: 2.4047 (2.3973) time: 0.7646 data: 0.0003 max mem: 8426 +[2024-12-10 22:01:21 root] (utils.py 283): INFO Epoch: [19] [2160/2502] eta: 0:04:22 lr: 0.000008 loss_cls: 4.0708 (3.9236) grad_norm: 2.3670 (2.3974) time: 0.7626 data: 0.0003 max mem: 8426 +[2024-12-10 22:01:29 root] (utils.py 283): INFO Epoch: [19] [2170/2502] eta: 0:04:14 lr: 0.000008 loss_cls: 3.7467 (3.9222) grad_norm: 2.3929 (2.3975) time: 0.7627 data: 0.0003 max mem: 8426 +[2024-12-10 22:01:36 root] (utils.py 283): INFO Epoch: [19] [2180/2502] eta: 0:04:07 lr: 0.000008 loss_cls: 3.4489 (3.9214) grad_norm: 2.4069 (2.3973) time: 0.7621 data: 0.0003 max mem: 8426 +[2024-12-10 22:01:44 root] (utils.py 283): INFO Epoch: [19] [2190/2502] eta: 0:03:59 lr: 0.000008 loss_cls: 3.7758 (3.9212) grad_norm: 2.3708 (2.3972) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 22:01:52 root] (utils.py 283): INFO Epoch: [19] [2200/2502] eta: 0:03:51 lr: 0.000008 loss_cls: 4.0197 (3.9222) grad_norm: 2.3338 (2.3968) time: 0.7682 data: 0.0003 max mem: 8426 +[2024-12-10 22:01:59 root] (utils.py 283): INFO Epoch: [19] [2210/2502] eta: 0:03:44 lr: 0.000008 loss_cls: 4.0623 (3.9216) grad_norm: 2.3290 (2.3967) time: 0.7642 data: 0.0003 max mem: 8426 +[2024-12-10 22:02:07 root] (utils.py 283): INFO Epoch: [19] [2220/2502] eta: 0:03:36 lr: 0.000008 loss_cls: 3.6472 (3.9204) grad_norm: 2.3406 (2.3971) time: 0.7641 data: 0.0003 max mem: 8426 +[2024-12-10 22:02:15 root] (utils.py 283): INFO Epoch: [19] [2230/2502] eta: 0:03:28 lr: 0.000008 loss_cls: 3.8459 (3.9214) grad_norm: 2.4277 (2.3973) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 22:02:22 root] (utils.py 283): INFO Epoch: [19] [2240/2502] eta: 0:03:21 lr: 0.000008 loss_cls: 3.9177 (3.9201) grad_norm: 2.3924 (2.3972) time: 0.7647 data: 0.0003 max mem: 8426 +[2024-12-10 22:02:30 root] (utils.py 283): INFO Epoch: [19] [2250/2502] eta: 0:03:13 lr: 0.000008 loss_cls: 3.8890 (3.9205) grad_norm: 2.3485 (2.3974) time: 0.7618 data: 0.0003 max mem: 8426 +[2024-12-10 22:02:38 root] (utils.py 283): INFO Epoch: [19] [2260/2502] eta: 0:03:05 lr: 0.000008 loss_cls: 4.0413 (3.9201) grad_norm: 2.3731 (2.3974) time: 0.7708 data: 0.0002 max mem: 8426 +[2024-12-10 22:02:45 root] (utils.py 283): INFO Epoch: [19] [2270/2502] eta: 0:02:58 lr: 0.000008 loss_cls: 4.0413 (3.9203) grad_norm: 2.3498 (2.3972) time: 0.7731 data: 0.0003 max mem: 8426 +[2024-12-10 22:02:53 root] (utils.py 283): INFO Epoch: [19] [2280/2502] eta: 0:02:50 lr: 0.000008 loss_cls: 4.2546 (3.9214) grad_norm: 2.3873 (2.3973) time: 0.7701 data: 0.0002 max mem: 8426 +[2024-12-10 22:03:01 root] (utils.py 283): INFO Epoch: [19] [2290/2502] eta: 0:02:42 lr: 0.000008 loss_cls: 4.1636 (3.9215) grad_norm: 2.4028 (2.3972) time: 0.7706 data: 0.0002 max mem: 8426 +[2024-12-10 22:03:08 root] (utils.py 283): INFO Epoch: [19] [2300/2502] eta: 0:02:35 lr: 0.000008 loss_cls: 3.9213 (3.9215) grad_norm: 2.4144 (2.3976) time: 0.7673 data: 0.0003 max mem: 8426 +[2024-12-10 22:03:16 root] (utils.py 283): INFO Epoch: [19] [2310/2502] eta: 0:02:27 lr: 0.000008 loss_cls: 3.9455 (3.9212) grad_norm: 2.3695 (2.3971) time: 0.7658 data: 0.0003 max mem: 8426 +[2024-12-10 22:03:24 root] (utils.py 283): INFO Epoch: [19] [2320/2502] eta: 0:02:19 lr: 0.000008 loss_cls: 3.7997 (3.9197) grad_norm: 2.2632 (2.3969) time: 0.7707 data: 0.0003 max mem: 8426 +[2024-12-10 22:03:32 root] (utils.py 283): INFO Epoch: [19] [2330/2502] eta: 0:02:12 lr: 0.000008 loss_cls: 3.8929 (3.9208) grad_norm: 2.3171 (2.3968) time: 0.7771 data: 0.0002 max mem: 8426 +[2024-12-10 22:03:39 root] (utils.py 283): INFO Epoch: [19] [2340/2502] eta: 0:02:04 lr: 0.000008 loss_cls: 4.3150 (3.9219) grad_norm: 2.4030 (2.3974) time: 0.7710 data: 0.0002 max mem: 8426 +[2024-12-10 22:03:47 root] (utils.py 283): INFO Epoch: [19] [2350/2502] eta: 0:01:56 lr: 0.000008 loss_cls: 4.1752 (3.9222) grad_norm: 2.4144 (2.3973) time: 0.7615 data: 0.0003 max mem: 8426 +[2024-12-10 22:03:55 root] (utils.py 283): INFO Epoch: [19] [2360/2502] eta: 0:01:48 lr: 0.000008 loss_cls: 4.0814 (3.9225) grad_norm: 2.4140 (2.3971) time: 0.7587 data: 0.0003 max mem: 8426 +[2024-12-10 22:04:02 root] (utils.py 283): INFO Epoch: [19] [2370/2502] eta: 0:01:41 lr: 0.000008 loss_cls: 4.0814 (3.9230) grad_norm: 2.3123 (2.3970) time: 0.7694 data: 0.0003 max mem: 8426 +[2024-12-10 22:04:10 root] (utils.py 283): INFO Epoch: [19] [2380/2502] eta: 0:01:33 lr: 0.000008 loss_cls: 4.1548 (3.9237) grad_norm: 2.4184 (2.3973) time: 0.7748 data: 0.0003 max mem: 8426 +[2024-12-10 22:04:18 root] (utils.py 283): INFO Epoch: [19] [2390/2502] eta: 0:01:25 lr: 0.000008 loss_cls: 3.9902 (3.9231) grad_norm: 2.3641 (2.3971) time: 0.7692 data: 0.0003 max mem: 8426 +[2024-12-10 22:04:25 root] (utils.py 283): INFO Epoch: [19] [2400/2502] eta: 0:01:18 lr: 0.000008 loss_cls: 4.0457 (3.9245) grad_norm: 2.4236 (2.3975) time: 0.7692 data: 0.0002 max mem: 8426 +[2024-12-10 22:04:33 root] (utils.py 283): INFO Epoch: [19] [2410/2502] eta: 0:01:10 lr: 0.000008 loss_cls: 4.2990 (3.9255) grad_norm: 2.4282 (2.3976) time: 0.7672 data: 0.0003 max mem: 8426 +[2024-12-10 22:04:41 root] (utils.py 283): INFO Epoch: [19] [2420/2502] eta: 0:01:02 lr: 0.000008 loss_cls: 4.1593 (3.9247) grad_norm: 2.4144 (2.3977) time: 0.7653 data: 0.0003 max mem: 8426 +[2024-12-10 22:04:48 root] (utils.py 283): INFO Epoch: [19] [2430/2502] eta: 0:00:55 lr: 0.000008 loss_cls: 3.8880 (3.9247) grad_norm: 2.4315 (2.3981) time: 0.7668 data: 0.0003 max mem: 8426 +[2024-12-10 22:04:56 root] (utils.py 283): INFO Epoch: [19] [2440/2502] eta: 0:00:47 lr: 0.000008 loss_cls: 4.0991 (3.9251) grad_norm: 2.4086 (2.3982) time: 0.7684 data: 0.0003 max mem: 8426 +[2024-12-10 22:05:04 root] (utils.py 283): INFO Epoch: [19] [2450/2502] eta: 0:00:39 lr: 0.000008 loss_cls: 4.1176 (3.9261) grad_norm: 2.3532 (2.3981) time: 0.7772 data: 0.0003 max mem: 8426 +[2024-12-10 22:05:12 root] (utils.py 283): INFO Epoch: [19] [2460/2502] eta: 0:00:32 lr: 0.000008 loss_cls: 4.0665 (3.9249) grad_norm: 2.3423 (2.3980) time: 0.7809 data: 0.0002 max mem: 8426 +[2024-12-10 22:05:19 root] (utils.py 283): INFO Epoch: [19] [2470/2502] eta: 0:00:24 lr: 0.000008 loss_cls: 3.7050 (3.9239) grad_norm: 2.3125 (2.3975) time: 0.7715 data: 0.0002 max mem: 8426 +[2024-12-10 22:05:27 root] (utils.py 283): INFO Epoch: [19] [2480/2502] eta: 0:00:16 lr: 0.000008 loss_cls: 3.8017 (3.9239) grad_norm: 2.3207 (2.3974) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-10 22:05:35 root] (utils.py 283): INFO Epoch: [19] [2490/2502] eta: 0:00:09 lr: 0.000008 loss_cls: 3.8840 (3.9242) grad_norm: 2.4008 (2.3974) time: 0.7853 data: 0.0236 max mem: 8426 +[2024-12-10 22:05:43 root] (utils.py 283): INFO Epoch: [19] [2500/2502] eta: 0:00:01 lr: 0.000008 loss_cls: 3.8840 (3.9226) grad_norm: 2.4129 (2.3975) time: 0.7863 data: 0.0236 max mem: 8426 +[2024-12-10 22:05:43 root] (utils.py 283): INFO Epoch: [19] [2501/2502] eta: 0:00:00 lr: 0.000008 loss_cls: 3.6495 (3.9222) grad_norm: 2.4129 (2.3976) time: 0.7859 data: 0.0236 max mem: 8426 +[2024-12-10 22:05:43 root] (utils.py 297): INFO Epoch: [19] Total time: 0:32:01 (0.7679 s / it) +[2024-12-10 22:05:44 root] (engine.py 179): INFO Averaged stats:lr: 0.000008 loss_cls: 3.6495 (3.9101) grad_norm: 2.4129 (2.3976) +[2024-12-10 22:05:44 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.6229 (0.6229) acc1: 86.7188 (86.7188) acc3: 96.0938 (96.0938) acc5: 97.6562 (97.6562) time: 0.1274 data: 0.0003 max mem: 8426 +[2024-12-10 22:05:45 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7364 (0.7954) acc1: 86.7188 (82.1733) acc3: 95.3125 (93.5369) acc5: 96.8750 (96.5909) time: 0.1280 data: 0.0004 max mem: 8426 +[2024-12-10 22:05:47 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8509 (0.8559) acc1: 78.1250 (80.9524) acc3: 92.1875 (93.0060) acc5: 95.3125 (95.7589) time: 0.1281 data: 0.0004 max mem: 8426 +[2024-12-10 22:05:48 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9411 (0.8690) acc1: 78.1250 (80.3427) acc3: 93.7500 (93.3972) acc5: 95.3125 (95.9173) time: 0.1281 data: 0.0004 max mem: 8426 +[2024-12-10 22:05:49 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8281 (0.8594) acc1: 81.2500 (80.6593) acc3: 94.5312 (93.4451) acc5: 96.8750 (95.9032) time: 0.1436 data: 0.0159 max mem: 8426 +[2024-12-10 22:05:51 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0645 (0.9440) acc1: 75.0000 (78.7377) acc3: 89.0625 (92.0650) acc5: 92.1875 (94.8223) time: 0.1553 data: 0.0270 max mem: 8426 +[2024-12-10 22:05:53 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2489 (0.9883) acc1: 71.8750 (77.9969) acc3: 86.7188 (91.1501) acc5: 89.8438 (94.0702) time: 0.1626 data: 0.0343 max mem: 8426 +[2024-12-10 22:05:54 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:04 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1987 (1.0279) acc1: 74.2188 (77.1897) acc3: 86.7188 (90.5700) acc5: 90.6250 (93.6290) time: 0.1697 data: 0.0417 max mem: 8426 +[2024-12-10 22:05:56 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2166 (1.0626) acc1: 73.4375 (76.4178) acc3: 85.9375 (90.0077) acc5: 90.6250 (93.1424) time: 0.1494 data: 0.0211 max mem: 8426 +[2024-12-10 22:05:57 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2311 (1.0889) acc1: 71.8750 (75.7555) acc3: 85.9375 (89.6806) acc5: 90.6250 (92.8743) time: 0.1441 data: 0.0159 max mem: 8426 +[2024-12-10 22:05:58 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1411 (1.0774) acc1: 74.2188 (75.9760) acc3: 89.0625 (89.8480) acc5: 91.4062 (93.0480) time: 0.1418 data: 0.0159 max mem: 8426 +[2024-12-10 22:05:58 root] (utils.py 297): INFO Test: Total time: 0:00:14 (0.1452 s / it) +[2024-12-10 22:05:58 root] (engine.py 264): INFO * Acc@1 75.780 Acc@3 89.704 Acc@5 92.946 loss 1.079 flops 1.285 layer_flops 1.251 +[2024-12-10 22:05:58 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 75.8% +[2024-12-10 22:05:58 root] (main.py 576): INFO Max accuracy: 75.78% +[2024-12-10 22:05:59 root] (utils.py 283): INFO Epoch: [20] [ 0/2502] eta: 0:32:33 lr: 0.000007 loss_cls: 2.6289 (2.6289) grad_norm: 2.4778 (2.4778) time: 0.7806 data: 0.0003 max mem: 8426 +[2024-12-10 22:06:07 root] (utils.py 283): INFO Epoch: [20] [ 10/2502] eta: 0:31:34 lr: 0.000007 loss_cls: 4.1328 (3.8363) grad_norm: 2.4778 (2.4663) time: 0.7601 data: 0.0003 max mem: 8426 +[2024-12-10 22:06:14 root] (utils.py 283): INFO Epoch: [20] [ 20/2502] eta: 0:31:27 lr: 0.000007 loss_cls: 4.1853 (3.9711) grad_norm: 2.4193 (2.4320) time: 0.7594 data: 0.0003 max mem: 8426 +[2024-12-10 22:06:22 root] (utils.py 283): INFO Epoch: [20] [ 30/2502] eta: 0:31:16 lr: 0.000007 loss_cls: 4.2406 (4.0227) grad_norm: 2.3538 (2.4070) time: 0.7587 data: 0.0002 max mem: 8426 +[2024-12-10 22:06:29 root] (utils.py 283): INFO Epoch: [20] [ 40/2502] eta: 0:31:06 lr: 0.000007 loss_cls: 4.0305 (4.0069) grad_norm: 2.3474 (2.4072) time: 0.7558 data: 0.0002 max mem: 8426 +[2024-12-10 22:06:37 root] (utils.py 283): INFO Epoch: [20] [ 50/2502] eta: 0:31:00 lr: 0.000007 loss_cls: 3.9528 (3.9666) grad_norm: 2.3536 (2.4110) time: 0.7582 data: 0.0002 max mem: 8426 +[2024-12-10 22:06:45 root] (utils.py 283): INFO Epoch: [20] [ 60/2502] eta: 0:30:56 lr: 0.000007 loss_cls: 3.9528 (3.9562) grad_norm: 2.3913 (2.4079) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-10 22:06:52 root] (utils.py 283): INFO Epoch: [20] [ 70/2502] eta: 0:30:45 lr: 0.000007 loss_cls: 3.6797 (3.8739) grad_norm: 2.3913 (2.4019) time: 0.7592 data: 0.0002 max mem: 8426 +[2024-12-10 22:07:00 root] (utils.py 283): INFO Epoch: [20] [ 80/2502] eta: 0:30:38 lr: 0.000007 loss_cls: 3.8032 (3.9014) grad_norm: 2.3940 (2.4025) time: 0.7552 data: 0.0002 max mem: 8426 +[2024-12-10 22:07:07 root] (utils.py 283): INFO Epoch: [20] [ 90/2502] eta: 0:30:31 lr: 0.000007 loss_cls: 3.8032 (3.8531) grad_norm: 2.4658 (2.4164) time: 0.7613 data: 0.0002 max mem: 8426 +[2024-12-10 22:07:15 root] (utils.py 283): INFO Epoch: [20] [ 100/2502] eta: 0:30:29 lr: 0.000007 loss_cls: 3.6614 (3.8573) grad_norm: 2.3853 (2.4079) time: 0.7717 data: 0.0003 max mem: 8426 +[2024-12-10 22:07:23 root] (utils.py 283): INFO Epoch: [20] [ 110/2502] eta: 0:30:25 lr: 0.000007 loss_cls: 3.9065 (3.8488) grad_norm: 2.3853 (2.4119) time: 0.7798 data: 0.0002 max mem: 8426 +[2024-12-10 22:07:31 root] (utils.py 283): INFO Epoch: [20] [ 120/2502] eta: 0:30:21 lr: 0.000007 loss_cls: 4.0614 (3.8556) grad_norm: 2.3578 (2.4069) time: 0.7821 data: 0.0002 max mem: 8426 +[2024-12-10 22:07:39 root] (utils.py 283): INFO Epoch: [20] [ 130/2502] eta: 0:30:16 lr: 0.000007 loss_cls: 4.1186 (3.8560) grad_norm: 2.3499 (2.4067) time: 0.7816 data: 0.0002 max mem: 8426 +[2024-12-10 22:07:46 root] (utils.py 283): INFO Epoch: [20] [ 140/2502] eta: 0:30:10 lr: 0.000007 loss_cls: 3.9717 (3.8496) grad_norm: 2.3499 (2.4031) time: 0.7757 data: 0.0002 max mem: 8426 +[2024-12-10 22:07:54 root] (utils.py 283): INFO Epoch: [20] [ 150/2502] eta: 0:30:02 lr: 0.000007 loss_cls: 3.9717 (3.8607) grad_norm: 2.3375 (2.3983) time: 0.7689 data: 0.0002 max mem: 8426 +[2024-12-10 22:08:02 root] (utils.py 283): INFO Epoch: [20] [ 160/2502] eta: 0:29:54 lr: 0.000007 loss_cls: 4.0214 (3.8516) grad_norm: 2.3891 (2.4016) time: 0.7665 data: 0.0003 max mem: 8426 +[2024-12-10 22:08:09 root] (utils.py 283): INFO Epoch: [20] [ 170/2502] eta: 0:29:48 lr: 0.000007 loss_cls: 4.0259 (3.8664) grad_norm: 2.3923 (2.3976) time: 0.7703 data: 0.0003 max mem: 8426 +[2024-12-10 22:08:17 root] (utils.py 283): INFO Epoch: [20] [ 180/2502] eta: 0:29:40 lr: 0.000007 loss_cls: 3.9990 (3.8646) grad_norm: 2.3174 (2.3946) time: 0.7697 data: 0.0002 max mem: 8426 +[2024-12-10 22:08:25 root] (utils.py 283): INFO Epoch: [20] [ 190/2502] eta: 0:29:34 lr: 0.000007 loss_cls: 3.9743 (3.8683) grad_norm: 2.3457 (2.3915) time: 0.7738 data: 0.0002 max mem: 8426 +[2024-12-10 22:08:32 root] (utils.py 283): INFO Epoch: [20] [ 200/2502] eta: 0:29:25 lr: 0.000007 loss_cls: 3.8971 (3.8665) grad_norm: 2.3061 (2.3872) time: 0.7700 data: 0.0003 max mem: 8426 +[2024-12-10 22:08:40 root] (utils.py 283): INFO Epoch: [20] [ 210/2502] eta: 0:29:18 lr: 0.000007 loss_cls: 4.1866 (3.8726) grad_norm: 2.2909 (2.3829) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-10 22:08:48 root] (utils.py 283): INFO Epoch: [20] [ 220/2502] eta: 0:29:10 lr: 0.000007 loss_cls: 4.0704 (3.8711) grad_norm: 2.3058 (2.3818) time: 0.7675 data: 0.0002 max mem: 8426 +[2024-12-10 22:08:55 root] (utils.py 283): INFO Epoch: [20] [ 230/2502] eta: 0:29:02 lr: 0.000007 loss_cls: 4.0297 (3.8754) grad_norm: 2.3296 (2.3815) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 22:09:03 root] (utils.py 283): INFO Epoch: [20] [ 240/2502] eta: 0:28:53 lr: 0.000007 loss_cls: 4.1113 (3.8810) grad_norm: 2.3384 (2.3810) time: 0.7597 data: 0.0002 max mem: 8426 +[2024-12-10 22:09:11 root] (utils.py 283): INFO Epoch: [20] [ 250/2502] eta: 0:28:46 lr: 0.000007 loss_cls: 4.1695 (3.8891) grad_norm: 2.3064 (2.3786) time: 0.7635 data: 0.0003 max mem: 8426 +[2024-12-10 22:09:18 root] (utils.py 283): INFO Epoch: [20] [ 260/2502] eta: 0:28:38 lr: 0.000007 loss_cls: 3.9608 (3.8859) grad_norm: 2.3828 (2.3797) time: 0.7649 data: 0.0003 max mem: 8426 +[2024-12-10 22:09:26 root] (utils.py 283): INFO Epoch: [20] [ 270/2502] eta: 0:28:30 lr: 0.000007 loss_cls: 3.8334 (3.8794) grad_norm: 2.3669 (2.3774) time: 0.7632 data: 0.0003 max mem: 8426 +[2024-12-10 22:09:34 root] (utils.py 283): INFO Epoch: [20] [ 280/2502] eta: 0:28:22 lr: 0.000007 loss_cls: 3.2856 (3.8726) grad_norm: 2.3390 (2.3775) time: 0.7629 data: 0.0003 max mem: 8426 +[2024-12-10 22:09:41 root] (utils.py 283): INFO Epoch: [20] [ 290/2502] eta: 0:28:14 lr: 0.000007 loss_cls: 3.8991 (3.8789) grad_norm: 2.3390 (2.3780) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-10 22:09:49 root] (utils.py 283): INFO Epoch: [20] [ 300/2502] eta: 0:28:07 lr: 0.000007 loss_cls: 4.0361 (3.8804) grad_norm: 2.2972 (2.3764) time: 0.7694 data: 0.0003 max mem: 8426 +[2024-12-10 22:09:57 root] (utils.py 283): INFO Epoch: [20] [ 310/2502] eta: 0:28:00 lr: 0.000007 loss_cls: 3.9871 (3.8881) grad_norm: 2.3994 (2.3779) time: 0.7708 data: 0.0003 max mem: 8426 +[2024-12-10 22:10:04 root] (utils.py 283): INFO Epoch: [20] [ 320/2502] eta: 0:27:52 lr: 0.000007 loss_cls: 3.9214 (3.8783) grad_norm: 2.3861 (2.3793) time: 0.7679 data: 0.0003 max mem: 8426 +[2024-12-10 22:10:12 root] (utils.py 283): INFO Epoch: [20] [ 330/2502] eta: 0:27:44 lr: 0.000007 loss_cls: 3.6823 (3.8805) grad_norm: 2.3861 (2.3807) time: 0.7622 data: 0.0003 max mem: 8426 +[2024-12-10 22:10:20 root] (utils.py 283): INFO Epoch: [20] [ 340/2502] eta: 0:27:36 lr: 0.000007 loss_cls: 4.2053 (3.8905) grad_norm: 2.4048 (2.3825) time: 0.7616 data: 0.0003 max mem: 8426 +[2024-12-10 22:10:27 root] (utils.py 283): INFO Epoch: [20] [ 350/2502] eta: 0:27:28 lr: 0.000007 loss_cls: 4.1581 (3.8868) grad_norm: 2.3884 (2.3811) time: 0.7661 data: 0.0003 max mem: 8426 +[2024-12-10 22:10:35 root] (utils.py 283): INFO Epoch: [20] [ 360/2502] eta: 0:27:21 lr: 0.000007 loss_cls: 4.1633 (3.9000) grad_norm: 2.3648 (2.3823) time: 0.7672 data: 0.0003 max mem: 8426 +[2024-12-10 22:10:43 root] (utils.py 283): INFO Epoch: [20] [ 370/2502] eta: 0:27:13 lr: 0.000007 loss_cls: 4.2944 (3.9070) grad_norm: 2.3165 (2.3810) time: 0.7646 data: 0.0003 max mem: 8426 +[2024-12-10 22:10:50 root] (utils.py 283): INFO Epoch: [20] [ 380/2502] eta: 0:27:05 lr: 0.000007 loss_cls: 4.0869 (3.9108) grad_norm: 2.3160 (2.3803) time: 0.7638 data: 0.0003 max mem: 8426 +[2024-12-10 22:10:58 root] (utils.py 283): INFO Epoch: [20] [ 390/2502] eta: 0:26:58 lr: 0.000007 loss_cls: 4.0422 (3.9159) grad_norm: 2.3783 (2.3809) time: 0.7683 data: 0.0002 max mem: 8426 +[2024-12-10 22:11:05 root] (utils.py 283): INFO Epoch: [20] [ 400/2502] eta: 0:26:50 lr: 0.000007 loss_cls: 4.2292 (3.9239) grad_norm: 2.3409 (2.3794) time: 0.7659 data: 0.0002 max mem: 8426 +[2024-12-10 22:11:13 root] (utils.py 283): INFO Epoch: [20] [ 410/2502] eta: 0:26:42 lr: 0.000007 loss_cls: 4.2182 (3.9257) grad_norm: 2.3263 (2.3788) time: 0.7660 data: 0.0003 max mem: 8426 +[2024-12-10 22:11:21 root] (utils.py 283): INFO Epoch: [20] [ 420/2502] eta: 0:26:35 lr: 0.000007 loss_cls: 4.1705 (3.9305) grad_norm: 2.3263 (2.3785) time: 0.7682 data: 0.0002 max mem: 8426 +[2024-12-10 22:11:28 root] (utils.py 283): INFO Epoch: [20] [ 430/2502] eta: 0:26:27 lr: 0.000007 loss_cls: 4.2964 (3.9342) grad_norm: 2.3547 (2.3795) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 22:11:36 root] (utils.py 283): INFO Epoch: [20] [ 440/2502] eta: 0:26:20 lr: 0.000007 loss_cls: 4.1290 (3.9345) grad_norm: 2.4598 (2.3820) time: 0.7747 data: 0.0002 max mem: 8426 +[2024-12-10 22:11:44 root] (utils.py 283): INFO Epoch: [20] [ 450/2502] eta: 0:26:13 lr: 0.000007 loss_cls: 3.8784 (3.9259) grad_norm: 2.4304 (2.3828) time: 0.7856 data: 0.0002 max mem: 8426 +[2024-12-10 22:11:52 root] (utils.py 283): INFO Epoch: [20] [ 460/2502] eta: 0:26:05 lr: 0.000007 loss_cls: 3.9322 (3.9280) grad_norm: 2.3799 (2.3829) time: 0.7734 data: 0.0003 max mem: 8426 +[2024-12-10 22:11:59 root] (utils.py 283): INFO Epoch: [20] [ 470/2502] eta: 0:25:58 lr: 0.000007 loss_cls: 4.1166 (3.9274) grad_norm: 2.3160 (2.3817) time: 0.7664 data: 0.0003 max mem: 8426 +[2024-12-10 22:12:07 root] (utils.py 283): INFO Epoch: [20] [ 480/2502] eta: 0:25:51 lr: 0.000007 loss_cls: 4.1166 (3.9286) grad_norm: 2.3626 (2.3831) time: 0.7712 data: 0.0003 max mem: 8426 +[2024-12-10 22:12:15 root] (utils.py 283): INFO Epoch: [20] [ 490/2502] eta: 0:25:43 lr: 0.000007 loss_cls: 4.0058 (3.9254) grad_norm: 2.4397 (2.3835) time: 0.7681 data: 0.0003 max mem: 8426 +[2024-12-10 22:12:22 root] (utils.py 283): INFO Epoch: [20] [ 500/2502] eta: 0:25:34 lr: 0.000007 loss_cls: 4.0190 (3.9313) grad_norm: 2.3899 (2.3841) time: 0.7587 data: 0.0003 max mem: 8426 +[2024-12-10 22:12:30 root] (utils.py 283): INFO Epoch: [20] [ 510/2502] eta: 0:25:27 lr: 0.000007 loss_cls: 4.0387 (3.9317) grad_norm: 2.3722 (2.3840) time: 0.7570 data: 0.0003 max mem: 8426 +[2024-12-10 22:12:38 root] (utils.py 283): INFO Epoch: [20] [ 520/2502] eta: 0:25:19 lr: 0.000007 loss_cls: 4.0568 (3.9341) grad_norm: 2.3722 (2.3844) time: 0.7616 data: 0.0003 max mem: 8426 +[2024-12-10 22:12:45 root] (utils.py 283): INFO Epoch: [20] [ 530/2502] eta: 0:25:11 lr: 0.000007 loss_cls: 4.1080 (3.9364) grad_norm: 2.4207 (2.3845) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 22:12:53 root] (utils.py 283): INFO Epoch: [20] [ 540/2502] eta: 0:25:03 lr: 0.000007 loss_cls: 4.0061 (3.9352) grad_norm: 2.4207 (2.3850) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-10 22:13:01 root] (utils.py 283): INFO Epoch: [20] [ 550/2502] eta: 0:24:56 lr: 0.000007 loss_cls: 4.1018 (3.9378) grad_norm: 2.4122 (2.3846) time: 0.7679 data: 0.0002 max mem: 8426 +[2024-12-10 22:13:08 root] (utils.py 283): INFO Epoch: [20] [ 560/2502] eta: 0:24:49 lr: 0.000007 loss_cls: 3.6593 (3.9301) grad_norm: 2.3762 (2.3849) time: 0.7732 data: 0.0002 max mem: 8426 +[2024-12-10 22:13:16 root] (utils.py 283): INFO Epoch: [20] [ 570/2502] eta: 0:24:41 lr: 0.000007 loss_cls: 3.6670 (3.9312) grad_norm: 2.3870 (2.3851) time: 0.7796 data: 0.0002 max mem: 8426 +[2024-12-10 22:13:24 root] (utils.py 283): INFO Epoch: [20] [ 580/2502] eta: 0:24:34 lr: 0.000007 loss_cls: 3.8106 (3.9255) grad_norm: 2.4064 (2.3853) time: 0.7801 data: 0.0002 max mem: 8426 +[2024-12-10 22:13:32 root] (utils.py 283): INFO Epoch: [20] [ 590/2502] eta: 0:24:27 lr: 0.000007 loss_cls: 3.3401 (3.9159) grad_norm: 2.4112 (2.3855) time: 0.7797 data: 0.0002 max mem: 8426 +[2024-12-10 22:13:40 root] (utils.py 283): INFO Epoch: [20] [ 600/2502] eta: 0:24:20 lr: 0.000007 loss_cls: 3.5218 (3.9186) grad_norm: 2.3539 (2.3863) time: 0.7810 data: 0.0002 max mem: 8426 +[2024-12-10 22:13:47 root] (utils.py 283): INFO Epoch: [20] [ 610/2502] eta: 0:24:12 lr: 0.000007 loss_cls: 4.0928 (3.9173) grad_norm: 2.3536 (2.3863) time: 0.7823 data: 0.0003 max mem: 8426 +[2024-12-10 22:13:55 root] (utils.py 283): INFO Epoch: [20] [ 620/2502] eta: 0:24:05 lr: 0.000007 loss_cls: 3.6726 (3.9136) grad_norm: 2.3805 (2.3866) time: 0.7800 data: 0.0003 max mem: 8426 +[2024-12-10 22:14:03 root] (utils.py 283): INFO Epoch: [20] [ 630/2502] eta: 0:23:58 lr: 0.000007 loss_cls: 4.2324 (3.9190) grad_norm: 2.3710 (2.3865) time: 0.7867 data: 0.0003 max mem: 8426 +[2024-12-10 22:14:11 root] (utils.py 283): INFO Epoch: [20] [ 640/2502] eta: 0:23:51 lr: 0.000007 loss_cls: 4.2062 (3.9182) grad_norm: 2.3582 (2.3855) time: 0.7867 data: 0.0002 max mem: 8426 +[2024-12-10 22:14:19 root] (utils.py 283): INFO Epoch: [20] [ 650/2502] eta: 0:23:43 lr: 0.000007 loss_cls: 4.0876 (3.9221) grad_norm: 2.3459 (2.3852) time: 0.7803 data: 0.0002 max mem: 8426 +[2024-12-10 22:14:27 root] (utils.py 283): INFO Epoch: [20] [ 660/2502] eta: 0:23:36 lr: 0.000007 loss_cls: 4.1663 (3.9246) grad_norm: 2.3375 (2.3842) time: 0.7815 data: 0.0003 max mem: 8426 +[2024-12-10 22:14:34 root] (utils.py 283): INFO Epoch: [20] [ 670/2502] eta: 0:23:29 lr: 0.000007 loss_cls: 4.0048 (3.9239) grad_norm: 2.3666 (2.3852) time: 0.7813 data: 0.0002 max mem: 8426 +[2024-12-10 22:14:42 root] (utils.py 283): INFO Epoch: [20] [ 680/2502] eta: 0:23:21 lr: 0.000007 loss_cls: 4.2085 (3.9274) grad_norm: 2.4152 (2.3856) time: 0.7812 data: 0.0003 max mem: 8426 +[2024-12-10 22:14:50 root] (utils.py 283): INFO Epoch: [20] [ 690/2502] eta: 0:23:14 lr: 0.000007 loss_cls: 4.2291 (3.9310) grad_norm: 2.3796 (2.3853) time: 0.7831 data: 0.0003 max mem: 8426 +[2024-12-10 22:14:58 root] (utils.py 283): INFO Epoch: [20] [ 700/2502] eta: 0:23:07 lr: 0.000007 loss_cls: 4.0234 (3.9329) grad_norm: 2.3692 (2.3858) time: 0.7835 data: 0.0003 max mem: 8426 +[2024-12-10 22:15:06 root] (utils.py 283): INFO Epoch: [20] [ 710/2502] eta: 0:22:59 lr: 0.000007 loss_cls: 3.9784 (3.9284) grad_norm: 2.3652 (2.3860) time: 0.7826 data: 0.0003 max mem: 8426 +[2024-12-10 22:15:13 root] (utils.py 283): INFO Epoch: [20] [ 720/2502] eta: 0:22:52 lr: 0.000007 loss_cls: 3.8546 (3.9260) grad_norm: 2.3571 (2.3857) time: 0.7745 data: 0.0003 max mem: 8426 +[2024-12-10 22:15:21 root] (utils.py 283): INFO Epoch: [20] [ 730/2502] eta: 0:22:44 lr: 0.000007 loss_cls: 4.1036 (3.9260) grad_norm: 2.3035 (2.3855) time: 0.7662 data: 0.0002 max mem: 8426 +[2024-12-10 22:15:29 root] (utils.py 283): INFO Epoch: [20] [ 740/2502] eta: 0:22:36 lr: 0.000007 loss_cls: 3.7632 (3.9202) grad_norm: 2.3193 (2.3870) time: 0.7735 data: 0.0002 max mem: 8426 +[2024-12-10 22:15:37 root] (utils.py 283): INFO Epoch: [20] [ 750/2502] eta: 0:22:29 lr: 0.000007 loss_cls: 3.7378 (3.9177) grad_norm: 2.4134 (2.3871) time: 0.7795 data: 0.0002 max mem: 8426 +[2024-12-10 22:15:44 root] (utils.py 283): INFO Epoch: [20] [ 760/2502] eta: 0:22:21 lr: 0.000007 loss_cls: 3.7378 (3.9146) grad_norm: 2.3708 (2.3867) time: 0.7682 data: 0.0003 max mem: 8426 +[2024-12-10 22:15:52 root] (utils.py 283): INFO Epoch: [20] [ 770/2502] eta: 0:22:13 lr: 0.000007 loss_cls: 3.9535 (3.9147) grad_norm: 2.3645 (2.3863) time: 0.7700 data: 0.0003 max mem: 8426 +[2024-12-10 22:16:00 root] (utils.py 283): INFO Epoch: [20] [ 780/2502] eta: 0:22:06 lr: 0.000007 loss_cls: 3.9984 (3.9130) grad_norm: 2.3851 (2.3866) time: 0.7737 data: 0.0002 max mem: 8426 +[2024-12-10 22:16:07 root] (utils.py 283): INFO Epoch: [20] [ 790/2502] eta: 0:21:58 lr: 0.000007 loss_cls: 4.2921 (3.9162) grad_norm: 2.3435 (2.3873) time: 0.7673 data: 0.0002 max mem: 8426 +[2024-12-10 22:16:15 root] (utils.py 283): INFO Epoch: [20] [ 800/2502] eta: 0:21:50 lr: 0.000007 loss_cls: 4.2300 (3.9161) grad_norm: 2.3547 (2.3870) time: 0.7721 data: 0.0003 max mem: 8426 +[2024-12-10 22:16:23 root] (utils.py 283): INFO Epoch: [20] [ 810/2502] eta: 0:21:42 lr: 0.000007 loss_cls: 4.1433 (3.9179) grad_norm: 2.3718 (2.3877) time: 0.7684 data: 0.0003 max mem: 8426 +[2024-12-10 22:16:30 root] (utils.py 283): INFO Epoch: [20] [ 820/2502] eta: 0:21:35 lr: 0.000007 loss_cls: 4.2806 (3.9202) grad_norm: 2.3746 (2.3872) time: 0.7633 data: 0.0003 max mem: 8426 +[2024-12-10 22:16:38 root] (utils.py 283): INFO Epoch: [20] [ 830/2502] eta: 0:21:27 lr: 0.000007 loss_cls: 4.2937 (3.9240) grad_norm: 2.3469 (2.3875) time: 0.7684 data: 0.0002 max mem: 8426 +[2024-12-10 22:16:46 root] (utils.py 283): INFO Epoch: [20] [ 840/2502] eta: 0:21:19 lr: 0.000007 loss_cls: 4.2024 (3.9274) grad_norm: 2.3559 (2.3872) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-10 22:16:54 root] (utils.py 283): INFO Epoch: [20] [ 850/2502] eta: 0:21:12 lr: 0.000007 loss_cls: 4.1577 (3.9269) grad_norm: 2.3559 (2.3871) time: 0.7703 data: 0.0002 max mem: 8426 +[2024-12-10 22:17:01 root] (utils.py 283): INFO Epoch: [20] [ 860/2502] eta: 0:21:04 lr: 0.000007 loss_cls: 4.1577 (3.9292) grad_norm: 2.3615 (2.3871) time: 0.7680 data: 0.0002 max mem: 8426 +[2024-12-10 22:17:09 root] (utils.py 283): INFO Epoch: [20] [ 870/2502] eta: 0:20:56 lr: 0.000007 loss_cls: 4.1568 (3.9303) grad_norm: 2.3904 (2.3875) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 22:17:17 root] (utils.py 283): INFO Epoch: [20] [ 880/2502] eta: 0:20:48 lr: 0.000007 loss_cls: 4.0838 (3.9325) grad_norm: 2.3726 (2.3873) time: 0.7688 data: 0.0003 max mem: 8426 +[2024-12-10 22:17:24 root] (utils.py 283): INFO Epoch: [20] [ 890/2502] eta: 0:20:40 lr: 0.000007 loss_cls: 4.0098 (3.9314) grad_norm: 2.4000 (2.3874) time: 0.7641 data: 0.0003 max mem: 8426 +[2024-12-10 22:17:32 root] (utils.py 283): INFO Epoch: [20] [ 900/2502] eta: 0:20:33 lr: 0.000007 loss_cls: 3.9475 (3.9310) grad_norm: 2.4127 (2.3873) time: 0.7759 data: 0.0002 max mem: 8426 +[2024-12-10 22:17:40 root] (utils.py 283): INFO Epoch: [20] [ 910/2502] eta: 0:20:25 lr: 0.000007 loss_cls: 4.0315 (3.9319) grad_norm: 2.3784 (2.3877) time: 0.7836 data: 0.0002 max mem: 8426 +[2024-12-10 22:17:48 root] (utils.py 283): INFO Epoch: [20] [ 920/2502] eta: 0:20:18 lr: 0.000007 loss_cls: 4.0290 (3.9321) grad_norm: 2.3846 (2.3876) time: 0.7805 data: 0.0002 max mem: 8426 +[2024-12-10 22:17:55 root] (utils.py 283): INFO Epoch: [20] [ 930/2502] eta: 0:20:10 lr: 0.000007 loss_cls: 4.0529 (3.9315) grad_norm: 2.3764 (2.3879) time: 0.7799 data: 0.0003 max mem: 8426 +[2024-12-10 22:18:03 root] (utils.py 283): INFO Epoch: [20] [ 940/2502] eta: 0:20:03 lr: 0.000007 loss_cls: 4.0529 (3.9314) grad_norm: 2.3764 (2.3882) time: 0.7801 data: 0.0003 max mem: 8426 +[2024-12-10 22:18:11 root] (utils.py 283): INFO Epoch: [20] [ 950/2502] eta: 0:19:55 lr: 0.000007 loss_cls: 3.7676 (3.9266) grad_norm: 2.4001 (2.3879) time: 0.7805 data: 0.0003 max mem: 8426 +[2024-12-10 22:18:19 root] (utils.py 283): INFO Epoch: [20] [ 960/2502] eta: 0:19:48 lr: 0.000007 loss_cls: 3.4971 (3.9241) grad_norm: 2.4150 (2.3885) time: 0.7834 data: 0.0003 max mem: 8426 +[2024-12-10 22:18:27 root] (utils.py 283): INFO Epoch: [20] [ 970/2502] eta: 0:19:40 lr: 0.000007 loss_cls: 3.7781 (3.9230) grad_norm: 2.4409 (2.3889) time: 0.7841 data: 0.0003 max mem: 8426 +[2024-12-10 22:18:35 root] (utils.py 283): INFO Epoch: [20] [ 980/2502] eta: 0:19:33 lr: 0.000007 loss_cls: 4.0343 (3.9233) grad_norm: 2.4318 (2.3894) time: 0.7826 data: 0.0002 max mem: 8426 +[2024-12-10 22:18:42 root] (utils.py 283): INFO Epoch: [20] [ 990/2502] eta: 0:19:25 lr: 0.000007 loss_cls: 4.0783 (3.9233) grad_norm: 2.3871 (2.3892) time: 0.7811 data: 0.0002 max mem: 8426 +[2024-12-10 22:18:50 root] (utils.py 283): INFO Epoch: [20] [1000/2502] eta: 0:19:18 lr: 0.000007 loss_cls: 4.1187 (3.9263) grad_norm: 2.3722 (2.3892) time: 0.7814 data: 0.0002 max mem: 8426 +[2024-12-10 22:18:58 root] (utils.py 283): INFO Epoch: [20] [1010/2502] eta: 0:19:10 lr: 0.000007 loss_cls: 4.1365 (3.9255) grad_norm: 2.3979 (2.3893) time: 0.7834 data: 0.0003 max mem: 8426 +[2024-12-10 22:19:06 root] (utils.py 283): INFO Epoch: [20] [1020/2502] eta: 0:19:03 lr: 0.000007 loss_cls: 4.1369 (3.9259) grad_norm: 2.3531 (2.3893) time: 0.7843 data: 0.0002 max mem: 8426 +[2024-12-10 22:19:14 root] (utils.py 283): INFO Epoch: [20] [1030/2502] eta: 0:18:55 lr: 0.000007 loss_cls: 4.1369 (3.9250) grad_norm: 2.3407 (2.3891) time: 0.7801 data: 0.0003 max mem: 8426 +[2024-12-10 22:19:21 root] (utils.py 283): INFO Epoch: [20] [1040/2502] eta: 0:18:47 lr: 0.000007 loss_cls: 3.6280 (3.9237) grad_norm: 2.3407 (2.3891) time: 0.7786 data: 0.0002 max mem: 8426 +[2024-12-10 22:19:29 root] (utils.py 283): INFO Epoch: [20] [1050/2502] eta: 0:18:40 lr: 0.000007 loss_cls: 4.1399 (3.9253) grad_norm: 2.4052 (2.3898) time: 0.7847 data: 0.0003 max mem: 8426 +[2024-12-10 22:19:37 root] (utils.py 283): INFO Epoch: [20] [1060/2502] eta: 0:18:32 lr: 0.000007 loss_cls: 4.0664 (3.9250) grad_norm: 2.4361 (2.3902) time: 0.7850 data: 0.0003 max mem: 8426 +[2024-12-10 22:19:45 root] (utils.py 283): INFO Epoch: [20] [1070/2502] eta: 0:18:25 lr: 0.000007 loss_cls: 3.9537 (3.9247) grad_norm: 2.4052 (2.3900) time: 0.7823 data: 0.0002 max mem: 8426 +[2024-12-10 22:19:53 root] (utils.py 283): INFO Epoch: [20] [1080/2502] eta: 0:18:17 lr: 0.000007 loss_cls: 4.1060 (3.9251) grad_norm: 2.3512 (2.3901) time: 0.7830 data: 0.0003 max mem: 8426 +[2024-12-10 22:20:01 root] (utils.py 283): INFO Epoch: [20] [1090/2502] eta: 0:18:10 lr: 0.000007 loss_cls: 4.0879 (3.9219) grad_norm: 2.3440 (2.3900) time: 0.7832 data: 0.0003 max mem: 8426 +[2024-12-10 22:20:08 root] (utils.py 283): INFO Epoch: [20] [1100/2502] eta: 0:18:02 lr: 0.000007 loss_cls: 3.9153 (3.9199) grad_norm: 2.2723 (2.3902) time: 0.7816 data: 0.0002 max mem: 8426 +[2024-12-10 22:20:16 root] (utils.py 283): INFO Epoch: [20] [1110/2502] eta: 0:17:54 lr: 0.000007 loss_cls: 3.9400 (3.9194) grad_norm: 2.3035 (2.3899) time: 0.7808 data: 0.0003 max mem: 8426 +[2024-12-10 22:20:24 root] (utils.py 283): INFO Epoch: [20] [1120/2502] eta: 0:17:47 lr: 0.000007 loss_cls: 4.1426 (3.9203) grad_norm: 2.3291 (2.3895) time: 0.7802 data: 0.0003 max mem: 8426 +[2024-12-10 22:20:32 root] (utils.py 283): INFO Epoch: [20] [1130/2502] eta: 0:17:39 lr: 0.000007 loss_cls: 4.1426 (3.9215) grad_norm: 2.3828 (2.3900) time: 0.7820 data: 0.0003 max mem: 8426 +[2024-12-10 22:20:40 root] (utils.py 283): INFO Epoch: [20] [1140/2502] eta: 0:17:32 lr: 0.000007 loss_cls: 4.0345 (3.9194) grad_norm: 2.3578 (2.3897) time: 0.7836 data: 0.0003 max mem: 8426 +[2024-12-10 22:20:48 root] (utils.py 283): INFO Epoch: [20] [1150/2502] eta: 0:17:24 lr: 0.000007 loss_cls: 3.8946 (3.9200) grad_norm: 2.3472 (2.3894) time: 0.7799 data: 0.0003 max mem: 8426 +[2024-12-10 22:20:55 root] (utils.py 283): INFO Epoch: [20] [1160/2502] eta: 0:17:16 lr: 0.000007 loss_cls: 3.9929 (3.9202) grad_norm: 2.3643 (2.3893) time: 0.7786 data: 0.0002 max mem: 8426 +[2024-12-10 22:21:03 root] (utils.py 283): INFO Epoch: [20] [1170/2502] eta: 0:17:09 lr: 0.000007 loss_cls: 3.8616 (3.9166) grad_norm: 2.3643 (2.3893) time: 0.7803 data: 0.0002 max mem: 8426 +[2024-12-10 22:21:11 root] (utils.py 283): INFO Epoch: [20] [1180/2502] eta: 0:17:01 lr: 0.000007 loss_cls: 3.3813 (3.9147) grad_norm: 2.4231 (2.3900) time: 0.7773 data: 0.0002 max mem: 8426 +[2024-12-10 22:21:19 root] (utils.py 283): INFO Epoch: [20] [1190/2502] eta: 0:16:53 lr: 0.000007 loss_cls: 3.1847 (3.9107) grad_norm: 2.4211 (2.3899) time: 0.7721 data: 0.0003 max mem: 8426 +[2024-12-10 22:21:26 root] (utils.py 283): INFO Epoch: [20] [1200/2502] eta: 0:16:45 lr: 0.000007 loss_cls: 3.4780 (3.9086) grad_norm: 2.3760 (2.3901) time: 0.7645 data: 0.0003 max mem: 8426 +[2024-12-10 22:21:34 root] (utils.py 283): INFO Epoch: [20] [1210/2502] eta: 0:16:38 lr: 0.000007 loss_cls: 3.4246 (3.9053) grad_norm: 2.3854 (2.3906) time: 0.7622 data: 0.0003 max mem: 8426 +[2024-12-10 22:21:42 root] (utils.py 283): INFO Epoch: [20] [1220/2502] eta: 0:16:30 lr: 0.000007 loss_cls: 4.0381 (3.9056) grad_norm: 2.3959 (2.3908) time: 0.7667 data: 0.0003 max mem: 8426 +[2024-12-10 22:21:49 root] (utils.py 283): INFO Epoch: [20] [1230/2502] eta: 0:16:22 lr: 0.000007 loss_cls: 4.0805 (3.9059) grad_norm: 2.3713 (2.3905) time: 0.7708 data: 0.0002 max mem: 8426 +[2024-12-10 22:21:57 root] (utils.py 283): INFO Epoch: [20] [1240/2502] eta: 0:16:14 lr: 0.000007 loss_cls: 4.0818 (3.9061) grad_norm: 2.3318 (2.3897) time: 0.7695 data: 0.0002 max mem: 8426 +[2024-12-10 22:22:05 root] (utils.py 283): INFO Epoch: [20] [1250/2502] eta: 0:16:06 lr: 0.000007 loss_cls: 4.0818 (3.9068) grad_norm: 2.2988 (2.3894) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 22:22:12 root] (utils.py 283): INFO Epoch: [20] [1260/2502] eta: 0:15:59 lr: 0.000007 loss_cls: 4.1948 (3.9090) grad_norm: 2.3231 (2.3891) time: 0.7695 data: 0.0002 max mem: 8426 +[2024-12-10 22:22:20 root] (utils.py 283): INFO Epoch: [20] [1270/2502] eta: 0:15:51 lr: 0.000007 loss_cls: 4.0752 (3.9075) grad_norm: 2.4042 (2.3897) time: 0.7711 data: 0.0002 max mem: 8426 +[2024-12-10 22:22:28 root] (utils.py 283): INFO Epoch: [20] [1280/2502] eta: 0:15:43 lr: 0.000007 loss_cls: 3.2209 (3.9033) grad_norm: 2.4257 (2.3905) time: 0.7659 data: 0.0002 max mem: 8426 +[2024-12-10 22:22:35 root] (utils.py 283): INFO Epoch: [20] [1290/2502] eta: 0:15:35 lr: 0.000007 loss_cls: 3.4691 (3.9037) grad_norm: 2.4143 (2.3906) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 22:22:43 root] (utils.py 283): INFO Epoch: [20] [1300/2502] eta: 0:15:28 lr: 0.000007 loss_cls: 4.2367 (3.9053) grad_norm: 2.3730 (2.3905) time: 0.7610 data: 0.0003 max mem: 8426 +[2024-12-10 22:22:50 root] (utils.py 283): INFO Epoch: [20] [1310/2502] eta: 0:15:20 lr: 0.000007 loss_cls: 4.2367 (3.9065) grad_norm: 2.3746 (2.3909) time: 0.7615 data: 0.0003 max mem: 8426 +[2024-12-10 22:22:58 root] (utils.py 283): INFO Epoch: [20] [1320/2502] eta: 0:15:12 lr: 0.000007 loss_cls: 4.1481 (3.9080) grad_norm: 2.4291 (2.3914) time: 0.7638 data: 0.0003 max mem: 8426 +[2024-12-10 22:23:06 root] (utils.py 283): INFO Epoch: [20] [1330/2502] eta: 0:15:04 lr: 0.000007 loss_cls: 4.0372 (3.9079) grad_norm: 2.3973 (2.3919) time: 0.7652 data: 0.0003 max mem: 8426 +[2024-12-10 22:23:13 root] (utils.py 283): INFO Epoch: [20] [1340/2502] eta: 0:14:56 lr: 0.000007 loss_cls: 4.0135 (3.9066) grad_norm: 2.3711 (2.3915) time: 0.7668 data: 0.0003 max mem: 8426 +[2024-12-10 22:23:21 root] (utils.py 283): INFO Epoch: [20] [1350/2502] eta: 0:14:49 lr: 0.000007 loss_cls: 3.9989 (3.9082) grad_norm: 2.3711 (2.3920) time: 0.7682 data: 0.0003 max mem: 8426 +[2024-12-10 22:23:29 root] (utils.py 283): INFO Epoch: [20] [1360/2502] eta: 0:14:41 lr: 0.000007 loss_cls: 3.9178 (3.9078) grad_norm: 2.4113 (2.3924) time: 0.7705 data: 0.0002 max mem: 8426 +[2024-12-10 22:23:37 root] (utils.py 283): INFO Epoch: [20] [1370/2502] eta: 0:14:33 lr: 0.000007 loss_cls: 3.8040 (3.9078) grad_norm: 2.3786 (2.3920) time: 0.7677 data: 0.0002 max mem: 8426 +[2024-12-10 22:23:44 root] (utils.py 283): INFO Epoch: [20] [1380/2502] eta: 0:14:25 lr: 0.000007 loss_cls: 4.0753 (3.9097) grad_norm: 2.3258 (2.3919) time: 0.7610 data: 0.0002 max mem: 8426 +[2024-12-10 22:23:52 root] (utils.py 283): INFO Epoch: [20] [1390/2502] eta: 0:14:18 lr: 0.000007 loss_cls: 4.2713 (3.9119) grad_norm: 2.3322 (2.3917) time: 0.7592 data: 0.0002 max mem: 8426 +[2024-12-10 22:23:59 root] (utils.py 283): INFO Epoch: [20] [1400/2502] eta: 0:14:10 lr: 0.000007 loss_cls: 4.1679 (3.9118) grad_norm: 2.3554 (2.3918) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 22:24:07 root] (utils.py 283): INFO Epoch: [20] [1410/2502] eta: 0:14:02 lr: 0.000007 loss_cls: 3.9660 (3.9125) grad_norm: 2.3842 (2.3918) time: 0.7758 data: 0.0002 max mem: 8426 +[2024-12-10 22:24:15 root] (utils.py 283): INFO Epoch: [20] [1420/2502] eta: 0:13:54 lr: 0.000007 loss_cls: 3.9582 (3.9110) grad_norm: 2.3842 (2.3922) time: 0.7765 data: 0.0002 max mem: 8426 +[2024-12-10 22:24:23 root] (utils.py 283): INFO Epoch: [20] [1430/2502] eta: 0:13:47 lr: 0.000007 loss_cls: 4.0683 (3.9121) grad_norm: 2.3611 (2.3917) time: 0.7661 data: 0.0002 max mem: 8426 +[2024-12-10 22:24:30 root] (utils.py 283): INFO Epoch: [20] [1440/2502] eta: 0:13:39 lr: 0.000007 loss_cls: 3.8616 (3.9092) grad_norm: 2.3782 (2.3919) time: 0.7610 data: 0.0002 max mem: 8426 +[2024-12-10 22:24:38 root] (utils.py 283): INFO Epoch: [20] [1450/2502] eta: 0:13:31 lr: 0.000007 loss_cls: 3.3986 (3.9075) grad_norm: 2.4106 (2.3923) time: 0.7625 data: 0.0002 max mem: 8426 +[2024-12-10 22:24:45 root] (utils.py 283): INFO Epoch: [20] [1460/2502] eta: 0:13:23 lr: 0.000007 loss_cls: 4.0610 (3.9088) grad_norm: 2.3450 (2.3925) time: 0.7654 data: 0.0002 max mem: 8426 +[2024-12-10 22:24:53 root] (utils.py 283): INFO Epoch: [20] [1470/2502] eta: 0:13:16 lr: 0.000007 loss_cls: 4.0610 (3.9083) grad_norm: 2.3709 (2.3925) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 22:25:01 root] (utils.py 283): INFO Epoch: [20] [1480/2502] eta: 0:13:08 lr: 0.000007 loss_cls: 4.0507 (3.9087) grad_norm: 2.4230 (2.3932) time: 0.7613 data: 0.0003 max mem: 8426 +[2024-12-10 22:25:08 root] (utils.py 283): INFO Epoch: [20] [1490/2502] eta: 0:13:00 lr: 0.000007 loss_cls: 4.1632 (3.9108) grad_norm: 2.4780 (2.3936) time: 0.7599 data: 0.0003 max mem: 8426 +[2024-12-10 22:25:16 root] (utils.py 283): INFO Epoch: [20] [1500/2502] eta: 0:12:52 lr: 0.000007 loss_cls: 4.1632 (3.9118) grad_norm: 2.4094 (2.3937) time: 0.7589 data: 0.0002 max mem: 8426 +[2024-12-10 22:25:23 root] (utils.py 283): INFO Epoch: [20] [1510/2502] eta: 0:12:44 lr: 0.000007 loss_cls: 3.6753 (3.9096) grad_norm: 2.3992 (2.3937) time: 0.7567 data: 0.0003 max mem: 8426 +[2024-12-10 22:25:31 root] (utils.py 283): INFO Epoch: [20] [1520/2502] eta: 0:12:37 lr: 0.000007 loss_cls: 4.0654 (3.9110) grad_norm: 2.3708 (2.3939) time: 0.7567 data: 0.0003 max mem: 8426 +[2024-12-10 22:25:39 root] (utils.py 283): INFO Epoch: [20] [1530/2502] eta: 0:12:29 lr: 0.000007 loss_cls: 4.0654 (3.9082) grad_norm: 2.3708 (2.3938) time: 0.7587 data: 0.0003 max mem: 8426 +[2024-12-10 22:25:46 root] (utils.py 283): INFO Epoch: [20] [1540/2502] eta: 0:12:21 lr: 0.000007 loss_cls: 3.7645 (3.9085) grad_norm: 2.3852 (2.3940) time: 0.7604 data: 0.0003 max mem: 8426 +[2024-12-10 22:25:54 root] (utils.py 283): INFO Epoch: [20] [1550/2502] eta: 0:12:13 lr: 0.000007 loss_cls: 3.9590 (3.9085) grad_norm: 2.3870 (2.3943) time: 0.7578 data: 0.0003 max mem: 8426 +[2024-12-10 22:26:01 root] (utils.py 283): INFO Epoch: [20] [1560/2502] eta: 0:12:05 lr: 0.000007 loss_cls: 4.0512 (3.9097) grad_norm: 2.3646 (2.3940) time: 0.7553 data: 0.0003 max mem: 8426 +[2024-12-10 22:26:09 root] (utils.py 283): INFO Epoch: [20] [1570/2502] eta: 0:11:58 lr: 0.000007 loss_cls: 4.0695 (3.9107) grad_norm: 2.3429 (2.3937) time: 0.7692 data: 0.0003 max mem: 8426 +[2024-12-10 22:26:17 root] (utils.py 283): INFO Epoch: [20] [1580/2502] eta: 0:11:50 lr: 0.000007 loss_cls: 4.0964 (3.9115) grad_norm: 2.3376 (2.3940) time: 0.7846 data: 0.0002 max mem: 8426 +[2024-12-10 22:26:25 root] (utils.py 283): INFO Epoch: [20] [1590/2502] eta: 0:11:42 lr: 0.000007 loss_cls: 3.7990 (3.9099) grad_norm: 2.3376 (2.3941) time: 0.7767 data: 0.0002 max mem: 8426 +[2024-12-10 22:26:32 root] (utils.py 283): INFO Epoch: [20] [1600/2502] eta: 0:11:35 lr: 0.000007 loss_cls: 4.0431 (3.9127) grad_norm: 2.3533 (2.3942) time: 0.7670 data: 0.0002 max mem: 8426 +[2024-12-10 22:26:40 root] (utils.py 283): INFO Epoch: [20] [1610/2502] eta: 0:11:27 lr: 0.000007 loss_cls: 4.1266 (3.9121) grad_norm: 2.3384 (2.3937) time: 0.7641 data: 0.0003 max mem: 8426 +[2024-12-10 22:26:48 root] (utils.py 283): INFO Epoch: [20] [1620/2502] eta: 0:11:19 lr: 0.000007 loss_cls: 3.7432 (3.9105) grad_norm: 2.3357 (2.3938) time: 0.7594 data: 0.0002 max mem: 8426 +[2024-12-10 22:26:55 root] (utils.py 283): INFO Epoch: [20] [1630/2502] eta: 0:11:11 lr: 0.000007 loss_cls: 3.8379 (3.9118) grad_norm: 2.3739 (2.3939) time: 0.7592 data: 0.0003 max mem: 8426 +[2024-12-10 22:27:03 root] (utils.py 283): INFO Epoch: [20] [1640/2502] eta: 0:11:04 lr: 0.000007 loss_cls: 4.2826 (3.9127) grad_norm: 2.2958 (2.3933) time: 0.7647 data: 0.0003 max mem: 8426 +[2024-12-10 22:27:11 root] (utils.py 283): INFO Epoch: [20] [1650/2502] eta: 0:10:56 lr: 0.000007 loss_cls: 3.8672 (3.9123) grad_norm: 2.3202 (2.3932) time: 0.7746 data: 0.0002 max mem: 8426 +[2024-12-10 22:27:18 root] (utils.py 283): INFO Epoch: [20] [1660/2502] eta: 0:10:48 lr: 0.000007 loss_cls: 3.6608 (3.9102) grad_norm: 2.4114 (2.3934) time: 0.7798 data: 0.0002 max mem: 8426 +[2024-12-10 22:27:26 root] (utils.py 283): INFO Epoch: [20] [1670/2502] eta: 0:10:41 lr: 0.000007 loss_cls: 3.7446 (3.9108) grad_norm: 2.4419 (2.3937) time: 0.7674 data: 0.0002 max mem: 8426 +[2024-12-10 22:27:34 root] (utils.py 283): INFO Epoch: [20] [1680/2502] eta: 0:10:33 lr: 0.000007 loss_cls: 3.8492 (3.9097) grad_norm: 2.4150 (2.3939) time: 0.7617 data: 0.0002 max mem: 8426 +[2024-12-10 22:27:42 root] (utils.py 283): INFO Epoch: [20] [1690/2502] eta: 0:10:25 lr: 0.000007 loss_cls: 3.4383 (3.9078) grad_norm: 2.4251 (2.3943) time: 0.7757 data: 0.0002 max mem: 8426 +[2024-12-10 22:27:49 root] (utils.py 283): INFO Epoch: [20] [1700/2502] eta: 0:10:18 lr: 0.000007 loss_cls: 4.0804 (3.9093) grad_norm: 2.4177 (2.3945) time: 0.7811 data: 0.0002 max mem: 8426 +[2024-12-10 22:27:57 root] (utils.py 283): INFO Epoch: [20] [1710/2502] eta: 0:10:10 lr: 0.000007 loss_cls: 4.1703 (3.9095) grad_norm: 2.4035 (2.3944) time: 0.7797 data: 0.0002 max mem: 8426 +[2024-12-10 22:28:05 root] (utils.py 283): INFO Epoch: [20] [1720/2502] eta: 0:10:02 lr: 0.000007 loss_cls: 4.0362 (3.9090) grad_norm: 2.3725 (2.3945) time: 0.7691 data: 0.0002 max mem: 8426 +[2024-12-10 22:28:12 root] (utils.py 283): INFO Epoch: [20] [1730/2502] eta: 0:09:54 lr: 0.000007 loss_cls: 3.6011 (3.9066) grad_norm: 2.3447 (2.3943) time: 0.7563 data: 0.0002 max mem: 8426 +[2024-12-10 22:28:20 root] (utils.py 283): INFO Epoch: [20] [1740/2502] eta: 0:09:47 lr: 0.000007 loss_cls: 3.5581 (3.9064) grad_norm: 2.3259 (2.3945) time: 0.7557 data: 0.0002 max mem: 8426 +[2024-12-10 22:28:27 root] (utils.py 283): INFO Epoch: [20] [1750/2502] eta: 0:09:39 lr: 0.000007 loss_cls: 3.9588 (3.9070) grad_norm: 2.3259 (2.3943) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 22:28:35 root] (utils.py 283): INFO Epoch: [20] [1760/2502] eta: 0:09:31 lr: 0.000007 loss_cls: 3.8089 (3.9058) grad_norm: 2.4108 (2.3945) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 22:28:43 root] (utils.py 283): INFO Epoch: [20] [1770/2502] eta: 0:09:23 lr: 0.000007 loss_cls: 3.8810 (3.9062) grad_norm: 2.4297 (2.3947) time: 0.7572 data: 0.0002 max mem: 8426 +[2024-12-10 22:28:50 root] (utils.py 283): INFO Epoch: [20] [1780/2502] eta: 0:09:16 lr: 0.000007 loss_cls: 3.7326 (3.9052) grad_norm: 2.4035 (2.3947) time: 0.7561 data: 0.0002 max mem: 8426 +[2024-12-10 22:28:58 root] (utils.py 283): INFO Epoch: [20] [1790/2502] eta: 0:09:08 lr: 0.000007 loss_cls: 3.7486 (3.9054) grad_norm: 2.3632 (2.3945) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 22:29:06 root] (utils.py 283): INFO Epoch: [20] [1800/2502] eta: 0:09:00 lr: 0.000007 loss_cls: 3.9592 (3.9051) grad_norm: 2.3684 (2.3944) time: 0.7764 data: 0.0002 max mem: 8426 +[2024-12-10 22:29:13 root] (utils.py 283): INFO Epoch: [20] [1810/2502] eta: 0:08:52 lr: 0.000007 loss_cls: 3.8779 (3.9046) grad_norm: 2.3986 (2.3943) time: 0.7668 data: 0.0003 max mem: 8426 +[2024-12-10 22:29:21 root] (utils.py 283): INFO Epoch: [20] [1820/2502] eta: 0:08:45 lr: 0.000007 loss_cls: 3.8784 (3.9040) grad_norm: 2.3643 (2.3940) time: 0.7562 data: 0.0003 max mem: 8426 +[2024-12-10 22:29:28 root] (utils.py 283): INFO Epoch: [20] [1830/2502] eta: 0:08:37 lr: 0.000007 loss_cls: 3.9301 (3.9040) grad_norm: 2.3293 (2.3938) time: 0.7566 data: 0.0002 max mem: 8426 +[2024-12-10 22:29:36 root] (utils.py 283): INFO Epoch: [20] [1840/2502] eta: 0:08:29 lr: 0.000007 loss_cls: 3.8982 (3.9036) grad_norm: 2.4149 (2.3941) time: 0.7580 data: 0.0002 max mem: 8426 +[2024-12-10 22:29:44 root] (utils.py 283): INFO Epoch: [20] [1850/2502] eta: 0:08:21 lr: 0.000007 loss_cls: 3.9471 (3.9049) grad_norm: 2.4562 (2.3942) time: 0.7575 data: 0.0002 max mem: 8426 +[2024-12-10 22:29:51 root] (utils.py 283): INFO Epoch: [20] [1860/2502] eta: 0:08:14 lr: 0.000007 loss_cls: 4.1335 (3.9062) grad_norm: 2.4671 (2.3940) time: 0.7573 data: 0.0002 max mem: 8426 +[2024-12-10 22:29:59 root] (utils.py 283): INFO Epoch: [20] [1870/2502] eta: 0:08:06 lr: 0.000007 loss_cls: 4.1335 (3.9052) grad_norm: 2.4978 (2.3943) time: 0.7609 data: 0.0003 max mem: 8426 +[2024-12-10 22:30:06 root] (utils.py 283): INFO Epoch: [20] [1880/2502] eta: 0:07:58 lr: 0.000007 loss_cls: 4.1314 (3.9064) grad_norm: 2.4527 (2.3945) time: 0.7606 data: 0.0003 max mem: 8426 +[2024-12-10 22:30:14 root] (utils.py 283): INFO Epoch: [20] [1890/2502] eta: 0:07:51 lr: 0.000007 loss_cls: 4.1711 (3.9077) grad_norm: 2.3618 (2.3943) time: 0.7595 data: 0.0003 max mem: 8426 +[2024-12-10 22:30:22 root] (utils.py 283): INFO Epoch: [20] [1900/2502] eta: 0:07:43 lr: 0.000007 loss_cls: 4.1106 (3.9083) grad_norm: 2.3273 (2.3941) time: 0.7613 data: 0.0002 max mem: 8426 +[2024-12-10 22:30:29 root] (utils.py 283): INFO Epoch: [20] [1910/2502] eta: 0:07:35 lr: 0.000007 loss_cls: 4.0593 (3.9086) grad_norm: 2.3829 (2.3942) time: 0.7600 data: 0.0002 max mem: 8426 +[2024-12-10 22:30:37 root] (utils.py 283): INFO Epoch: [20] [1920/2502] eta: 0:07:27 lr: 0.000007 loss_cls: 4.1006 (3.9097) grad_norm: 2.3829 (2.3941) time: 0.7607 data: 0.0002 max mem: 8426 +[2024-12-10 22:30:44 root] (utils.py 283): INFO Epoch: [20] [1930/2502] eta: 0:07:20 lr: 0.000007 loss_cls: 3.9651 (3.9095) grad_norm: 2.3497 (2.3939) time: 0.7598 data: 0.0002 max mem: 8426 +[2024-12-10 22:30:52 root] (utils.py 283): INFO Epoch: [20] [1940/2502] eta: 0:07:12 lr: 0.000007 loss_cls: 4.0509 (3.9096) grad_norm: 2.3445 (2.3940) time: 0.7597 data: 0.0002 max mem: 8426 +[2024-12-10 22:31:00 root] (utils.py 283): INFO Epoch: [20] [1950/2502] eta: 0:07:04 lr: 0.000007 loss_cls: 4.0509 (3.9094) grad_norm: 2.4861 (2.3947) time: 0.7634 data: 0.0003 max mem: 8426 +[2024-12-10 22:31:07 root] (utils.py 283): INFO Epoch: [20] [1960/2502] eta: 0:06:57 lr: 0.000007 loss_cls: 3.9388 (3.9097) grad_norm: 2.4376 (2.3945) time: 0.7594 data: 0.0002 max mem: 8426 +[2024-12-10 22:31:15 root] (utils.py 283): INFO Epoch: [20] [1970/2502] eta: 0:06:49 lr: 0.000007 loss_cls: 3.9388 (3.9099) grad_norm: 2.3677 (2.3947) time: 0.7605 data: 0.0003 max mem: 8426 +[2024-12-10 22:31:23 root] (utils.py 283): INFO Epoch: [20] [1980/2502] eta: 0:06:41 lr: 0.000007 loss_cls: 4.1309 (3.9104) grad_norm: 2.3867 (2.3948) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-10 22:31:30 root] (utils.py 283): INFO Epoch: [20] [1990/2502] eta: 0:06:33 lr: 0.000007 loss_cls: 4.1519 (3.9103) grad_norm: 2.3323 (2.3949) time: 0.7586 data: 0.0002 max mem: 8426 +[2024-12-10 22:31:38 root] (utils.py 283): INFO Epoch: [20] [2000/2502] eta: 0:06:26 lr: 0.000007 loss_cls: 3.9153 (3.9101) grad_norm: 2.3323 (2.3948) time: 0.7537 data: 0.0002 max mem: 8426 +[2024-12-10 22:31:45 root] (utils.py 283): INFO Epoch: [20] [2010/2502] eta: 0:06:18 lr: 0.000007 loss_cls: 3.7430 (3.9083) grad_norm: 2.4095 (2.3950) time: 0.7586 data: 0.0002 max mem: 8426 +[2024-12-10 22:31:53 root] (utils.py 283): INFO Epoch: [20] [2020/2502] eta: 0:06:10 lr: 0.000007 loss_cls: 3.6801 (3.9080) grad_norm: 2.4095 (2.3951) time: 0.7596 data: 0.0002 max mem: 8426 +[2024-12-10 22:32:00 root] (utils.py 283): INFO Epoch: [20] [2030/2502] eta: 0:06:02 lr: 0.000007 loss_cls: 3.8687 (3.9073) grad_norm: 2.3493 (2.3948) time: 0.7550 data: 0.0002 max mem: 8426 +[2024-12-10 22:32:08 root] (utils.py 283): INFO Epoch: [20] [2040/2502] eta: 0:05:55 lr: 0.000007 loss_cls: 4.0775 (3.9085) grad_norm: 2.3493 (2.3950) time: 0.7561 data: 0.0002 max mem: 8426 +[2024-12-10 22:32:16 root] (utils.py 283): INFO Epoch: [20] [2050/2502] eta: 0:05:47 lr: 0.000007 loss_cls: 4.0546 (3.9083) grad_norm: 2.3984 (2.3952) time: 0.7599 data: 0.0002 max mem: 8426 +[2024-12-10 22:32:23 root] (utils.py 283): INFO Epoch: [20] [2060/2502] eta: 0:05:39 lr: 0.000007 loss_cls: 4.0546 (3.9084) grad_norm: 2.4584 (2.3956) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 22:32:31 root] (utils.py 283): INFO Epoch: [20] [2070/2502] eta: 0:05:32 lr: 0.000007 loss_cls: 4.1041 (3.9089) grad_norm: 2.4419 (2.3956) time: 0.7608 data: 0.0002 max mem: 8426 +[2024-12-10 22:32:38 root] (utils.py 283): INFO Epoch: [20] [2080/2502] eta: 0:05:24 lr: 0.000007 loss_cls: 4.0356 (3.9098) grad_norm: 2.3949 (2.3958) time: 0.7551 data: 0.0002 max mem: 8426 +[2024-12-10 22:32:46 root] (utils.py 283): INFO Epoch: [20] [2090/2502] eta: 0:05:16 lr: 0.000007 loss_cls: 4.1798 (3.9121) grad_norm: 2.4291 (2.3961) time: 0.7692 data: 0.0002 max mem: 8426 +[2024-12-10 22:32:54 root] (utils.py 283): INFO Epoch: [20] [2100/2502] eta: 0:05:09 lr: 0.000007 loss_cls: 4.2118 (3.9130) grad_norm: 2.4070 (2.3960) time: 0.7802 data: 0.0002 max mem: 8426 +[2024-12-10 22:33:02 root] (utils.py 283): INFO Epoch: [20] [2110/2502] eta: 0:05:01 lr: 0.000007 loss_cls: 3.9973 (3.9137) grad_norm: 2.3553 (2.3959) time: 0.7794 data: 0.0003 max mem: 8426 +[2024-12-10 22:33:10 root] (utils.py 283): INFO Epoch: [20] [2120/2502] eta: 0:04:53 lr: 0.000007 loss_cls: 3.9956 (3.9144) grad_norm: 2.3939 (2.3961) time: 0.7801 data: 0.0003 max mem: 8426 +[2024-12-10 22:33:17 root] (utils.py 283): INFO Epoch: [20] [2130/2502] eta: 0:04:46 lr: 0.000007 loss_cls: 4.0081 (3.9147) grad_norm: 2.4134 (2.3963) time: 0.7802 data: 0.0003 max mem: 8426 +[2024-12-10 22:33:25 root] (utils.py 283): INFO Epoch: [20] [2140/2502] eta: 0:04:38 lr: 0.000007 loss_cls: 3.9181 (3.9147) grad_norm: 2.4307 (2.3966) time: 0.7876 data: 0.0003 max mem: 8426 +[2024-12-10 22:33:33 root] (utils.py 283): INFO Epoch: [20] [2150/2502] eta: 0:04:30 lr: 0.000007 loss_cls: 3.8329 (3.9147) grad_norm: 2.3978 (2.3964) time: 0.7881 data: 0.0003 max mem: 8426 +[2024-12-10 22:33:41 root] (utils.py 283): INFO Epoch: [20] [2160/2502] eta: 0:04:23 lr: 0.000007 loss_cls: 3.9311 (3.9141) grad_norm: 2.4005 (2.3965) time: 0.7800 data: 0.0002 max mem: 8426 +[2024-12-10 22:33:49 root] (utils.py 283): INFO Epoch: [20] [2170/2502] eta: 0:04:15 lr: 0.000007 loss_cls: 3.9311 (3.9133) grad_norm: 2.4486 (2.3966) time: 0.7776 data: 0.0002 max mem: 8426 +[2024-12-10 22:33:56 root] (utils.py 283): INFO Epoch: [20] [2180/2502] eta: 0:04:07 lr: 0.000007 loss_cls: 4.1650 (3.9142) grad_norm: 2.4350 (2.3968) time: 0.7781 data: 0.0002 max mem: 8426 +[2024-12-10 22:34:04 root] (utils.py 283): INFO Epoch: [20] [2190/2502] eta: 0:04:00 lr: 0.000007 loss_cls: 4.1975 (3.9153) grad_norm: 2.4080 (2.3970) time: 0.7714 data: 0.0002 max mem: 8426 +[2024-12-10 22:34:12 root] (utils.py 283): INFO Epoch: [20] [2200/2502] eta: 0:03:52 lr: 0.000007 loss_cls: 4.1553 (3.9152) grad_norm: 2.4080 (2.3973) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 22:34:19 root] (utils.py 283): INFO Epoch: [20] [2210/2502] eta: 0:03:44 lr: 0.000007 loss_cls: 4.2864 (3.9170) grad_norm: 2.3825 (2.3971) time: 0.7597 data: 0.0002 max mem: 8426 +[2024-12-10 22:34:27 root] (utils.py 283): INFO Epoch: [20] [2220/2502] eta: 0:03:36 lr: 0.000007 loss_cls: 4.0068 (3.9160) grad_norm: 2.3450 (2.3971) time: 0.7576 data: 0.0003 max mem: 8426 +[2024-12-10 22:34:34 root] (utils.py 283): INFO Epoch: [20] [2230/2502] eta: 0:03:29 lr: 0.000007 loss_cls: 3.9812 (3.9159) grad_norm: 2.3855 (2.3974) time: 0.7576 data: 0.0003 max mem: 8426 +[2024-12-10 22:34:42 root] (utils.py 283): INFO Epoch: [20] [2240/2502] eta: 0:03:21 lr: 0.000007 loss_cls: 3.9873 (3.9149) grad_norm: 2.3894 (2.3974) time: 0.7568 data: 0.0003 max mem: 8426 +[2024-12-10 22:34:50 root] (utils.py 283): INFO Epoch: [20] [2250/2502] eta: 0:03:13 lr: 0.000007 loss_cls: 3.9086 (3.9145) grad_norm: 2.3783 (2.3974) time: 0.7582 data: 0.0003 max mem: 8426 +[2024-12-10 22:34:57 root] (utils.py 283): INFO Epoch: [20] [2260/2502] eta: 0:03:06 lr: 0.000007 loss_cls: 3.9570 (3.9144) grad_norm: 2.3665 (2.3972) time: 0.7736 data: 0.0003 max mem: 8426 +[2024-12-10 22:35:05 root] (utils.py 283): INFO Epoch: [20] [2270/2502] eta: 0:02:58 lr: 0.000007 loss_cls: 4.0731 (3.9157) grad_norm: 2.3654 (2.3971) time: 0.7807 data: 0.0002 max mem: 8426 +[2024-12-10 22:35:13 root] (utils.py 283): INFO Epoch: [20] [2280/2502] eta: 0:02:50 lr: 0.000007 loss_cls: 4.1181 (3.9156) grad_norm: 2.4069 (2.3973) time: 0.7671 data: 0.0002 max mem: 8426 +[2024-12-10 22:35:21 root] (utils.py 283): INFO Epoch: [20] [2290/2502] eta: 0:02:43 lr: 0.000007 loss_cls: 4.0395 (3.9155) grad_norm: 2.4104 (2.3973) time: 0.7675 data: 0.0002 max mem: 8426 +[2024-12-10 22:35:28 root] (utils.py 283): INFO Epoch: [20] [2300/2502] eta: 0:02:35 lr: 0.000007 loss_cls: 4.0395 (3.9153) grad_norm: 2.3821 (2.3975) time: 0.7765 data: 0.0002 max mem: 8426 +[2024-12-10 22:35:36 root] (utils.py 283): INFO Epoch: [20] [2310/2502] eta: 0:02:27 lr: 0.000007 loss_cls: 3.4295 (3.9132) grad_norm: 2.3770 (2.3973) time: 0.7746 data: 0.0003 max mem: 8426 +[2024-12-10 22:35:44 root] (utils.py 283): INFO Epoch: [20] [2320/2502] eta: 0:02:19 lr: 0.000007 loss_cls: 3.4040 (3.9126) grad_norm: 2.3202 (2.3970) time: 0.7662 data: 0.0003 max mem: 8426 +[2024-12-10 22:35:51 root] (utils.py 283): INFO Epoch: [20] [2330/2502] eta: 0:02:12 lr: 0.000007 loss_cls: 4.0784 (3.9138) grad_norm: 2.2900 (2.3968) time: 0.7612 data: 0.0003 max mem: 8426 +[2024-12-10 22:35:59 root] (utils.py 283): INFO Epoch: [20] [2340/2502] eta: 0:02:04 lr: 0.000007 loss_cls: 4.2311 (3.9140) grad_norm: 2.3649 (2.3968) time: 0.7671 data: 0.0003 max mem: 8426 +[2024-12-10 22:36:07 root] (utils.py 283): INFO Epoch: [20] [2350/2502] eta: 0:01:56 lr: 0.000007 loss_cls: 4.2311 (3.9153) grad_norm: 2.3955 (2.3972) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-10 22:36:14 root] (utils.py 283): INFO Epoch: [20] [2360/2502] eta: 0:01:49 lr: 0.000007 loss_cls: 4.1703 (3.9148) grad_norm: 2.4077 (2.3973) time: 0.7627 data: 0.0003 max mem: 8426 +[2024-12-10 22:36:22 root] (utils.py 283): INFO Epoch: [20] [2370/2502] eta: 0:01:41 lr: 0.000007 loss_cls: 4.1335 (3.9157) grad_norm: 2.3982 (2.3974) time: 0.7702 data: 0.0002 max mem: 8426 +[2024-12-10 22:36:30 root] (utils.py 283): INFO Epoch: [20] [2380/2502] eta: 0:01:33 lr: 0.000007 loss_cls: 4.1527 (3.9160) grad_norm: 2.3782 (2.3971) time: 0.7688 data: 0.0003 max mem: 8426 +[2024-12-10 22:36:37 root] (utils.py 283): INFO Epoch: [20] [2390/2502] eta: 0:01:26 lr: 0.000007 loss_cls: 3.9227 (3.9155) grad_norm: 2.3609 (2.3971) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 22:36:45 root] (utils.py 283): INFO Epoch: [20] [2400/2502] eta: 0:01:18 lr: 0.000007 loss_cls: 4.0196 (3.9160) grad_norm: 2.3838 (2.3972) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 22:36:53 root] (utils.py 283): INFO Epoch: [20] [2410/2502] eta: 0:01:10 lr: 0.000007 loss_cls: 4.0100 (3.9159) grad_norm: 2.3919 (2.3973) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 22:37:00 root] (utils.py 283): INFO Epoch: [20] [2420/2502] eta: 0:01:03 lr: 0.000007 loss_cls: 3.9313 (3.9163) grad_norm: 2.3297 (2.3972) time: 0.7674 data: 0.0003 max mem: 8426 +[2024-12-10 22:37:08 root] (utils.py 283): INFO Epoch: [20] [2430/2502] eta: 0:00:55 lr: 0.000007 loss_cls: 4.0699 (3.9172) grad_norm: 2.3184 (2.3971) time: 0.7679 data: 0.0003 max mem: 8426 +[2024-12-10 22:37:16 root] (utils.py 283): INFO Epoch: [20] [2440/2502] eta: 0:00:47 lr: 0.000007 loss_cls: 4.0709 (3.9166) grad_norm: 2.3770 (2.3973) time: 0.7681 data: 0.0003 max mem: 8426 +[2024-12-10 22:37:23 root] (utils.py 283): INFO Epoch: [20] [2450/2502] eta: 0:00:39 lr: 0.000007 loss_cls: 3.8098 (3.9157) grad_norm: 2.3834 (2.3972) time: 0.7694 data: 0.0003 max mem: 8426 +[2024-12-10 22:37:31 root] (utils.py 283): INFO Epoch: [20] [2460/2502] eta: 0:00:32 lr: 0.000007 loss_cls: 3.9028 (3.9157) grad_norm: 2.4087 (2.3974) time: 0.7684 data: 0.0002 max mem: 8426 +[2024-12-10 22:37:39 root] (utils.py 283): INFO Epoch: [20] [2470/2502] eta: 0:00:24 lr: 0.000007 loss_cls: 3.9028 (3.9150) grad_norm: 2.4188 (2.3975) time: 0.7758 data: 0.0003 max mem: 8426 +[2024-12-10 22:37:47 root] (utils.py 283): INFO Epoch: [20] [2480/2502] eta: 0:00:16 lr: 0.000007 loss_cls: 4.1912 (3.9155) grad_norm: 2.4024 (2.3977) time: 0.7830 data: 0.0003 max mem: 8426 +[2024-12-10 22:37:55 root] (utils.py 283): INFO Epoch: [20] [2490/2502] eta: 0:00:09 lr: 0.000007 loss_cls: 4.1981 (3.9163) grad_norm: 2.3808 (2.3975) time: 0.8020 data: 0.0238 max mem: 8426 +[2024-12-10 22:38:02 root] (utils.py 283): INFO Epoch: [20] [2500/2502] eta: 0:00:01 lr: 0.000007 loss_cls: 4.0120 (3.9167) grad_norm: 2.3490 (2.3975) time: 0.7906 data: 0.0238 max mem: 8426 +[2024-12-10 22:38:03 root] (utils.py 283): INFO Epoch: [20] [2501/2502] eta: 0:00:00 lr: 0.000007 loss_cls: 4.0221 (3.9168) grad_norm: 2.3483 (2.3975) time: 0.7897 data: 0.0238 max mem: 8426 +[2024-12-10 22:38:03 root] (utils.py 297): INFO Epoch: [20] Total time: 0:32:05 (0.7694 s / it) +[2024-12-10 22:38:03 root] (engine.py 179): INFO Averaged stats:lr: 0.000007 loss_cls: 4.0221 (3.9032) grad_norm: 2.3483 (2.3975) +[2024-12-10 22:38:04 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.6247 (0.6247) acc1: 87.5000 (87.5000) acc3: 96.8750 (96.8750) acc5: 99.2188 (99.2188) time: 0.1275 data: 0.0004 max mem: 8426 +[2024-12-10 22:38:05 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7141 (0.8063) acc1: 86.7188 (82.8125) acc3: 96.0938 (93.7500) acc5: 96.8750 (96.5199) time: 0.1277 data: 0.0004 max mem: 8426 +[2024-12-10 22:38:06 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8549 (0.8617) acc1: 79.6875 (81.5848) acc3: 92.9688 (92.9315) acc5: 94.5312 (95.5357) time: 0.1279 data: 0.0004 max mem: 8426 +[2024-12-10 22:38:08 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9623 (0.8748) acc1: 78.9062 (80.7964) acc3: 92.9688 (93.2208) acc5: 96.0938 (95.7409) time: 0.1281 data: 0.0004 max mem: 8426 +[2024-12-10 22:38:09 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8244 (0.8630) acc1: 80.4688 (81.2309) acc3: 94.5312 (93.3308) acc5: 96.8750 (95.8460) time: 0.1457 data: 0.0181 max mem: 8426 +[2024-12-10 22:38:11 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0443 (0.9478) acc1: 75.0000 (79.1360) acc3: 89.0625 (91.9884) acc5: 92.9688 (94.7917) time: 0.1810 data: 0.0534 max mem: 8426 +[2024-12-10 22:38:13 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2459 (0.9941) acc1: 71.0938 (78.3043) acc3: 86.7188 (91.0476) acc5: 89.8438 (93.9933) time: 0.1818 data: 0.0543 max mem: 8426 +[2024-12-10 22:38:14 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:04 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1839 (1.0336) acc1: 74.2188 (77.3327) acc3: 86.7188 (90.4599) acc5: 89.8438 (93.5189) time: 0.1499 data: 0.0223 max mem: 8426 +[2024-12-10 22:38:16 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1859 (1.0680) acc1: 73.4375 (76.4564) acc3: 86.7188 (89.8823) acc5: 89.8438 (93.0170) time: 0.1396 data: 0.0114 max mem: 8426 +[2024-12-10 22:38:17 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2314 (1.0964) acc1: 70.3125 (75.7040) acc3: 85.9375 (89.5175) acc5: 89.8438 (92.7455) time: 0.1384 data: 0.0097 max mem: 8426 +[2024-12-10 22:38:18 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1573 (1.0840) acc1: 74.2188 (75.9440) acc3: 89.0625 (89.7200) acc5: 91.4062 (92.9280) time: 0.1375 data: 0.0115 max mem: 8426 +[2024-12-10 22:38:18 root] (utils.py 297): INFO Test: Total time: 0:00:14 (0.1461 s / it) +[2024-12-10 22:38:18 root] (engine.py 264): INFO * Acc@1 75.714 Acc@3 89.708 Acc@5 92.908 loss 1.086 flops 1.285 layer_flops 1.251 +[2024-12-10 22:38:18 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 75.7% +[2024-12-10 22:38:18 root] (main.py 576): INFO Max accuracy: 75.78% +[2024-12-10 22:38:19 root] (utils.py 283): INFO Epoch: [21] [ 0/2502] eta: 0:32:32 lr: 0.000006 loss_cls: 4.2236 (4.2236) grad_norm: 2.5457 (2.5457) time: 0.7806 data: 0.0003 max mem: 8426 +[2024-12-10 22:38:26 root] (utils.py 283): INFO Epoch: [21] [ 10/2502] eta: 0:31:56 lr: 0.000006 loss_cls: 4.2236 (4.1803) grad_norm: 2.4943 (2.4645) time: 0.7692 data: 0.0003 max mem: 8426 +[2024-12-10 22:38:34 root] (utils.py 283): INFO Epoch: [21] [ 20/2502] eta: 0:31:46 lr: 0.000006 loss_cls: 4.2300 (4.1644) grad_norm: 2.4906 (2.4625) time: 0.7673 data: 0.0002 max mem: 8426 +[2024-12-10 22:38:42 root] (utils.py 283): INFO Epoch: [21] [ 30/2502] eta: 0:31:33 lr: 0.000006 loss_cls: 4.2300 (4.1261) grad_norm: 2.4088 (2.4317) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-10 22:38:49 root] (utils.py 283): INFO Epoch: [21] [ 40/2502] eta: 0:31:21 lr: 0.000006 loss_cls: 4.2322 (4.1613) grad_norm: 2.3209 (2.4170) time: 0.7603 data: 0.0002 max mem: 8426 +[2024-12-10 22:38:57 root] (utils.py 283): INFO Epoch: [21] [ 50/2502] eta: 0:31:15 lr: 0.000006 loss_cls: 4.2007 (4.1286) grad_norm: 2.3560 (2.4130) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 22:39:05 root] (utils.py 283): INFO Epoch: [21] [ 60/2502] eta: 0:31:06 lr: 0.000006 loss_cls: 3.8863 (4.0776) grad_norm: 2.4378 (2.4187) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 22:39:12 root] (utils.py 283): INFO Epoch: [21] [ 70/2502] eta: 0:30:58 lr: 0.000006 loss_cls: 4.0794 (4.0732) grad_norm: 2.4623 (2.4192) time: 0.7618 data: 0.0002 max mem: 8426 +[2024-12-10 22:39:20 root] (utils.py 283): INFO Epoch: [21] [ 80/2502] eta: 0:30:51 lr: 0.000006 loss_cls: 4.1076 (4.0473) grad_norm: 2.4542 (2.4222) time: 0.7645 data: 0.0003 max mem: 8426 +[2024-12-10 22:39:27 root] (utils.py 283): INFO Epoch: [21] [ 90/2502] eta: 0:30:41 lr: 0.000006 loss_cls: 3.8394 (4.0018) grad_norm: 2.4472 (2.4263) time: 0.7619 data: 0.0003 max mem: 8426 +[2024-12-10 22:39:35 root] (utils.py 283): INFO Epoch: [21] [ 100/2502] eta: 0:30:33 lr: 0.000006 loss_cls: 3.8135 (4.0158) grad_norm: 2.4042 (2.4228) time: 0.7585 data: 0.0002 max mem: 8426 +[2024-12-10 22:39:43 root] (utils.py 283): INFO Epoch: [21] [ 110/2502] eta: 0:30:24 lr: 0.000006 loss_cls: 3.8319 (3.9891) grad_norm: 2.3687 (2.4208) time: 0.7581 data: 0.0002 max mem: 8426 +[2024-12-10 22:39:50 root] (utils.py 283): INFO Epoch: [21] [ 120/2502] eta: 0:30:16 lr: 0.000006 loss_cls: 3.3481 (3.9387) grad_norm: 2.4209 (2.4216) time: 0.7594 data: 0.0002 max mem: 8426 +[2024-12-10 22:39:58 root] (utils.py 283): INFO Epoch: [21] [ 130/2502] eta: 0:30:08 lr: 0.000006 loss_cls: 3.4181 (3.9241) grad_norm: 2.4347 (2.4222) time: 0.7614 data: 0.0002 max mem: 8426 +[2024-12-10 22:40:05 root] (utils.py 283): INFO Epoch: [21] [ 140/2502] eta: 0:30:00 lr: 0.000006 loss_cls: 4.0116 (3.9208) grad_norm: 2.4178 (2.4191) time: 0.7592 data: 0.0002 max mem: 8426 +[2024-12-10 22:40:13 root] (utils.py 283): INFO Epoch: [21] [ 150/2502] eta: 0:29:51 lr: 0.000006 loss_cls: 4.0187 (3.9252) grad_norm: 2.3726 (2.4164) time: 0.7585 data: 0.0002 max mem: 8426 +[2024-12-10 22:40:21 root] (utils.py 283): INFO Epoch: [21] [ 160/2502] eta: 0:29:43 lr: 0.000006 loss_cls: 4.2128 (3.9339) grad_norm: 2.3995 (2.4163) time: 0.7586 data: 0.0002 max mem: 8426 +[2024-12-10 22:40:28 root] (utils.py 283): INFO Epoch: [21] [ 170/2502] eta: 0:29:39 lr: 0.000006 loss_cls: 4.1190 (3.9332) grad_norm: 2.3567 (2.4146) time: 0.7720 data: 0.0002 max mem: 8426 +[2024-12-10 22:40:36 root] (utils.py 283): INFO Epoch: [21] [ 180/2502] eta: 0:29:32 lr: 0.000006 loss_cls: 4.1169 (3.9485) grad_norm: 2.3492 (2.4148) time: 0.7757 data: 0.0002 max mem: 8426 +[2024-12-10 22:40:44 root] (utils.py 283): INFO Epoch: [21] [ 190/2502] eta: 0:29:25 lr: 0.000006 loss_cls: 4.0578 (3.9408) grad_norm: 2.3290 (2.4122) time: 0.7695 data: 0.0002 max mem: 8426 +[2024-12-10 22:40:51 root] (utils.py 283): INFO Epoch: [21] [ 200/2502] eta: 0:29:17 lr: 0.000006 loss_cls: 3.8617 (3.9312) grad_norm: 2.3603 (2.4140) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-10 22:40:59 root] (utils.py 283): INFO Epoch: [21] [ 210/2502] eta: 0:29:09 lr: 0.000006 loss_cls: 4.0327 (3.9329) grad_norm: 2.4019 (2.4137) time: 0.7608 data: 0.0002 max mem: 8426 +[2024-12-10 22:41:07 root] (utils.py 283): INFO Epoch: [21] [ 220/2502] eta: 0:29:02 lr: 0.000006 loss_cls: 4.1438 (3.9417) grad_norm: 2.4350 (2.4113) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 22:41:14 root] (utils.py 283): INFO Epoch: [21] [ 230/2502] eta: 0:28:55 lr: 0.000006 loss_cls: 3.9347 (3.9347) grad_norm: 2.3501 (2.4091) time: 0.7683 data: 0.0002 max mem: 8426 +[2024-12-10 22:41:22 root] (utils.py 283): INFO Epoch: [21] [ 240/2502] eta: 0:28:47 lr: 0.000006 loss_cls: 3.5277 (3.9188) grad_norm: 2.3984 (2.4099) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 22:41:30 root] (utils.py 283): INFO Epoch: [21] [ 250/2502] eta: 0:28:40 lr: 0.000006 loss_cls: 3.5999 (3.9156) grad_norm: 2.4170 (2.4092) time: 0.7657 data: 0.0003 max mem: 8426 +[2024-12-10 22:41:37 root] (utils.py 283): INFO Epoch: [21] [ 260/2502] eta: 0:28:34 lr: 0.000006 loss_cls: 3.9253 (3.9104) grad_norm: 2.4170 (2.4104) time: 0.7729 data: 0.0003 max mem: 8426 +[2024-12-10 22:41:45 root] (utils.py 283): INFO Epoch: [21] [ 270/2502] eta: 0:28:27 lr: 0.000006 loss_cls: 3.9253 (3.9155) grad_norm: 2.4409 (2.4113) time: 0.7780 data: 0.0002 max mem: 8426 +[2024-12-10 22:41:53 root] (utils.py 283): INFO Epoch: [21] [ 280/2502] eta: 0:28:20 lr: 0.000006 loss_cls: 3.7944 (3.9076) grad_norm: 2.4121 (2.4112) time: 0.7744 data: 0.0002 max mem: 8426 +[2024-12-10 22:42:01 root] (utils.py 283): INFO Epoch: [21] [ 290/2502] eta: 0:28:13 lr: 0.000006 loss_cls: 3.5194 (3.9047) grad_norm: 2.4145 (2.4120) time: 0.7739 data: 0.0003 max mem: 8426 +[2024-12-10 22:42:08 root] (utils.py 283): INFO Epoch: [21] [ 300/2502] eta: 0:28:05 lr: 0.000006 loss_cls: 3.6443 (3.8971) grad_norm: 2.4492 (2.4135) time: 0.7714 data: 0.0003 max mem: 8426 +[2024-12-10 22:42:16 root] (utils.py 283): INFO Epoch: [21] [ 310/2502] eta: 0:27:58 lr: 0.000006 loss_cls: 3.8248 (3.8932) grad_norm: 2.4713 (2.4155) time: 0.7637 data: 0.0003 max mem: 8426 +[2024-12-10 22:42:24 root] (utils.py 283): INFO Epoch: [21] [ 320/2502] eta: 0:27:50 lr: 0.000006 loss_cls: 3.9844 (3.8961) grad_norm: 2.4473 (2.4172) time: 0.7685 data: 0.0003 max mem: 8426 +[2024-12-10 22:42:31 root] (utils.py 283): INFO Epoch: [21] [ 330/2502] eta: 0:27:43 lr: 0.000006 loss_cls: 3.9844 (3.8952) grad_norm: 2.3994 (2.4155) time: 0.7676 data: 0.0003 max mem: 8426 +[2024-12-10 22:42:39 root] (utils.py 283): INFO Epoch: [21] [ 340/2502] eta: 0:27:35 lr: 0.000006 loss_cls: 4.0908 (3.8986) grad_norm: 2.3969 (2.4150) time: 0.7670 data: 0.0003 max mem: 8426 +[2024-12-10 22:42:47 root] (utils.py 283): INFO Epoch: [21] [ 350/2502] eta: 0:27:27 lr: 0.000006 loss_cls: 4.0794 (3.8988) grad_norm: 2.4099 (2.4143) time: 0.7658 data: 0.0003 max mem: 8426 +[2024-12-10 22:42:54 root] (utils.py 283): INFO Epoch: [21] [ 360/2502] eta: 0:27:19 lr: 0.000006 loss_cls: 3.8709 (3.8878) grad_norm: 2.4059 (2.4139) time: 0.7580 data: 0.0003 max mem: 8426 +[2024-12-10 22:43:02 root] (utils.py 283): INFO Epoch: [21] [ 370/2502] eta: 0:27:11 lr: 0.000006 loss_cls: 3.3018 (3.8771) grad_norm: 2.3818 (2.4140) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 22:43:10 root] (utils.py 283): INFO Epoch: [21] [ 380/2502] eta: 0:27:04 lr: 0.000006 loss_cls: 3.9476 (3.8811) grad_norm: 2.4276 (2.4185) time: 0.7702 data: 0.0002 max mem: 8426 +[2024-12-10 22:43:17 root] (utils.py 283): INFO Epoch: [21] [ 390/2502] eta: 0:26:57 lr: 0.000006 loss_cls: 4.0748 (3.8825) grad_norm: 2.3782 (2.4160) time: 0.7714 data: 0.0002 max mem: 8426 +[2024-12-10 22:43:25 root] (utils.py 283): INFO Epoch: [21] [ 400/2502] eta: 0:26:49 lr: 0.000006 loss_cls: 4.0013 (3.8863) grad_norm: 2.3153 (2.4148) time: 0.7678 data: 0.0002 max mem: 8426 +[2024-12-10 22:43:33 root] (utils.py 283): INFO Epoch: [21] [ 410/2502] eta: 0:26:43 lr: 0.000006 loss_cls: 4.0013 (3.8843) grad_norm: 2.3193 (2.4124) time: 0.7759 data: 0.0002 max mem: 8426 +[2024-12-10 22:43:41 root] (utils.py 283): INFO Epoch: [21] [ 420/2502] eta: 0:26:35 lr: 0.000006 loss_cls: 3.9478 (3.8783) grad_norm: 2.3599 (2.4142) time: 0.7788 data: 0.0002 max mem: 8426 +[2024-12-10 22:43:48 root] (utils.py 283): INFO Epoch: [21] [ 430/2502] eta: 0:26:27 lr: 0.000006 loss_cls: 3.8679 (3.8750) grad_norm: 2.4054 (2.4131) time: 0.7688 data: 0.0003 max mem: 8426 +[2024-12-10 22:43:56 root] (utils.py 283): INFO Epoch: [21] [ 440/2502] eta: 0:26:20 lr: 0.000006 loss_cls: 4.1297 (3.8817) grad_norm: 2.4035 (2.4125) time: 0.7701 data: 0.0003 max mem: 8426 +[2024-12-10 22:44:04 root] (utils.py 283): INFO Epoch: [21] [ 450/2502] eta: 0:26:13 lr: 0.000006 loss_cls: 4.2665 (3.8839) grad_norm: 2.4116 (2.4149) time: 0.7721 data: 0.0002 max mem: 8426 +[2024-12-10 22:44:12 root] (utils.py 283): INFO Epoch: [21] [ 460/2502] eta: 0:26:06 lr: 0.000006 loss_cls: 4.1005 (3.8848) grad_norm: 2.4116 (2.4144) time: 0.7744 data: 0.0003 max mem: 8426 +[2024-12-10 22:44:19 root] (utils.py 283): INFO Epoch: [21] [ 470/2502] eta: 0:25:58 lr: 0.000006 loss_cls: 3.6798 (3.8843) grad_norm: 2.3824 (2.4130) time: 0.7736 data: 0.0003 max mem: 8426 +[2024-12-10 22:44:27 root] (utils.py 283): INFO Epoch: [21] [ 480/2502] eta: 0:25:50 lr: 0.000006 loss_cls: 3.9442 (3.8870) grad_norm: 2.3923 (2.4132) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 22:44:34 root] (utils.py 283): INFO Epoch: [21] [ 490/2502] eta: 0:25:42 lr: 0.000006 loss_cls: 3.9669 (3.8889) grad_norm: 2.3719 (2.4129) time: 0.7632 data: 0.0002 max mem: 8426 +[2024-12-10 22:44:42 root] (utils.py 283): INFO Epoch: [21] [ 500/2502] eta: 0:25:35 lr: 0.000006 loss_cls: 4.0254 (3.8922) grad_norm: 2.4073 (2.4138) time: 0.7649 data: 0.0003 max mem: 8426 +[2024-12-10 22:44:50 root] (utils.py 283): INFO Epoch: [21] [ 510/2502] eta: 0:25:27 lr: 0.000006 loss_cls: 4.1699 (3.8885) grad_norm: 2.3543 (2.4124) time: 0.7683 data: 0.0003 max mem: 8426 +[2024-12-10 22:44:58 root] (utils.py 283): INFO Epoch: [21] [ 520/2502] eta: 0:25:20 lr: 0.000006 loss_cls: 4.1699 (3.8941) grad_norm: 2.3267 (2.4122) time: 0.7736 data: 0.0002 max mem: 8426 +[2024-12-10 22:45:05 root] (utils.py 283): INFO Epoch: [21] [ 530/2502] eta: 0:25:13 lr: 0.000006 loss_cls: 4.0582 (3.8904) grad_norm: 2.3747 (2.4116) time: 0.7818 data: 0.0002 max mem: 8426 +[2024-12-10 22:45:13 root] (utils.py 283): INFO Epoch: [21] [ 540/2502] eta: 0:25:05 lr: 0.000006 loss_cls: 3.8418 (3.8912) grad_norm: 2.3648 (2.4106) time: 0.7786 data: 0.0002 max mem: 8426 +[2024-12-10 22:45:21 root] (utils.py 283): INFO Epoch: [21] [ 550/2502] eta: 0:24:57 lr: 0.000006 loss_cls: 3.8418 (3.8875) grad_norm: 2.3649 (2.4103) time: 0.7670 data: 0.0002 max mem: 8426 +[2024-12-10 22:45:28 root] (utils.py 283): INFO Epoch: [21] [ 560/2502] eta: 0:24:50 lr: 0.000006 loss_cls: 4.1923 (3.8908) grad_norm: 2.4190 (2.4097) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 22:45:36 root] (utils.py 283): INFO Epoch: [21] [ 570/2502] eta: 0:24:42 lr: 0.000006 loss_cls: 4.1470 (3.8906) grad_norm: 2.3982 (2.4101) time: 0.7682 data: 0.0002 max mem: 8426 +[2024-12-10 22:45:44 root] (utils.py 283): INFO Epoch: [21] [ 580/2502] eta: 0:24:34 lr: 0.000006 loss_cls: 4.2122 (3.8961) grad_norm: 2.3932 (2.4099) time: 0.7659 data: 0.0003 max mem: 8426 +[2024-12-10 22:45:51 root] (utils.py 283): INFO Epoch: [21] [ 590/2502] eta: 0:24:27 lr: 0.000006 loss_cls: 4.1772 (3.8926) grad_norm: 2.3957 (2.4100) time: 0.7635 data: 0.0003 max mem: 8426 +[2024-12-10 22:45:59 root] (utils.py 283): INFO Epoch: [21] [ 600/2502] eta: 0:24:19 lr: 0.000006 loss_cls: 3.9209 (3.8918) grad_norm: 2.4163 (2.4103) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-10 22:46:07 root] (utils.py 283): INFO Epoch: [21] [ 610/2502] eta: 0:24:11 lr: 0.000006 loss_cls: 3.9357 (3.8934) grad_norm: 2.4163 (2.4101) time: 0.7649 data: 0.0003 max mem: 8426 +[2024-12-10 22:46:14 root] (utils.py 283): INFO Epoch: [21] [ 620/2502] eta: 0:24:03 lr: 0.000006 loss_cls: 4.2656 (3.9001) grad_norm: 2.3997 (2.4096) time: 0.7629 data: 0.0003 max mem: 8426 +[2024-12-10 22:46:22 root] (utils.py 283): INFO Epoch: [21] [ 630/2502] eta: 0:23:55 lr: 0.000006 loss_cls: 4.2294 (3.8943) grad_norm: 2.4047 (2.4092) time: 0.7626 data: 0.0003 max mem: 8426 +[2024-12-10 22:46:30 root] (utils.py 283): INFO Epoch: [21] [ 640/2502] eta: 0:23:47 lr: 0.000006 loss_cls: 3.5365 (3.8907) grad_norm: 2.3703 (2.4093) time: 0.7599 data: 0.0002 max mem: 8426 +[2024-12-10 22:46:37 root] (utils.py 283): INFO Epoch: [21] [ 650/2502] eta: 0:23:40 lr: 0.000006 loss_cls: 4.0322 (3.8923) grad_norm: 2.3703 (2.4097) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 22:46:45 root] (utils.py 283): INFO Epoch: [21] [ 660/2502] eta: 0:23:32 lr: 0.000006 loss_cls: 4.0331 (3.8889) grad_norm: 2.3310 (2.4092) time: 0.7678 data: 0.0002 max mem: 8426 +[2024-12-10 22:46:53 root] (utils.py 283): INFO Epoch: [21] [ 670/2502] eta: 0:23:25 lr: 0.000006 loss_cls: 3.4613 (3.8849) grad_norm: 2.3664 (2.4095) time: 0.7729 data: 0.0002 max mem: 8426 +[2024-12-10 22:47:01 root] (utils.py 283): INFO Epoch: [21] [ 680/2502] eta: 0:23:18 lr: 0.000006 loss_cls: 3.9843 (3.8857) grad_norm: 2.3981 (2.4090) time: 0.7817 data: 0.0002 max mem: 8426 +[2024-12-10 22:47:08 root] (utils.py 283): INFO Epoch: [21] [ 690/2502] eta: 0:23:10 lr: 0.000006 loss_cls: 3.9843 (3.8841) grad_norm: 2.3723 (2.4084) time: 0.7805 data: 0.0003 max mem: 8426 +[2024-12-10 22:47:16 root] (utils.py 283): INFO Epoch: [21] [ 700/2502] eta: 0:23:02 lr: 0.000006 loss_cls: 3.9291 (3.8821) grad_norm: 2.3547 (2.4085) time: 0.7701 data: 0.0003 max mem: 8426 +[2024-12-10 22:47:24 root] (utils.py 283): INFO Epoch: [21] [ 710/2502] eta: 0:22:55 lr: 0.000006 loss_cls: 3.8431 (3.8804) grad_norm: 2.4378 (2.4090) time: 0.7723 data: 0.0003 max mem: 8426 +[2024-12-10 22:47:31 root] (utils.py 283): INFO Epoch: [21] [ 720/2502] eta: 0:22:47 lr: 0.000006 loss_cls: 3.9099 (3.8791) grad_norm: 2.4183 (2.4097) time: 0.7749 data: 0.0003 max mem: 8426 +[2024-12-10 22:47:39 root] (utils.py 283): INFO Epoch: [21] [ 730/2502] eta: 0:22:40 lr: 0.000006 loss_cls: 3.8937 (3.8774) grad_norm: 2.3853 (2.4092) time: 0.7670 data: 0.0003 max mem: 8426 +[2024-12-10 22:47:47 root] (utils.py 283): INFO Epoch: [21] [ 740/2502] eta: 0:22:32 lr: 0.000006 loss_cls: 4.0532 (3.8814) grad_norm: 2.3380 (2.4085) time: 0.7619 data: 0.0003 max mem: 8426 +[2024-12-10 22:47:55 root] (utils.py 283): INFO Epoch: [21] [ 750/2502] eta: 0:22:24 lr: 0.000006 loss_cls: 4.1888 (3.8813) grad_norm: 2.3279 (2.4079) time: 0.7706 data: 0.0003 max mem: 8426 +[2024-12-10 22:48:02 root] (utils.py 283): INFO Epoch: [21] [ 760/2502] eta: 0:22:17 lr: 0.000006 loss_cls: 3.9103 (3.8817) grad_norm: 2.3024 (2.4073) time: 0.7699 data: 0.0002 max mem: 8426 +[2024-12-10 22:48:10 root] (utils.py 283): INFO Epoch: [21] [ 770/2502] eta: 0:22:09 lr: 0.000006 loss_cls: 3.9103 (3.8817) grad_norm: 2.3113 (2.4069) time: 0.7614 data: 0.0002 max mem: 8426 +[2024-12-10 22:48:17 root] (utils.py 283): INFO Epoch: [21] [ 780/2502] eta: 0:22:01 lr: 0.000006 loss_cls: 3.8693 (3.8771) grad_norm: 2.3949 (2.4073) time: 0.7666 data: 0.0002 max mem: 8426 +[2024-12-10 22:48:25 root] (utils.py 283): INFO Epoch: [21] [ 790/2502] eta: 0:21:54 lr: 0.000006 loss_cls: 3.6596 (3.8759) grad_norm: 2.3877 (2.4065) time: 0.7819 data: 0.0002 max mem: 8426 +[2024-12-10 22:48:33 root] (utils.py 283): INFO Epoch: [21] [ 800/2502] eta: 0:21:47 lr: 0.000006 loss_cls: 3.9806 (3.8759) grad_norm: 2.3416 (2.4062) time: 0.7912 data: 0.0002 max mem: 8426 +[2024-12-10 22:48:41 root] (utils.py 283): INFO Epoch: [21] [ 810/2502] eta: 0:21:40 lr: 0.000006 loss_cls: 3.9723 (3.8763) grad_norm: 2.3652 (2.4063) time: 0.7898 data: 0.0002 max mem: 8426 +[2024-12-10 22:48:49 root] (utils.py 283): INFO Epoch: [21] [ 820/2502] eta: 0:21:32 lr: 0.000006 loss_cls: 4.0629 (3.8790) grad_norm: 2.3652 (2.4059) time: 0.7861 data: 0.0002 max mem: 8426 +[2024-12-10 22:48:57 root] (utils.py 283): INFO Epoch: [21] [ 830/2502] eta: 0:21:25 lr: 0.000006 loss_cls: 4.2031 (3.8795) grad_norm: 2.3785 (2.4059) time: 0.7822 data: 0.0002 max mem: 8426 +[2024-12-10 22:49:05 root] (utils.py 283): INFO Epoch: [21] [ 840/2502] eta: 0:21:18 lr: 0.000006 loss_cls: 3.6980 (3.8754) grad_norm: 2.3313 (2.4058) time: 0.7849 data: 0.0002 max mem: 8426 +[2024-12-10 22:49:13 root] (utils.py 283): INFO Epoch: [21] [ 850/2502] eta: 0:21:10 lr: 0.000006 loss_cls: 3.5877 (3.8742) grad_norm: 2.3924 (2.4067) time: 0.7855 data: 0.0003 max mem: 8426 +[2024-12-10 22:49:20 root] (utils.py 283): INFO Epoch: [21] [ 860/2502] eta: 0:21:03 lr: 0.000006 loss_cls: 3.6761 (3.8747) grad_norm: 2.3892 (2.4066) time: 0.7865 data: 0.0003 max mem: 8426 +[2024-12-10 22:49:28 root] (utils.py 283): INFO Epoch: [21] [ 870/2502] eta: 0:20:55 lr: 0.000006 loss_cls: 4.0816 (3.8762) grad_norm: 2.3640 (2.4058) time: 0.7781 data: 0.0003 max mem: 8426 +[2024-12-10 22:49:36 root] (utils.py 283): INFO Epoch: [21] [ 880/2502] eta: 0:20:47 lr: 0.000006 loss_cls: 3.9993 (3.8759) grad_norm: 2.3705 (2.4060) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 22:49:43 root] (utils.py 283): INFO Epoch: [21] [ 890/2502] eta: 0:20:39 lr: 0.000006 loss_cls: 3.7776 (3.8749) grad_norm: 2.3727 (2.4058) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 22:49:51 root] (utils.py 283): INFO Epoch: [21] [ 900/2502] eta: 0:20:32 lr: 0.000006 loss_cls: 3.7776 (3.8748) grad_norm: 2.3727 (2.4061) time: 0.7633 data: 0.0002 max mem: 8426 +[2024-12-10 22:49:59 root] (utils.py 283): INFO Epoch: [21] [ 910/2502] eta: 0:20:24 lr: 0.000006 loss_cls: 4.1057 (3.8751) grad_norm: 2.3695 (2.4059) time: 0.7601 data: 0.0002 max mem: 8426 +[2024-12-10 22:50:06 root] (utils.py 283): INFO Epoch: [21] [ 920/2502] eta: 0:20:16 lr: 0.000006 loss_cls: 3.4636 (3.8710) grad_norm: 2.3695 (2.4060) time: 0.7603 data: 0.0002 max mem: 8426 +[2024-12-10 22:50:14 root] (utils.py 283): INFO Epoch: [21] [ 930/2502] eta: 0:20:08 lr: 0.000006 loss_cls: 3.5402 (3.8719) grad_norm: 2.3721 (2.4058) time: 0.7623 data: 0.0003 max mem: 8426 +[2024-12-10 22:50:21 root] (utils.py 283): INFO Epoch: [21] [ 940/2502] eta: 0:20:00 lr: 0.000006 loss_cls: 3.7716 (3.8692) grad_norm: 2.3855 (2.4053) time: 0.7632 data: 0.0003 max mem: 8426 +[2024-12-10 22:50:29 root] (utils.py 283): INFO Epoch: [21] [ 950/2502] eta: 0:19:53 lr: 0.000006 loss_cls: 3.7874 (3.8726) grad_norm: 2.4111 (2.4061) time: 0.7637 data: 0.0003 max mem: 8426 +[2024-12-10 22:50:37 root] (utils.py 283): INFO Epoch: [21] [ 960/2502] eta: 0:19:45 lr: 0.000006 loss_cls: 4.1388 (3.8724) grad_norm: 2.4165 (2.4062) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-10 22:50:44 root] (utils.py 283): INFO Epoch: [21] [ 970/2502] eta: 0:19:37 lr: 0.000006 loss_cls: 4.0349 (3.8734) grad_norm: 2.3808 (2.4051) time: 0.7645 data: 0.0003 max mem: 8426 +[2024-12-10 22:50:52 root] (utils.py 283): INFO Epoch: [21] [ 980/2502] eta: 0:19:29 lr: 0.000006 loss_cls: 4.0196 (3.8743) grad_norm: 2.3659 (2.4055) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 22:51:00 root] (utils.py 283): INFO Epoch: [21] [ 990/2502] eta: 0:19:21 lr: 0.000006 loss_cls: 4.1232 (3.8768) grad_norm: 2.4057 (2.4049) time: 0.7600 data: 0.0002 max mem: 8426 +[2024-12-10 22:51:07 root] (utils.py 283): INFO Epoch: [21] [1000/2502] eta: 0:19:14 lr: 0.000006 loss_cls: 4.1115 (3.8755) grad_norm: 2.3876 (2.4049) time: 0.7618 data: 0.0002 max mem: 8426 +[2024-12-10 22:51:15 root] (utils.py 283): INFO Epoch: [21] [1010/2502] eta: 0:19:06 lr: 0.000006 loss_cls: 3.9741 (3.8772) grad_norm: 2.3697 (2.4044) time: 0.7685 data: 0.0002 max mem: 8426 +[2024-12-10 22:51:23 root] (utils.py 283): INFO Epoch: [21] [1020/2502] eta: 0:18:58 lr: 0.000006 loss_cls: 3.9741 (3.8766) grad_norm: 2.4016 (2.4046) time: 0.7694 data: 0.0003 max mem: 8426 +[2024-12-10 22:51:30 root] (utils.py 283): INFO Epoch: [21] [1030/2502] eta: 0:18:51 lr: 0.000006 loss_cls: 3.8917 (3.8773) grad_norm: 2.3877 (2.4044) time: 0.7719 data: 0.0003 max mem: 8426 +[2024-12-10 22:51:38 root] (utils.py 283): INFO Epoch: [21] [1040/2502] eta: 0:18:43 lr: 0.000006 loss_cls: 4.0763 (3.8782) grad_norm: 2.3400 (2.4037) time: 0.7764 data: 0.0002 max mem: 8426 +[2024-12-10 22:51:46 root] (utils.py 283): INFO Epoch: [21] [1050/2502] eta: 0:18:36 lr: 0.000006 loss_cls: 4.0830 (3.8820) grad_norm: 2.3242 (2.4034) time: 0.7730 data: 0.0002 max mem: 8426 +[2024-12-10 22:51:53 root] (utils.py 283): INFO Epoch: [21] [1060/2502] eta: 0:18:28 lr: 0.000006 loss_cls: 4.3555 (3.8835) grad_norm: 2.3477 (2.4038) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-10 22:52:01 root] (utils.py 283): INFO Epoch: [21] [1070/2502] eta: 0:18:20 lr: 0.000006 loss_cls: 3.8920 (3.8826) grad_norm: 2.3477 (2.4031) time: 0.7614 data: 0.0002 max mem: 8426 +[2024-12-10 22:52:09 root] (utils.py 283): INFO Epoch: [21] [1080/2502] eta: 0:18:12 lr: 0.000006 loss_cls: 3.8920 (3.8803) grad_norm: 2.3466 (2.4030) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-10 22:52:16 root] (utils.py 283): INFO Epoch: [21] [1090/2502] eta: 0:18:05 lr: 0.000006 loss_cls: 3.7977 (3.8790) grad_norm: 2.3471 (2.4029) time: 0.7680 data: 0.0002 max mem: 8426 +[2024-12-10 22:52:24 root] (utils.py 283): INFO Epoch: [21] [1100/2502] eta: 0:17:57 lr: 0.000006 loss_cls: 3.4911 (3.8750) grad_norm: 2.3575 (2.4029) time: 0.7674 data: 0.0002 max mem: 8426 +[2024-12-10 22:52:32 root] (utils.py 283): INFO Epoch: [21] [1110/2502] eta: 0:17:49 lr: 0.000006 loss_cls: 3.7412 (3.8756) grad_norm: 2.3590 (2.4026) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-10 22:52:40 root] (utils.py 283): INFO Epoch: [21] [1120/2502] eta: 0:17:42 lr: 0.000006 loss_cls: 3.8704 (3.8733) grad_norm: 2.3558 (2.4026) time: 0.7678 data: 0.0002 max mem: 8426 +[2024-12-10 22:52:47 root] (utils.py 283): INFO Epoch: [21] [1130/2502] eta: 0:17:34 lr: 0.000006 loss_cls: 3.6869 (3.8720) grad_norm: 2.3558 (2.4025) time: 0.7682 data: 0.0002 max mem: 8426 +[2024-12-10 22:52:55 root] (utils.py 283): INFO Epoch: [21] [1140/2502] eta: 0:17:26 lr: 0.000006 loss_cls: 3.7797 (3.8720) grad_norm: 2.3847 (2.4029) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-10 22:53:03 root] (utils.py 283): INFO Epoch: [21] [1150/2502] eta: 0:17:18 lr: 0.000006 loss_cls: 3.9665 (3.8738) grad_norm: 2.4790 (2.4037) time: 0.7691 data: 0.0003 max mem: 8426 +[2024-12-10 22:53:10 root] (utils.py 283): INFO Epoch: [21] [1160/2502] eta: 0:17:11 lr: 0.000006 loss_cls: 4.0235 (3.8748) grad_norm: 2.4790 (2.4047) time: 0.7747 data: 0.0003 max mem: 8426 +[2024-12-10 22:53:18 root] (utils.py 283): INFO Epoch: [21] [1170/2502] eta: 0:17:03 lr: 0.000006 loss_cls: 4.0235 (3.8748) grad_norm: 2.4424 (2.4052) time: 0.7696 data: 0.0002 max mem: 8426 +[2024-12-10 22:53:26 root] (utils.py 283): INFO Epoch: [21] [1180/2502] eta: 0:16:55 lr: 0.000006 loss_cls: 3.8376 (3.8742) grad_norm: 2.3698 (2.4047) time: 0.7639 data: 0.0003 max mem: 8426 +[2024-12-10 22:53:33 root] (utils.py 283): INFO Epoch: [21] [1190/2502] eta: 0:16:48 lr: 0.000006 loss_cls: 4.0549 (3.8752) grad_norm: 2.3536 (2.4051) time: 0.7649 data: 0.0003 max mem: 8426 +[2024-12-10 22:53:41 root] (utils.py 283): INFO Epoch: [21] [1200/2502] eta: 0:16:40 lr: 0.000006 loss_cls: 4.0819 (3.8762) grad_norm: 2.4035 (2.4053) time: 0.7675 data: 0.0003 max mem: 8426 +[2024-12-10 22:53:49 root] (utils.py 283): INFO Epoch: [21] [1210/2502] eta: 0:16:32 lr: 0.000006 loss_cls: 3.9303 (3.8756) grad_norm: 2.4027 (2.4054) time: 0.7671 data: 0.0003 max mem: 8426 +[2024-12-10 22:53:56 root] (utils.py 283): INFO Epoch: [21] [1220/2502] eta: 0:16:25 lr: 0.000006 loss_cls: 3.8609 (3.8763) grad_norm: 2.3424 (2.4050) time: 0.7688 data: 0.0002 max mem: 8426 +[2024-12-10 22:54:04 root] (utils.py 283): INFO Epoch: [21] [1230/2502] eta: 0:16:17 lr: 0.000006 loss_cls: 3.9331 (3.8759) grad_norm: 2.3201 (2.4043) time: 0.7720 data: 0.0002 max mem: 8426 +[2024-12-10 22:54:12 root] (utils.py 283): INFO Epoch: [21] [1240/2502] eta: 0:16:09 lr: 0.000006 loss_cls: 4.0609 (3.8761) grad_norm: 2.3792 (2.4050) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-10 22:54:19 root] (utils.py 283): INFO Epoch: [21] [1250/2502] eta: 0:16:01 lr: 0.000006 loss_cls: 3.8635 (3.8755) grad_norm: 2.4466 (2.4050) time: 0.7611 data: 0.0002 max mem: 8426 +[2024-12-10 22:54:27 root] (utils.py 283): INFO Epoch: [21] [1260/2502] eta: 0:15:54 lr: 0.000006 loss_cls: 3.8635 (3.8754) grad_norm: 2.3784 (2.4047) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 22:54:35 root] (utils.py 283): INFO Epoch: [21] [1270/2502] eta: 0:15:46 lr: 0.000006 loss_cls: 4.0675 (3.8765) grad_norm: 2.3681 (2.4048) time: 0.7633 data: 0.0002 max mem: 8426 +[2024-12-10 22:54:42 root] (utils.py 283): INFO Epoch: [21] [1280/2502] eta: 0:15:38 lr: 0.000006 loss_cls: 4.0792 (3.8791) grad_norm: 2.4420 (2.4050) time: 0.7633 data: 0.0003 max mem: 8426 +[2024-12-10 22:54:50 root] (utils.py 283): INFO Epoch: [21] [1290/2502] eta: 0:15:31 lr: 0.000006 loss_cls: 4.0765 (3.8792) grad_norm: 2.3679 (2.4046) time: 0.7688 data: 0.0002 max mem: 8426 +[2024-12-10 22:54:58 root] (utils.py 283): INFO Epoch: [21] [1300/2502] eta: 0:15:23 lr: 0.000006 loss_cls: 4.1227 (3.8816) grad_norm: 2.3545 (2.4044) time: 0.7696 data: 0.0002 max mem: 8426 +[2024-12-10 22:55:05 root] (utils.py 283): INFO Epoch: [21] [1310/2502] eta: 0:15:15 lr: 0.000006 loss_cls: 4.1227 (3.8817) grad_norm: 2.3770 (2.4044) time: 0.7653 data: 0.0003 max mem: 8426 +[2024-12-10 22:55:13 root] (utils.py 283): INFO Epoch: [21] [1320/2502] eta: 0:15:08 lr: 0.000006 loss_cls: 3.7963 (3.8813) grad_norm: 2.3263 (2.4039) time: 0.7688 data: 0.0003 max mem: 8426 +[2024-12-10 22:55:21 root] (utils.py 283): INFO Epoch: [21] [1330/2502] eta: 0:15:00 lr: 0.000006 loss_cls: 4.1186 (3.8819) grad_norm: 2.3206 (2.4045) time: 0.7687 data: 0.0003 max mem: 8426 +[2024-12-10 22:55:28 root] (utils.py 283): INFO Epoch: [21] [1340/2502] eta: 0:14:52 lr: 0.000006 loss_cls: 4.3007 (3.8840) grad_norm: 2.3770 (2.4042) time: 0.7698 data: 0.0003 max mem: 8426 +[2024-12-10 22:55:36 root] (utils.py 283): INFO Epoch: [21] [1350/2502] eta: 0:14:44 lr: 0.000006 loss_cls: 4.2197 (3.8850) grad_norm: 2.4170 (2.4045) time: 0.7669 data: 0.0003 max mem: 8426 +[2024-12-10 22:55:44 root] (utils.py 283): INFO Epoch: [21] [1360/2502] eta: 0:14:37 lr: 0.000006 loss_cls: 4.1713 (3.8862) grad_norm: 2.4059 (2.4042) time: 0.7720 data: 0.0003 max mem: 8426 +[2024-12-10 22:55:51 root] (utils.py 283): INFO Epoch: [21] [1370/2502] eta: 0:14:29 lr: 0.000006 loss_cls: 3.9899 (3.8859) grad_norm: 2.3353 (2.4040) time: 0.7737 data: 0.0002 max mem: 8426 +[2024-12-10 22:55:59 root] (utils.py 283): INFO Epoch: [21] [1380/2502] eta: 0:14:21 lr: 0.000006 loss_cls: 3.9214 (3.8842) grad_norm: 2.3353 (2.4038) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 22:56:07 root] (utils.py 283): INFO Epoch: [21] [1390/2502] eta: 0:14:14 lr: 0.000006 loss_cls: 3.9588 (3.8842) grad_norm: 2.3779 (2.4039) time: 0.7636 data: 0.0003 max mem: 8426 +[2024-12-10 22:56:14 root] (utils.py 283): INFO Epoch: [21] [1400/2502] eta: 0:14:06 lr: 0.000006 loss_cls: 4.0649 (3.8862) grad_norm: 2.3793 (2.4040) time: 0.7601 data: 0.0003 max mem: 8426 +[2024-12-10 22:56:22 root] (utils.py 283): INFO Epoch: [21] [1410/2502] eta: 0:13:58 lr: 0.000006 loss_cls: 4.0528 (3.8867) grad_norm: 2.3491 (2.4038) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-10 22:56:30 root] (utils.py 283): INFO Epoch: [21] [1420/2502] eta: 0:13:51 lr: 0.000006 loss_cls: 3.7646 (3.8865) grad_norm: 2.3586 (2.4036) time: 0.7671 data: 0.0003 max mem: 8426 +[2024-12-10 22:56:37 root] (utils.py 283): INFO Epoch: [21] [1430/2502] eta: 0:13:43 lr: 0.000006 loss_cls: 3.7430 (3.8850) grad_norm: 2.4077 (2.4041) time: 0.7627 data: 0.0003 max mem: 8426 +[2024-12-10 22:56:45 root] (utils.py 283): INFO Epoch: [21] [1440/2502] eta: 0:13:35 lr: 0.000006 loss_cls: 3.7430 (3.8837) grad_norm: 2.3918 (2.4040) time: 0.7637 data: 0.0003 max mem: 8426 +[2024-12-10 22:56:52 root] (utils.py 283): INFO Epoch: [21] [1450/2502] eta: 0:13:27 lr: 0.000006 loss_cls: 4.0857 (3.8845) grad_norm: 2.3398 (2.4035) time: 0.7635 data: 0.0003 max mem: 8426 +[2024-12-10 22:57:00 root] (utils.py 283): INFO Epoch: [21] [1460/2502] eta: 0:13:20 lr: 0.000006 loss_cls: 4.0857 (3.8837) grad_norm: 2.3483 (2.4034) time: 0.7625 data: 0.0003 max mem: 8426 +[2024-12-10 22:57:08 root] (utils.py 283): INFO Epoch: [21] [1470/2502] eta: 0:13:12 lr: 0.000006 loss_cls: 4.0026 (3.8834) grad_norm: 2.4147 (2.4039) time: 0.7609 data: 0.0003 max mem: 8426 +[2024-12-10 22:57:15 root] (utils.py 283): INFO Epoch: [21] [1480/2502] eta: 0:13:04 lr: 0.000006 loss_cls: 4.0726 (3.8827) grad_norm: 2.4563 (2.4038) time: 0.7647 data: 0.0003 max mem: 8426 +[2024-12-10 22:57:23 root] (utils.py 283): INFO Epoch: [21] [1490/2502] eta: 0:12:57 lr: 0.000006 loss_cls: 4.1471 (3.8856) grad_norm: 2.3405 (2.4034) time: 0.7689 data: 0.0003 max mem: 8426 +[2024-12-10 22:57:31 root] (utils.py 283): INFO Epoch: [21] [1500/2502] eta: 0:12:49 lr: 0.000006 loss_cls: 4.0533 (3.8855) grad_norm: 2.3459 (2.4033) time: 0.7652 data: 0.0003 max mem: 8426 +[2024-12-10 22:57:38 root] (utils.py 283): INFO Epoch: [21] [1510/2502] eta: 0:12:41 lr: 0.000006 loss_cls: 3.9395 (3.8855) grad_norm: 2.4077 (2.4033) time: 0.7643 data: 0.0003 max mem: 8426 +[2024-12-10 22:57:46 root] (utils.py 283): INFO Epoch: [21] [1520/2502] eta: 0:12:34 lr: 0.000006 loss_cls: 4.0683 (3.8870) grad_norm: 2.3390 (2.4031) time: 0.7650 data: 0.0003 max mem: 8426 +[2024-12-10 22:57:54 root] (utils.py 283): INFO Epoch: [21] [1530/2502] eta: 0:12:26 lr: 0.000006 loss_cls: 3.9835 (3.8864) grad_norm: 2.3390 (2.4030) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 22:58:01 root] (utils.py 283): INFO Epoch: [21] [1540/2502] eta: 0:12:18 lr: 0.000006 loss_cls: 3.9835 (3.8879) grad_norm: 2.3863 (2.4031) time: 0.7623 data: 0.0003 max mem: 8426 +[2024-12-10 22:58:09 root] (utils.py 283): INFO Epoch: [21] [1550/2502] eta: 0:12:10 lr: 0.000006 loss_cls: 4.2119 (3.8900) grad_norm: 2.4250 (2.4035) time: 0.7632 data: 0.0003 max mem: 8426 +[2024-12-10 22:58:17 root] (utils.py 283): INFO Epoch: [21] [1560/2502] eta: 0:12:03 lr: 0.000006 loss_cls: 3.8829 (3.8871) grad_norm: 2.4159 (2.4035) time: 0.7693 data: 0.0002 max mem: 8426 +[2024-12-10 22:58:24 root] (utils.py 283): INFO Epoch: [21] [1570/2502] eta: 0:11:55 lr: 0.000006 loss_cls: 3.5777 (3.8871) grad_norm: 2.3719 (2.4031) time: 0.7668 data: 0.0002 max mem: 8426 +[2024-12-10 22:58:32 root] (utils.py 283): INFO Epoch: [21] [1580/2502] eta: 0:11:47 lr: 0.000006 loss_cls: 3.5566 (3.8837) grad_norm: 2.4281 (2.4034) time: 0.7632 data: 0.0003 max mem: 8426 +[2024-12-10 22:58:40 root] (utils.py 283): INFO Epoch: [21] [1590/2502] eta: 0:11:40 lr: 0.000006 loss_cls: 3.5817 (3.8834) grad_norm: 2.4334 (2.4034) time: 0.7660 data: 0.0003 max mem: 8426 +[2024-12-10 22:58:47 root] (utils.py 283): INFO Epoch: [21] [1600/2502] eta: 0:11:32 lr: 0.000006 loss_cls: 4.0756 (3.8849) grad_norm: 2.3827 (2.4032) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-10 22:58:55 root] (utils.py 283): INFO Epoch: [21] [1610/2502] eta: 0:11:24 lr: 0.000006 loss_cls: 4.1835 (3.8839) grad_norm: 2.3336 (2.4030) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 22:59:02 root] (utils.py 283): INFO Epoch: [21] [1620/2502] eta: 0:11:17 lr: 0.000006 loss_cls: 3.4475 (3.8818) grad_norm: 2.3324 (2.4025) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 22:59:10 root] (utils.py 283): INFO Epoch: [21] [1630/2502] eta: 0:11:09 lr: 0.000006 loss_cls: 3.8647 (3.8822) grad_norm: 2.3341 (2.4022) time: 0.7659 data: 0.0002 max mem: 8426 +[2024-12-10 22:59:18 root] (utils.py 283): INFO Epoch: [21] [1640/2502] eta: 0:11:01 lr: 0.000006 loss_cls: 3.9747 (3.8822) grad_norm: 2.3152 (2.4018) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 22:59:25 root] (utils.py 283): INFO Epoch: [21] [1650/2502] eta: 0:10:54 lr: 0.000006 loss_cls: 4.0828 (3.8839) grad_norm: 2.3419 (2.4021) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 22:59:33 root] (utils.py 283): INFO Epoch: [21] [1660/2502] eta: 0:10:46 lr: 0.000006 loss_cls: 4.3260 (3.8849) grad_norm: 2.3867 (2.4020) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 22:59:41 root] (utils.py 283): INFO Epoch: [21] [1670/2502] eta: 0:10:38 lr: 0.000006 loss_cls: 4.0010 (3.8839) grad_norm: 2.3948 (2.4020) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 22:59:48 root] (utils.py 283): INFO Epoch: [21] [1680/2502] eta: 0:10:30 lr: 0.000006 loss_cls: 3.9361 (3.8838) grad_norm: 2.3948 (2.4022) time: 0.7652 data: 0.0003 max mem: 8426 +[2024-12-10 22:59:56 root] (utils.py 283): INFO Epoch: [21] [1690/2502] eta: 0:10:23 lr: 0.000006 loss_cls: 3.9361 (3.8837) grad_norm: 2.3894 (2.4020) time: 0.7696 data: 0.0003 max mem: 8426 +[2024-12-10 23:00:04 root] (utils.py 283): INFO Epoch: [21] [1700/2502] eta: 0:10:15 lr: 0.000006 loss_cls: 3.9920 (3.8846) grad_norm: 2.3633 (2.4019) time: 0.7669 data: 0.0003 max mem: 8426 +[2024-12-10 23:00:11 root] (utils.py 283): INFO Epoch: [21] [1710/2502] eta: 0:10:07 lr: 0.000006 loss_cls: 4.0019 (3.8848) grad_norm: 2.3565 (2.4017) time: 0.7618 data: 0.0002 max mem: 8426 +[2024-12-10 23:00:19 root] (utils.py 283): INFO Epoch: [21] [1720/2502] eta: 0:10:00 lr: 0.000006 loss_cls: 4.0644 (3.8866) grad_norm: 2.3565 (2.4019) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-10 23:00:27 root] (utils.py 283): INFO Epoch: [21] [1730/2502] eta: 0:09:52 lr: 0.000006 loss_cls: 4.0644 (3.8866) grad_norm: 2.4199 (2.4020) time: 0.7706 data: 0.0003 max mem: 8426 +[2024-12-10 23:00:34 root] (utils.py 283): INFO Epoch: [21] [1740/2502] eta: 0:09:44 lr: 0.000006 loss_cls: 3.9869 (3.8876) grad_norm: 2.4417 (2.4020) time: 0.7675 data: 0.0003 max mem: 8426 +[2024-12-10 23:00:42 root] (utils.py 283): INFO Epoch: [21] [1750/2502] eta: 0:09:37 lr: 0.000006 loss_cls: 3.9695 (3.8867) grad_norm: 2.4166 (2.4020) time: 0.7656 data: 0.0003 max mem: 8426 +[2024-12-10 23:00:50 root] (utils.py 283): INFO Epoch: [21] [1760/2502] eta: 0:09:29 lr: 0.000006 loss_cls: 3.9049 (3.8873) grad_norm: 2.3790 (2.4023) time: 0.7646 data: 0.0003 max mem: 8426 +[2024-12-10 23:00:57 root] (utils.py 283): INFO Epoch: [21] [1770/2502] eta: 0:09:21 lr: 0.000006 loss_cls: 4.0263 (3.8879) grad_norm: 2.4023 (2.4024) time: 0.7630 data: 0.0002 max mem: 8426 +[2024-12-10 23:01:05 root] (utils.py 283): INFO Epoch: [21] [1780/2502] eta: 0:09:14 lr: 0.000006 loss_cls: 4.0181 (3.8881) grad_norm: 2.4014 (2.4022) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 23:01:13 root] (utils.py 283): INFO Epoch: [21] [1790/2502] eta: 0:09:06 lr: 0.000006 loss_cls: 3.9688 (3.8881) grad_norm: 2.3885 (2.4023) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 23:01:20 root] (utils.py 283): INFO Epoch: [21] [1800/2502] eta: 0:08:58 lr: 0.000006 loss_cls: 3.5487 (3.8876) grad_norm: 2.4054 (2.4026) time: 0.7706 data: 0.0002 max mem: 8426 +[2024-12-10 23:01:28 root] (utils.py 283): INFO Epoch: [21] [1810/2502] eta: 0:08:51 lr: 0.000006 loss_cls: 3.4161 (3.8854) grad_norm: 2.4251 (2.4030) time: 0.7675 data: 0.0002 max mem: 8426 +[2024-12-10 23:01:36 root] (utils.py 283): INFO Epoch: [21] [1820/2502] eta: 0:08:43 lr: 0.000006 loss_cls: 3.7306 (3.8852) grad_norm: 2.4961 (2.4031) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 23:01:43 root] (utils.py 283): INFO Epoch: [21] [1830/2502] eta: 0:08:35 lr: 0.000006 loss_cls: 3.7306 (3.8827) grad_norm: 2.3846 (2.4029) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-10 23:01:51 root] (utils.py 283): INFO Epoch: [21] [1840/2502] eta: 0:08:28 lr: 0.000006 loss_cls: 3.9380 (3.8844) grad_norm: 2.3581 (2.4031) time: 0.7690 data: 0.0002 max mem: 8426 +[2024-12-10 23:01:59 root] (utils.py 283): INFO Epoch: [21] [1850/2502] eta: 0:08:20 lr: 0.000006 loss_cls: 4.1803 (3.8860) grad_norm: 2.3559 (2.4029) time: 0.7646 data: 0.0003 max mem: 8426 +[2024-12-10 23:02:06 root] (utils.py 283): INFO Epoch: [21] [1860/2502] eta: 0:08:12 lr: 0.000006 loss_cls: 3.7958 (3.8838) grad_norm: 2.3240 (2.4027) time: 0.7600 data: 0.0003 max mem: 8426 +[2024-12-10 23:02:14 root] (utils.py 283): INFO Epoch: [21] [1870/2502] eta: 0:08:04 lr: 0.000006 loss_cls: 3.2457 (3.8818) grad_norm: 2.4084 (2.4030) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 23:02:21 root] (utils.py 283): INFO Epoch: [21] [1880/2502] eta: 0:07:57 lr: 0.000006 loss_cls: 3.6499 (3.8815) grad_norm: 2.4109 (2.4029) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 23:02:29 root] (utils.py 283): INFO Epoch: [21] [1890/2502] eta: 0:07:49 lr: 0.000006 loss_cls: 3.8358 (3.8821) grad_norm: 2.4109 (2.4030) time: 0.7610 data: 0.0002 max mem: 8426 +[2024-12-10 23:02:37 root] (utils.py 283): INFO Epoch: [21] [1900/2502] eta: 0:07:41 lr: 0.000006 loss_cls: 3.8358 (3.8823) grad_norm: 2.4204 (2.4030) time: 0.7653 data: 0.0003 max mem: 8426 +[2024-12-10 23:02:44 root] (utils.py 283): INFO Epoch: [21] [1910/2502] eta: 0:07:34 lr: 0.000006 loss_cls: 3.6006 (3.8806) grad_norm: 2.3950 (2.4029) time: 0.7646 data: 0.0003 max mem: 8426 +[2024-12-10 23:02:52 root] (utils.py 283): INFO Epoch: [21] [1920/2502] eta: 0:07:26 lr: 0.000006 loss_cls: 3.6522 (3.8814) grad_norm: 2.3984 (2.4029) time: 0.7638 data: 0.0003 max mem: 8426 +[2024-12-10 23:03:00 root] (utils.py 283): INFO Epoch: [21] [1930/2502] eta: 0:07:18 lr: 0.000006 loss_cls: 4.1817 (3.8817) grad_norm: 2.4098 (2.4029) time: 0.7650 data: 0.0003 max mem: 8426 +[2024-12-10 23:03:07 root] (utils.py 283): INFO Epoch: [21] [1940/2502] eta: 0:07:11 lr: 0.000006 loss_cls: 4.0117 (3.8827) grad_norm: 2.3992 (2.4031) time: 0.7629 data: 0.0003 max mem: 8426 +[2024-12-10 23:03:15 root] (utils.py 283): INFO Epoch: [21] [1950/2502] eta: 0:07:03 lr: 0.000006 loss_cls: 3.7918 (3.8811) grad_norm: 2.4341 (2.4035) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 23:03:23 root] (utils.py 283): INFO Epoch: [21] [1960/2502] eta: 0:06:55 lr: 0.000006 loss_cls: 3.9029 (3.8823) grad_norm: 2.4420 (2.4036) time: 0.7668 data: 0.0003 max mem: 8426 +[2024-12-10 23:03:30 root] (utils.py 283): INFO Epoch: [21] [1970/2502] eta: 0:06:48 lr: 0.000006 loss_cls: 4.1376 (3.8823) grad_norm: 2.4258 (2.4041) time: 0.7662 data: 0.0003 max mem: 8426 +[2024-12-10 23:03:38 root] (utils.py 283): INFO Epoch: [21] [1980/2502] eta: 0:06:40 lr: 0.000006 loss_cls: 3.9212 (3.8822) grad_norm: 2.3872 (2.4040) time: 0.7618 data: 0.0002 max mem: 8426 +[2024-12-10 23:03:46 root] (utils.py 283): INFO Epoch: [21] [1990/2502] eta: 0:06:32 lr: 0.000006 loss_cls: 3.8953 (3.8813) grad_norm: 2.4134 (2.4043) time: 0.7636 data: 0.0003 max mem: 8426 +[2024-12-10 23:03:53 root] (utils.py 283): INFO Epoch: [21] [2000/2502] eta: 0:06:25 lr: 0.000006 loss_cls: 3.7190 (3.8807) grad_norm: 2.4427 (2.4044) time: 0.7665 data: 0.0003 max mem: 8426 +[2024-12-10 23:04:01 root] (utils.py 283): INFO Epoch: [21] [2010/2502] eta: 0:06:17 lr: 0.000006 loss_cls: 3.9958 (3.8812) grad_norm: 2.4776 (2.4048) time: 0.7658 data: 0.0003 max mem: 8426 +[2024-12-10 23:04:08 root] (utils.py 283): INFO Epoch: [21] [2020/2502] eta: 0:06:09 lr: 0.000006 loss_cls: 4.1111 (3.8823) grad_norm: 2.4776 (2.4047) time: 0.7636 data: 0.0003 max mem: 8426 +[2024-12-10 23:04:16 root] (utils.py 283): INFO Epoch: [21] [2030/2502] eta: 0:06:02 lr: 0.000006 loss_cls: 4.1154 (3.8839) grad_norm: 2.3745 (2.4051) time: 0.7655 data: 0.0003 max mem: 8426 +[2024-12-10 23:04:24 root] (utils.py 283): INFO Epoch: [21] [2040/2502] eta: 0:05:54 lr: 0.000006 loss_cls: 4.0566 (3.8845) grad_norm: 2.3745 (2.4047) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 23:04:31 root] (utils.py 283): INFO Epoch: [21] [2050/2502] eta: 0:05:46 lr: 0.000006 loss_cls: 4.1010 (3.8852) grad_norm: 2.4112 (2.4050) time: 0.7625 data: 0.0002 max mem: 8426 +[2024-12-10 23:04:39 root] (utils.py 283): INFO Epoch: [21] [2060/2502] eta: 0:05:39 lr: 0.000006 loss_cls: 4.1010 (3.8868) grad_norm: 2.4508 (2.4052) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 23:04:47 root] (utils.py 283): INFO Epoch: [21] [2070/2502] eta: 0:05:31 lr: 0.000006 loss_cls: 3.9408 (3.8856) grad_norm: 2.4183 (2.4054) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 23:04:54 root] (utils.py 283): INFO Epoch: [21] [2080/2502] eta: 0:05:23 lr: 0.000006 loss_cls: 3.8702 (3.8855) grad_norm: 2.4105 (2.4055) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 23:05:02 root] (utils.py 283): INFO Epoch: [21] [2090/2502] eta: 0:05:16 lr: 0.000006 loss_cls: 4.0704 (3.8860) grad_norm: 2.4044 (2.4054) time: 0.7644 data: 0.0003 max mem: 8426 +[2024-12-10 23:05:10 root] (utils.py 283): INFO Epoch: [21] [2100/2502] eta: 0:05:08 lr: 0.000006 loss_cls: 3.8988 (3.8852) grad_norm: 2.3500 (2.4057) time: 0.7630 data: 0.0003 max mem: 8426 +[2024-12-10 23:05:17 root] (utils.py 283): INFO Epoch: [21] [2110/2502] eta: 0:05:00 lr: 0.000006 loss_cls: 3.6639 (3.8845) grad_norm: 2.4113 (2.4058) time: 0.7618 data: 0.0003 max mem: 8426 +[2024-12-10 23:05:25 root] (utils.py 283): INFO Epoch: [21] [2120/2502] eta: 0:04:52 lr: 0.000006 loss_cls: 3.8030 (3.8846) grad_norm: 2.4566 (2.4063) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 23:05:33 root] (utils.py 283): INFO Epoch: [21] [2130/2502] eta: 0:04:45 lr: 0.000006 loss_cls: 3.9452 (3.8843) grad_norm: 2.4532 (2.4062) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 23:05:40 root] (utils.py 283): INFO Epoch: [21] [2140/2502] eta: 0:04:37 lr: 0.000006 loss_cls: 3.9452 (3.8849) grad_norm: 2.3018 (2.4058) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 23:05:48 root] (utils.py 283): INFO Epoch: [21] [2150/2502] eta: 0:04:29 lr: 0.000006 loss_cls: 3.6610 (3.8842) grad_norm: 2.3703 (2.4058) time: 0.7632 data: 0.0003 max mem: 8426 +[2024-12-10 23:05:55 root] (utils.py 283): INFO Epoch: [21] [2160/2502] eta: 0:04:22 lr: 0.000006 loss_cls: 3.5780 (3.8831) grad_norm: 2.4149 (2.4061) time: 0.7624 data: 0.0003 max mem: 8426 +[2024-12-10 23:06:03 root] (utils.py 283): INFO Epoch: [21] [2170/2502] eta: 0:04:14 lr: 0.000006 loss_cls: 3.6201 (3.8815) grad_norm: 2.4583 (2.4063) time: 0.7637 data: 0.0003 max mem: 8426 +[2024-12-10 23:06:11 root] (utils.py 283): INFO Epoch: [21] [2180/2502] eta: 0:04:06 lr: 0.000006 loss_cls: 3.9165 (3.8827) grad_norm: 2.4058 (2.4063) time: 0.7618 data: 0.0003 max mem: 8426 +[2024-12-10 23:06:18 root] (utils.py 283): INFO Epoch: [21] [2190/2502] eta: 0:03:59 lr: 0.000006 loss_cls: 3.9165 (3.8811) grad_norm: 2.4341 (2.4065) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-10 23:06:26 root] (utils.py 283): INFO Epoch: [21] [2200/2502] eta: 0:03:51 lr: 0.000006 loss_cls: 3.3766 (3.8796) grad_norm: 2.4341 (2.4065) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 23:06:34 root] (utils.py 283): INFO Epoch: [21] [2210/2502] eta: 0:03:43 lr: 0.000006 loss_cls: 3.6361 (3.8796) grad_norm: 2.3587 (2.4061) time: 0.7612 data: 0.0002 max mem: 8426 +[2024-12-10 23:06:41 root] (utils.py 283): INFO Epoch: [21] [2220/2502] eta: 0:03:36 lr: 0.000006 loss_cls: 3.8688 (3.8792) grad_norm: 2.3808 (2.4064) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-10 23:06:49 root] (utils.py 283): INFO Epoch: [21] [2230/2502] eta: 0:03:28 lr: 0.000006 loss_cls: 4.1022 (3.8796) grad_norm: 2.4827 (2.4066) time: 0.7651 data: 0.0003 max mem: 8426 +[2024-12-10 23:06:56 root] (utils.py 283): INFO Epoch: [21] [2240/2502] eta: 0:03:20 lr: 0.000006 loss_cls: 3.9151 (3.8792) grad_norm: 2.4518 (2.4066) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-10 23:07:04 root] (utils.py 283): INFO Epoch: [21] [2250/2502] eta: 0:03:13 lr: 0.000006 loss_cls: 4.0612 (3.8792) grad_norm: 2.3602 (2.4066) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 23:07:12 root] (utils.py 283): INFO Epoch: [21] [2260/2502] eta: 0:03:05 lr: 0.000006 loss_cls: 4.0612 (3.8790) grad_norm: 2.3748 (2.4067) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 23:07:19 root] (utils.py 283): INFO Epoch: [21] [2270/2502] eta: 0:02:57 lr: 0.000006 loss_cls: 3.8246 (3.8792) grad_norm: 2.3955 (2.4066) time: 0.7635 data: 0.0003 max mem: 8426 +[2024-12-10 23:07:27 root] (utils.py 283): INFO Epoch: [21] [2280/2502] eta: 0:02:50 lr: 0.000006 loss_cls: 3.8246 (3.8794) grad_norm: 2.3924 (2.4066) time: 0.7626 data: 0.0003 max mem: 8426 +[2024-12-10 23:07:35 root] (utils.py 283): INFO Epoch: [21] [2290/2502] eta: 0:02:42 lr: 0.000006 loss_cls: 3.9774 (3.8796) grad_norm: 2.4254 (2.4068) time: 0.7594 data: 0.0002 max mem: 8426 +[2024-12-10 23:07:42 root] (utils.py 283): INFO Epoch: [21] [2300/2502] eta: 0:02:34 lr: 0.000006 loss_cls: 3.9293 (3.8789) grad_norm: 2.3877 (2.4067) time: 0.7607 data: 0.0003 max mem: 8426 +[2024-12-10 23:07:50 root] (utils.py 283): INFO Epoch: [21] [2310/2502] eta: 0:02:27 lr: 0.000006 loss_cls: 4.1894 (3.8795) grad_norm: 2.4104 (2.4069) time: 0.7613 data: 0.0002 max mem: 8426 +[2024-12-10 23:07:57 root] (utils.py 283): INFO Epoch: [21] [2320/2502] eta: 0:02:19 lr: 0.000006 loss_cls: 4.1937 (3.8799) grad_norm: 2.4145 (2.4069) time: 0.7604 data: 0.0002 max mem: 8426 +[2024-12-10 23:08:05 root] (utils.py 283): INFO Epoch: [21] [2330/2502] eta: 0:02:11 lr: 0.000006 loss_cls: 4.1700 (3.8808) grad_norm: 2.3811 (2.4069) time: 0.7618 data: 0.0002 max mem: 8426 +[2024-12-10 23:08:13 root] (utils.py 283): INFO Epoch: [21] [2340/2502] eta: 0:02:04 lr: 0.000006 loss_cls: 4.1784 (3.8817) grad_norm: 2.4208 (2.4071) time: 0.7626 data: 0.0003 max mem: 8426 +[2024-12-10 23:08:20 root] (utils.py 283): INFO Epoch: [21] [2350/2502] eta: 0:01:56 lr: 0.000006 loss_cls: 4.0058 (3.8813) grad_norm: 2.4206 (2.4071) time: 0.7623 data: 0.0003 max mem: 8426 +[2024-12-10 23:08:28 root] (utils.py 283): INFO Epoch: [21] [2360/2502] eta: 0:01:48 lr: 0.000006 loss_cls: 4.1380 (3.8826) grad_norm: 2.3401 (2.4068) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-10 23:08:36 root] (utils.py 283): INFO Epoch: [21] [2370/2502] eta: 0:01:41 lr: 0.000006 loss_cls: 4.2167 (3.8829) grad_norm: 2.2935 (2.4066) time: 0.7696 data: 0.0002 max mem: 8426 +[2024-12-10 23:08:43 root] (utils.py 283): INFO Epoch: [21] [2380/2502] eta: 0:01:33 lr: 0.000006 loss_cls: 3.8752 (3.8825) grad_norm: 2.3734 (2.4065) time: 0.7691 data: 0.0002 max mem: 8426 +[2024-12-10 23:08:51 root] (utils.py 283): INFO Epoch: [21] [2390/2502] eta: 0:01:25 lr: 0.000006 loss_cls: 3.8084 (3.8822) grad_norm: 2.3792 (2.4064) time: 0.7623 data: 0.0003 max mem: 8426 +[2024-12-10 23:08:59 root] (utils.py 283): INFO Epoch: [21] [2400/2502] eta: 0:01:18 lr: 0.000006 loss_cls: 4.0050 (3.8830) grad_norm: 2.3444 (2.4064) time: 0.7616 data: 0.0003 max mem: 8426 +[2024-12-10 23:09:06 root] (utils.py 283): INFO Epoch: [21] [2410/2502] eta: 0:01:10 lr: 0.000006 loss_cls: 3.9062 (3.8819) grad_norm: 2.4029 (2.4067) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 23:09:14 root] (utils.py 283): INFO Epoch: [21] [2420/2502] eta: 0:01:02 lr: 0.000006 loss_cls: 3.9245 (3.8820) grad_norm: 2.4029 (2.4066) time: 0.7606 data: 0.0002 max mem: 8426 +[2024-12-10 23:09:21 root] (utils.py 283): INFO Epoch: [21] [2430/2502] eta: 0:00:55 lr: 0.000006 loss_cls: 4.0710 (3.8809) grad_norm: 2.3802 (2.4068) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 23:09:29 root] (utils.py 283): INFO Epoch: [21] [2440/2502] eta: 0:00:47 lr: 0.000006 loss_cls: 4.0710 (3.8812) grad_norm: 2.3959 (2.4068) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-10 23:09:37 root] (utils.py 283): INFO Epoch: [21] [2450/2502] eta: 0:00:39 lr: 0.000006 loss_cls: 4.0620 (3.8806) grad_norm: 2.4010 (2.4070) time: 0.7668 data: 0.0002 max mem: 8426 +[2024-12-10 23:09:44 root] (utils.py 283): INFO Epoch: [21] [2460/2502] eta: 0:00:32 lr: 0.000006 loss_cls: 3.7586 (3.8799) grad_norm: 2.4550 (2.4071) time: 0.7668 data: 0.0002 max mem: 8426 +[2024-12-10 23:09:52 root] (utils.py 283): INFO Epoch: [21] [2470/2502] eta: 0:00:24 lr: 0.000006 loss_cls: 3.7134 (3.8796) grad_norm: 2.4345 (2.4072) time: 0.7647 data: 0.0002 max mem: 8426 +[2024-12-10 23:10:00 root] (utils.py 283): INFO Epoch: [21] [2480/2502] eta: 0:00:16 lr: 0.000006 loss_cls: 3.7572 (3.8805) grad_norm: 2.4494 (2.4073) time: 0.7639 data: 0.0003 max mem: 8426 +[2024-12-10 23:10:08 root] (utils.py 283): INFO Epoch: [21] [2490/2502] eta: 0:00:09 lr: 0.000006 loss_cls: 4.3024 (3.8808) grad_norm: 2.3892 (2.4072) time: 0.7886 data: 0.0234 max mem: 8426 +[2024-12-10 23:10:16 root] (utils.py 283): INFO Epoch: [21] [2500/2502] eta: 0:00:01 lr: 0.000006 loss_cls: 3.7250 (3.8798) grad_norm: 2.3760 (2.4071) time: 0.7981 data: 0.0234 max mem: 8426 +[2024-12-10 23:10:16 root] (utils.py 283): INFO Epoch: [21] [2501/2502] eta: 0:00:00 lr: 0.000006 loss_cls: 4.0672 (3.8800) grad_norm: 2.3760 (2.4070) time: 0.7994 data: 0.0234 max mem: 8426 +[2024-12-10 23:10:16 root] (utils.py 297): INFO Epoch: [21] Total time: 0:31:58 (0.7668 s / it) +[2024-12-10 23:10:16 root] (engine.py 179): INFO Averaged stats:lr: 0.000006 loss_cls: 4.0672 (3.9002) grad_norm: 2.3760 (2.4070) +[2024-12-10 23:10:17 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.6298 (0.6298) acc1: 85.1562 (85.1562) acc3: 96.8750 (96.8750) acc5: 100.0000 (100.0000) time: 0.1276 data: 0.0003 max mem: 8426 +[2024-12-10 23:10:18 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7364 (0.8081) acc1: 85.1562 (82.6705) acc3: 95.3125 (93.6790) acc5: 96.8750 (96.4489) time: 0.1278 data: 0.0003 max mem: 8426 +[2024-12-10 23:10:20 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8392 (0.8582) acc1: 80.4688 (81.4360) acc3: 92.1875 (92.9688) acc5: 94.5312 (95.5357) time: 0.1291 data: 0.0003 max mem: 8426 +[2024-12-10 23:10:21 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9365 (0.8749) acc1: 78.9062 (80.4435) acc3: 92.1875 (93.0948) acc5: 95.3125 (95.6653) time: 0.1320 data: 0.0004 max mem: 8426 +[2024-12-10 23:10:22 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8131 (0.8628) acc1: 80.4688 (80.9451) acc3: 94.5312 (93.1784) acc5: 96.0938 (95.7698) time: 0.1332 data: 0.0004 max mem: 8426 +[2024-12-10 23:10:24 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0216 (0.9483) acc1: 75.0000 (78.8603) acc3: 87.5000 (91.8199) acc5: 93.7500 (94.7763) time: 0.1310 data: 0.0004 max mem: 8426 +[2024-12-10 23:10:25 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:04 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2103 (0.9938) acc1: 71.8750 (78.0225) acc3: 85.1562 (90.9068) acc5: 89.8438 (93.9805) time: 0.1325 data: 0.0038 max mem: 8426 +[2024-12-10 23:10:26 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1981 (1.0339) acc1: 74.2188 (77.1127) acc3: 87.5000 (90.3719) acc5: 89.8438 (93.5079) time: 0.1394 data: 0.0097 max mem: 8426 +[2024-12-10 23:10:28 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2078 (1.0673) acc1: 72.6562 (76.2731) acc3: 87.5000 (89.8341) acc5: 90.6250 (93.0170) time: 0.1371 data: 0.0078 max mem: 8426 +[2024-12-10 23:10:29 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2387 (1.0952) acc1: 71.0938 (75.5752) acc3: 86.7188 (89.5089) acc5: 89.8438 (92.7541) time: 0.1296 data: 0.0019 max mem: 8426 +[2024-12-10 23:10:30 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1158 (1.0829) acc1: 74.2188 (75.8560) acc3: 88.2812 (89.6800) acc5: 90.6250 (92.9200) time: 0.1278 data: 0.0018 max mem: 8426 +[2024-12-10 23:10:30 root] (utils.py 297): INFO Test: Total time: 0:00:12 (0.1319 s / it) +[2024-12-10 23:10:32 root] (engine.py 264): INFO * Acc@1 75.752 Acc@3 89.638 Acc@5 92.900 loss 1.084 flops 1.285 layer_flops 1.251 +[2024-12-10 23:10:32 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 75.8% +[2024-12-10 23:10:32 root] (main.py 576): INFO Max accuracy: 75.78% +[2024-12-10 23:10:32 root] (utils.py 283): INFO Epoch: [22] [ 0/2502] eta: 0:32:55 lr: 0.000005 loss_cls: 3.8837 (3.8837) grad_norm: 2.1786 (2.1786) time: 0.7895 data: 0.0003 max mem: 8426 +[2024-12-10 23:10:40 root] (utils.py 283): INFO Epoch: [22] [ 10/2502] eta: 0:32:16 lr: 0.000005 loss_cls: 3.8837 (3.8987) grad_norm: 2.3240 (2.3249) time: 0.7772 data: 0.0003 max mem: 8426 +[2024-12-10 23:10:48 root] (utils.py 283): INFO Epoch: [22] [ 20/2502] eta: 0:32:18 lr: 0.000005 loss_cls: 3.8555 (3.8954) grad_norm: 2.3632 (2.3782) time: 0.7805 data: 0.0003 max mem: 8426 +[2024-12-10 23:10:56 root] (utils.py 283): INFO Epoch: [22] [ 30/2502] eta: 0:31:49 lr: 0.000005 loss_cls: 4.0492 (4.0072) grad_norm: 2.4319 (2.3899) time: 0.7698 data: 0.0002 max mem: 8426 +[2024-12-10 23:11:03 root] (utils.py 283): INFO Epoch: [22] [ 40/2502] eta: 0:31:34 lr: 0.000005 loss_cls: 3.9672 (3.9069) grad_norm: 2.3953 (2.3883) time: 0.7576 data: 0.0002 max mem: 8426 +[2024-12-10 23:11:11 root] (utils.py 283): INFO Epoch: [22] [ 50/2502] eta: 0:31:25 lr: 0.000005 loss_cls: 3.7553 (3.9205) grad_norm: 2.4304 (2.4061) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 23:11:18 root] (utils.py 283): INFO Epoch: [22] [ 60/2502] eta: 0:31:16 lr: 0.000005 loss_cls: 3.9918 (3.9313) grad_norm: 2.4481 (2.4183) time: 0.7656 data: 0.0003 max mem: 8426 +[2024-12-10 23:11:26 root] (utils.py 283): INFO Epoch: [22] [ 70/2502] eta: 0:31:08 lr: 0.000005 loss_cls: 4.0372 (3.9170) grad_norm: 2.4401 (2.4187) time: 0.7663 data: 0.0003 max mem: 8426 +[2024-12-10 23:11:34 root] (utils.py 283): INFO Epoch: [22] [ 80/2502] eta: 0:30:59 lr: 0.000005 loss_cls: 4.0942 (3.9346) grad_norm: 2.3935 (2.4148) time: 0.7668 data: 0.0003 max mem: 8426 +[2024-12-10 23:11:42 root] (utils.py 283): INFO Epoch: [22] [ 90/2502] eta: 0:30:52 lr: 0.000005 loss_cls: 4.0519 (3.9055) grad_norm: 2.4059 (2.4262) time: 0.7665 data: 0.0002 max mem: 8426 +[2024-12-10 23:11:49 root] (utils.py 283): INFO Epoch: [22] [ 100/2502] eta: 0:30:44 lr: 0.000005 loss_cls: 3.9561 (3.9235) grad_norm: 2.4424 (2.4219) time: 0.7677 data: 0.0002 max mem: 8426 +[2024-12-10 23:11:57 root] (utils.py 283): INFO Epoch: [22] [ 110/2502] eta: 0:30:36 lr: 0.000005 loss_cls: 4.0433 (3.9110) grad_norm: 2.3347 (2.4169) time: 0.7661 data: 0.0002 max mem: 8426 +[2024-12-10 23:12:04 root] (utils.py 283): INFO Epoch: [22] [ 120/2502] eta: 0:30:27 lr: 0.000005 loss_cls: 3.9495 (3.9101) grad_norm: 2.4009 (2.4160) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 23:12:12 root] (utils.py 283): INFO Epoch: [22] [ 130/2502] eta: 0:30:21 lr: 0.000005 loss_cls: 4.1596 (3.9247) grad_norm: 2.4009 (2.4171) time: 0.7704 data: 0.0002 max mem: 8426 +[2024-12-10 23:12:20 root] (utils.py 283): INFO Epoch: [22] [ 140/2502] eta: 0:30:16 lr: 0.000005 loss_cls: 3.6879 (3.9051) grad_norm: 2.4589 (2.4234) time: 0.7784 data: 0.0002 max mem: 8426 +[2024-12-10 23:12:28 root] (utils.py 283): INFO Epoch: [22] [ 150/2502] eta: 0:30:10 lr: 0.000005 loss_cls: 4.1715 (3.9161) grad_norm: 2.4421 (2.4199) time: 0.7803 data: 0.0002 max mem: 8426 +[2024-12-10 23:12:36 root] (utils.py 283): INFO Epoch: [22] [ 160/2502] eta: 0:30:04 lr: 0.000005 loss_cls: 4.2784 (3.9338) grad_norm: 2.3208 (2.4182) time: 0.7822 data: 0.0002 max mem: 8426 +[2024-12-10 23:12:44 root] (utils.py 283): INFO Epoch: [22] [ 170/2502] eta: 0:29:58 lr: 0.000005 loss_cls: 4.3339 (3.9272) grad_norm: 2.4237 (2.4189) time: 0.7843 data: 0.0002 max mem: 8426 +[2024-12-10 23:12:51 root] (utils.py 283): INFO Epoch: [22] [ 180/2502] eta: 0:29:49 lr: 0.000005 loss_cls: 3.7201 (3.9229) grad_norm: 2.4371 (2.4172) time: 0.7719 data: 0.0002 max mem: 8426 +[2024-12-10 23:12:59 root] (utils.py 283): INFO Epoch: [22] [ 190/2502] eta: 0:29:40 lr: 0.000005 loss_cls: 3.9236 (3.9280) grad_norm: 2.4395 (2.4206) time: 0.7595 data: 0.0002 max mem: 8426 +[2024-12-10 23:13:06 root] (utils.py 283): INFO Epoch: [22] [ 200/2502] eta: 0:29:32 lr: 0.000005 loss_cls: 3.7668 (3.9115) grad_norm: 2.4959 (2.4241) time: 0.7632 data: 0.0003 max mem: 8426 +[2024-12-10 23:13:14 root] (utils.py 283): INFO Epoch: [22] [ 210/2502] eta: 0:29:25 lr: 0.000005 loss_cls: 3.7668 (3.9170) grad_norm: 2.4043 (2.4196) time: 0.7702 data: 0.0002 max mem: 8426 +[2024-12-10 23:13:22 root] (utils.py 283): INFO Epoch: [22] [ 220/2502] eta: 0:29:16 lr: 0.000005 loss_cls: 3.6902 (3.8999) grad_norm: 2.3068 (2.4179) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-10 23:13:29 root] (utils.py 283): INFO Epoch: [22] [ 230/2502] eta: 0:29:07 lr: 0.000005 loss_cls: 3.6902 (3.9019) grad_norm: 2.3524 (2.4169) time: 0.7607 data: 0.0002 max mem: 8426 +[2024-12-10 23:13:37 root] (utils.py 283): INFO Epoch: [22] [ 240/2502] eta: 0:28:59 lr: 0.000005 loss_cls: 3.9828 (3.9017) grad_norm: 2.3835 (2.4178) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 23:13:45 root] (utils.py 283): INFO Epoch: [22] [ 250/2502] eta: 0:28:51 lr: 0.000005 loss_cls: 3.6816 (3.8971) grad_norm: 2.4238 (2.4189) time: 0.7631 data: 0.0003 max mem: 8426 +[2024-12-10 23:13:52 root] (utils.py 283): INFO Epoch: [22] [ 260/2502] eta: 0:28:43 lr: 0.000005 loss_cls: 3.9101 (3.8999) grad_norm: 2.4238 (2.4190) time: 0.7631 data: 0.0003 max mem: 8426 +[2024-12-10 23:14:00 root] (utils.py 283): INFO Epoch: [22] [ 270/2502] eta: 0:28:35 lr: 0.000005 loss_cls: 4.0816 (3.8996) grad_norm: 2.3774 (2.4152) time: 0.7651 data: 0.0003 max mem: 8426 +[2024-12-10 23:14:08 root] (utils.py 283): INFO Epoch: [22] [ 280/2502] eta: 0:28:27 lr: 0.000005 loss_cls: 3.9214 (3.8992) grad_norm: 2.3585 (2.4151) time: 0.7653 data: 0.0003 max mem: 8426 +[2024-12-10 23:14:15 root] (utils.py 283): INFO Epoch: [22] [ 290/2502] eta: 0:28:20 lr: 0.000005 loss_cls: 3.9214 (3.9035) grad_norm: 2.4245 (2.4152) time: 0.7694 data: 0.0003 max mem: 8426 +[2024-12-10 23:14:23 root] (utils.py 283): INFO Epoch: [22] [ 300/2502] eta: 0:28:13 lr: 0.000005 loss_cls: 4.0634 (3.9065) grad_norm: 2.4109 (2.4135) time: 0.7749 data: 0.0003 max mem: 8426 +[2024-12-10 23:14:31 root] (utils.py 283): INFO Epoch: [22] [ 310/2502] eta: 0:28:05 lr: 0.000005 loss_cls: 4.0609 (3.9052) grad_norm: 2.3927 (2.4129) time: 0.7774 data: 0.0002 max mem: 8426 +[2024-12-10 23:14:38 root] (utils.py 283): INFO Epoch: [22] [ 320/2502] eta: 0:27:57 lr: 0.000005 loss_cls: 4.0880 (3.9104) grad_norm: 2.3982 (2.4117) time: 0.7693 data: 0.0002 max mem: 8426 +[2024-12-10 23:14:46 root] (utils.py 283): INFO Epoch: [22] [ 330/2502] eta: 0:27:49 lr: 0.000005 loss_cls: 4.0880 (3.9180) grad_norm: 2.4124 (2.4126) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 23:14:54 root] (utils.py 283): INFO Epoch: [22] [ 340/2502] eta: 0:27:41 lr: 0.000005 loss_cls: 3.9502 (3.9131) grad_norm: 2.4258 (2.4140) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 23:15:01 root] (utils.py 283): INFO Epoch: [22] [ 350/2502] eta: 0:27:33 lr: 0.000005 loss_cls: 3.8032 (3.9152) grad_norm: 2.4258 (2.4151) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 23:15:09 root] (utils.py 283): INFO Epoch: [22] [ 360/2502] eta: 0:27:25 lr: 0.000005 loss_cls: 3.8537 (3.9085) grad_norm: 2.3672 (2.4151) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 23:15:17 root] (utils.py 283): INFO Epoch: [22] [ 370/2502] eta: 0:27:17 lr: 0.000005 loss_cls: 3.9393 (3.9154) grad_norm: 2.3158 (2.4145) time: 0.7665 data: 0.0002 max mem: 8426 +[2024-12-10 23:15:24 root] (utils.py 283): INFO Epoch: [22] [ 380/2502] eta: 0:27:10 lr: 0.000005 loss_cls: 4.1944 (3.9192) grad_norm: 2.3288 (2.4133) time: 0.7685 data: 0.0002 max mem: 8426 +[2024-12-10 23:15:32 root] (utils.py 283): INFO Epoch: [22] [ 390/2502] eta: 0:27:02 lr: 0.000005 loss_cls: 4.1944 (3.9208) grad_norm: 2.3810 (2.4131) time: 0.7668 data: 0.0002 max mem: 8426 +[2024-12-10 23:15:40 root] (utils.py 283): INFO Epoch: [22] [ 400/2502] eta: 0:26:54 lr: 0.000005 loss_cls: 3.9928 (3.9205) grad_norm: 2.3990 (2.4120) time: 0.7675 data: 0.0002 max mem: 8426 +[2024-12-10 23:15:47 root] (utils.py 283): INFO Epoch: [22] [ 410/2502] eta: 0:26:47 lr: 0.000005 loss_cls: 3.9588 (3.9184) grad_norm: 2.3178 (2.4097) time: 0.7701 data: 0.0002 max mem: 8426 +[2024-12-10 23:15:55 root] (utils.py 283): INFO Epoch: [22] [ 420/2502] eta: 0:26:39 lr: 0.000005 loss_cls: 4.0398 (3.9163) grad_norm: 2.3025 (2.4087) time: 0.7678 data: 0.0003 max mem: 8426 +[2024-12-10 23:16:03 root] (utils.py 283): INFO Epoch: [22] [ 430/2502] eta: 0:26:31 lr: 0.000005 loss_cls: 4.2627 (3.9186) grad_norm: 2.3563 (2.4083) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 23:16:10 root] (utils.py 283): INFO Epoch: [22] [ 440/2502] eta: 0:26:23 lr: 0.000005 loss_cls: 4.2004 (3.9263) grad_norm: 2.3805 (2.4097) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 23:16:18 root] (utils.py 283): INFO Epoch: [22] [ 450/2502] eta: 0:26:15 lr: 0.000005 loss_cls: 4.1831 (3.9275) grad_norm: 2.4643 (2.4103) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 23:16:26 root] (utils.py 283): INFO Epoch: [22] [ 460/2502] eta: 0:26:07 lr: 0.000005 loss_cls: 3.8426 (3.9176) grad_norm: 2.4066 (2.4105) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 23:16:33 root] (utils.py 283): INFO Epoch: [22] [ 470/2502] eta: 0:25:59 lr: 0.000005 loss_cls: 4.0066 (3.9222) grad_norm: 2.3918 (2.4105) time: 0.7605 data: 0.0003 max mem: 8426 +[2024-12-10 23:16:41 root] (utils.py 283): INFO Epoch: [22] [ 480/2502] eta: 0:25:52 lr: 0.000005 loss_cls: 4.3068 (3.9243) grad_norm: 2.3693 (2.4102) time: 0.7636 data: 0.0003 max mem: 8426 +[2024-12-10 23:16:49 root] (utils.py 283): INFO Epoch: [22] [ 490/2502] eta: 0:25:44 lr: 0.000005 loss_cls: 4.1994 (3.9251) grad_norm: 2.4006 (2.4109) time: 0.7672 data: 0.0002 max mem: 8426 +[2024-12-10 23:16:56 root] (utils.py 283): INFO Epoch: [22] [ 500/2502] eta: 0:25:36 lr: 0.000005 loss_cls: 4.0722 (3.9211) grad_norm: 2.4027 (2.4122) time: 0.7674 data: 0.0002 max mem: 8426 +[2024-12-10 23:17:04 root] (utils.py 283): INFO Epoch: [22] [ 510/2502] eta: 0:25:29 lr: 0.000005 loss_cls: 3.9990 (3.9213) grad_norm: 2.4070 (2.4113) time: 0.7676 data: 0.0002 max mem: 8426 +[2024-12-10 23:17:12 root] (utils.py 283): INFO Epoch: [22] [ 520/2502] eta: 0:25:21 lr: 0.000005 loss_cls: 4.1234 (3.9278) grad_norm: 2.3937 (2.4113) time: 0.7657 data: 0.0003 max mem: 8426 +[2024-12-10 23:17:19 root] (utils.py 283): INFO Epoch: [22] [ 530/2502] eta: 0:25:13 lr: 0.000005 loss_cls: 4.1234 (3.9230) grad_norm: 2.3865 (2.4116) time: 0.7650 data: 0.0003 max mem: 8426 +[2024-12-10 23:17:27 root] (utils.py 283): INFO Epoch: [22] [ 540/2502] eta: 0:25:05 lr: 0.000005 loss_cls: 3.8438 (3.9237) grad_norm: 2.3865 (2.4110) time: 0.7654 data: 0.0002 max mem: 8426 +[2024-12-10 23:17:35 root] (utils.py 283): INFO Epoch: [22] [ 550/2502] eta: 0:24:58 lr: 0.000005 loss_cls: 4.0848 (3.9241) grad_norm: 2.3999 (2.4118) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 23:17:42 root] (utils.py 283): INFO Epoch: [22] [ 560/2502] eta: 0:24:50 lr: 0.000005 loss_cls: 4.1959 (3.9257) grad_norm: 2.4201 (2.4130) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 23:17:50 root] (utils.py 283): INFO Epoch: [22] [ 570/2502] eta: 0:24:42 lr: 0.000005 loss_cls: 4.0446 (3.9278) grad_norm: 2.4245 (2.4129) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 23:17:57 root] (utils.py 283): INFO Epoch: [22] [ 580/2502] eta: 0:24:34 lr: 0.000005 loss_cls: 4.0446 (3.9289) grad_norm: 2.4472 (2.4135) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 23:18:05 root] (utils.py 283): INFO Epoch: [22] [ 590/2502] eta: 0:24:27 lr: 0.000005 loss_cls: 4.0512 (3.9318) grad_norm: 2.4363 (2.4141) time: 0.7710 data: 0.0002 max mem: 8426 +[2024-12-10 23:18:13 root] (utils.py 283): INFO Epoch: [22] [ 600/2502] eta: 0:24:19 lr: 0.000005 loss_cls: 4.0207 (3.9283) grad_norm: 2.3975 (2.4141) time: 0.7709 data: 0.0002 max mem: 8426 +[2024-12-10 23:18:20 root] (utils.py 283): INFO Epoch: [22] [ 610/2502] eta: 0:24:11 lr: 0.000005 loss_cls: 3.5505 (3.9222) grad_norm: 2.4027 (2.4153) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 23:18:28 root] (utils.py 283): INFO Epoch: [22] [ 620/2502] eta: 0:24:04 lr: 0.000005 loss_cls: 3.6257 (3.9196) grad_norm: 2.3857 (2.4144) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 23:18:36 root] (utils.py 283): INFO Epoch: [22] [ 630/2502] eta: 0:23:56 lr: 0.000005 loss_cls: 3.9734 (3.9168) grad_norm: 2.3588 (2.4150) time: 0.7652 data: 0.0003 max mem: 8426 +[2024-12-10 23:18:44 root] (utils.py 283): INFO Epoch: [22] [ 640/2502] eta: 0:23:48 lr: 0.000005 loss_cls: 3.9734 (3.9162) grad_norm: 2.3966 (2.4147) time: 0.7679 data: 0.0003 max mem: 8426 +[2024-12-10 23:18:51 root] (utils.py 283): INFO Epoch: [22] [ 650/2502] eta: 0:23:40 lr: 0.000005 loss_cls: 4.0200 (3.9185) grad_norm: 2.4712 (2.4156) time: 0.7678 data: 0.0003 max mem: 8426 +[2024-12-10 23:18:59 root] (utils.py 283): INFO Epoch: [22] [ 660/2502] eta: 0:23:33 lr: 0.000005 loss_cls: 4.1836 (3.9225) grad_norm: 2.4837 (2.4171) time: 0.7642 data: 0.0003 max mem: 8426 +[2024-12-10 23:19:06 root] (utils.py 283): INFO Epoch: [22] [ 670/2502] eta: 0:23:25 lr: 0.000005 loss_cls: 3.9719 (3.9190) grad_norm: 2.4018 (2.4170) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 23:19:14 root] (utils.py 283): INFO Epoch: [22] [ 680/2502] eta: 0:23:17 lr: 0.000005 loss_cls: 3.6742 (3.9162) grad_norm: 2.4146 (2.4183) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 23:19:22 root] (utils.py 283): INFO Epoch: [22] [ 690/2502] eta: 0:23:09 lr: 0.000005 loss_cls: 3.8085 (3.9146) grad_norm: 2.3923 (2.4173) time: 0.7615 data: 0.0002 max mem: 8426 +[2024-12-10 23:19:29 root] (utils.py 283): INFO Epoch: [22] [ 700/2502] eta: 0:23:02 lr: 0.000005 loss_cls: 3.9219 (3.9119) grad_norm: 2.3418 (2.4168) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-10 23:19:37 root] (utils.py 283): INFO Epoch: [22] [ 710/2502] eta: 0:22:54 lr: 0.000005 loss_cls: 4.0556 (3.9156) grad_norm: 2.4101 (2.4185) time: 0.7757 data: 0.0002 max mem: 8426 +[2024-12-10 23:19:45 root] (utils.py 283): INFO Epoch: [22] [ 720/2502] eta: 0:22:47 lr: 0.000005 loss_cls: 4.1427 (3.9110) grad_norm: 2.5109 (2.4192) time: 0.7826 data: 0.0002 max mem: 8426 +[2024-12-10 23:19:53 root] (utils.py 283): INFO Epoch: [22] [ 730/2502] eta: 0:22:40 lr: 0.000005 loss_cls: 3.5571 (3.9097) grad_norm: 2.4967 (2.4204) time: 0.7784 data: 0.0002 max mem: 8426 +[2024-12-10 23:20:01 root] (utils.py 283): INFO Epoch: [22] [ 740/2502] eta: 0:22:32 lr: 0.000005 loss_cls: 4.0284 (3.9096) grad_norm: 2.3955 (2.4200) time: 0.7813 data: 0.0002 max mem: 8426 +[2024-12-10 23:20:08 root] (utils.py 283): INFO Epoch: [22] [ 750/2502] eta: 0:22:25 lr: 0.000005 loss_cls: 3.9127 (3.9108) grad_norm: 2.3894 (2.4207) time: 0.7750 data: 0.0002 max mem: 8426 +[2024-12-10 23:20:16 root] (utils.py 283): INFO Epoch: [22] [ 760/2502] eta: 0:22:17 lr: 0.000005 loss_cls: 3.8152 (3.9080) grad_norm: 2.3929 (2.4214) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-10 23:20:23 root] (utils.py 283): INFO Epoch: [22] [ 770/2502] eta: 0:22:09 lr: 0.000005 loss_cls: 3.8736 (3.9086) grad_norm: 2.3929 (2.4215) time: 0.7610 data: 0.0003 max mem: 8426 +[2024-12-10 23:20:31 root] (utils.py 283): INFO Epoch: [22] [ 780/2502] eta: 0:22:01 lr: 0.000005 loss_cls: 3.8508 (3.9050) grad_norm: 2.3717 (2.4209) time: 0.7616 data: 0.0003 max mem: 8426 +[2024-12-10 23:20:39 root] (utils.py 283): INFO Epoch: [22] [ 790/2502] eta: 0:21:53 lr: 0.000005 loss_cls: 3.7022 (3.9041) grad_norm: 2.4271 (2.4215) time: 0.7619 data: 0.0003 max mem: 8426 +[2024-12-10 23:20:46 root] (utils.py 283): INFO Epoch: [22] [ 800/2502] eta: 0:21:46 lr: 0.000005 loss_cls: 3.7744 (3.9042) grad_norm: 2.4745 (2.4219) time: 0.7637 data: 0.0003 max mem: 8426 +[2024-12-10 23:20:54 root] (utils.py 283): INFO Epoch: [22] [ 810/2502] eta: 0:21:38 lr: 0.000005 loss_cls: 3.9149 (3.9008) grad_norm: 2.4355 (2.4216) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 23:21:02 root] (utils.py 283): INFO Epoch: [22] [ 820/2502] eta: 0:21:30 lr: 0.000005 loss_cls: 4.1384 (3.9062) grad_norm: 2.3972 (2.4213) time: 0.7627 data: 0.0003 max mem: 8426 +[2024-12-10 23:21:09 root] (utils.py 283): INFO Epoch: [22] [ 830/2502] eta: 0:21:22 lr: 0.000005 loss_cls: 4.1717 (3.9072) grad_norm: 2.4092 (2.4216) time: 0.7637 data: 0.0003 max mem: 8426 +[2024-12-10 23:21:17 root] (utils.py 283): INFO Epoch: [22] [ 840/2502] eta: 0:21:15 lr: 0.000005 loss_cls: 3.8979 (3.9048) grad_norm: 2.4092 (2.4217) time: 0.7675 data: 0.0002 max mem: 8426 +[2024-12-10 23:21:25 root] (utils.py 283): INFO Epoch: [22] [ 850/2502] eta: 0:21:07 lr: 0.000005 loss_cls: 3.8137 (3.9042) grad_norm: 2.3557 (2.4209) time: 0.7692 data: 0.0002 max mem: 8426 +[2024-12-10 23:21:32 root] (utils.py 283): INFO Epoch: [22] [ 860/2502] eta: 0:20:59 lr: 0.000005 loss_cls: 3.7399 (3.8993) grad_norm: 2.3689 (2.4215) time: 0.7641 data: 0.0003 max mem: 8426 +[2024-12-10 23:21:40 root] (utils.py 283): INFO Epoch: [22] [ 870/2502] eta: 0:20:52 lr: 0.000005 loss_cls: 4.0419 (3.9021) grad_norm: 2.4858 (2.4222) time: 0.7680 data: 0.0003 max mem: 8426 +[2024-12-10 23:21:48 root] (utils.py 283): INFO Epoch: [22] [ 880/2502] eta: 0:20:44 lr: 0.000005 loss_cls: 4.0419 (3.9020) grad_norm: 2.3897 (2.4215) time: 0.7691 data: 0.0003 max mem: 8426 +[2024-12-10 23:21:55 root] (utils.py 283): INFO Epoch: [22] [ 890/2502] eta: 0:20:36 lr: 0.000005 loss_cls: 4.1042 (3.9056) grad_norm: 2.3692 (2.4210) time: 0.7620 data: 0.0003 max mem: 8426 +[2024-12-10 23:22:03 root] (utils.py 283): INFO Epoch: [22] [ 900/2502] eta: 0:20:28 lr: 0.000005 loss_cls: 4.1042 (3.9034) grad_norm: 2.4142 (2.4217) time: 0.7620 data: 0.0003 max mem: 8426 +[2024-12-10 23:22:11 root] (utils.py 283): INFO Epoch: [22] [ 910/2502] eta: 0:20:21 lr: 0.000005 loss_cls: 3.8294 (3.9060) grad_norm: 2.4142 (2.4212) time: 0.7671 data: 0.0003 max mem: 8426 +[2024-12-10 23:22:18 root] (utils.py 283): INFO Epoch: [22] [ 920/2502] eta: 0:20:13 lr: 0.000005 loss_cls: 4.1225 (3.9069) grad_norm: 2.3451 (2.4209) time: 0.7668 data: 0.0003 max mem: 8426 +[2024-12-10 23:22:26 root] (utils.py 283): INFO Epoch: [22] [ 930/2502] eta: 0:20:05 lr: 0.000005 loss_cls: 4.1225 (3.9070) grad_norm: 2.3693 (2.4206) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 23:22:34 root] (utils.py 283): INFO Epoch: [22] [ 940/2502] eta: 0:19:58 lr: 0.000005 loss_cls: 4.1842 (3.9071) grad_norm: 2.3804 (2.4200) time: 0.7648 data: 0.0003 max mem: 8426 +[2024-12-10 23:22:41 root] (utils.py 283): INFO Epoch: [22] [ 950/2502] eta: 0:19:50 lr: 0.000005 loss_cls: 4.2151 (3.9086) grad_norm: 2.3804 (2.4203) time: 0.7655 data: 0.0003 max mem: 8426 +[2024-12-10 23:22:49 root] (utils.py 283): INFO Epoch: [22] [ 960/2502] eta: 0:19:42 lr: 0.000005 loss_cls: 3.8439 (3.9067) grad_norm: 2.4308 (2.4201) time: 0.7644 data: 0.0003 max mem: 8426 +[2024-12-10 23:22:56 root] (utils.py 283): INFO Epoch: [22] [ 970/2502] eta: 0:19:35 lr: 0.000005 loss_cls: 3.8890 (3.9083) grad_norm: 2.3713 (2.4196) time: 0.7641 data: 0.0003 max mem: 8426 +[2024-12-10 23:23:04 root] (utils.py 283): INFO Epoch: [22] [ 980/2502] eta: 0:19:27 lr: 0.000005 loss_cls: 3.9308 (3.9077) grad_norm: 2.3455 (2.4195) time: 0.7699 data: 0.0003 max mem: 8426 +[2024-12-10 23:23:12 root] (utils.py 283): INFO Epoch: [22] [ 990/2502] eta: 0:19:19 lr: 0.000005 loss_cls: 3.8784 (3.9066) grad_norm: 2.3695 (2.4195) time: 0.7697 data: 0.0003 max mem: 8426 +[2024-12-10 23:23:20 root] (utils.py 283): INFO Epoch: [22] [1000/2502] eta: 0:19:12 lr: 0.000005 loss_cls: 3.9858 (3.9058) grad_norm: 2.3817 (2.4191) time: 0.7665 data: 0.0002 max mem: 8426 +[2024-12-10 23:23:27 root] (utils.py 283): INFO Epoch: [22] [1010/2502] eta: 0:19:04 lr: 0.000005 loss_cls: 3.9858 (3.9058) grad_norm: 2.3774 (2.4187) time: 0.7672 data: 0.0002 max mem: 8426 +[2024-12-10 23:23:35 root] (utils.py 283): INFO Epoch: [22] [1020/2502] eta: 0:18:56 lr: 0.000005 loss_cls: 4.0649 (3.9068) grad_norm: 2.3644 (2.4183) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-10 23:23:43 root] (utils.py 283): INFO Epoch: [22] [1030/2502] eta: 0:18:49 lr: 0.000005 loss_cls: 3.9359 (3.9054) grad_norm: 2.4143 (2.4184) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 23:23:50 root] (utils.py 283): INFO Epoch: [22] [1040/2502] eta: 0:18:41 lr: 0.000005 loss_cls: 3.5886 (3.9024) grad_norm: 2.3944 (2.4178) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 23:23:58 root] (utils.py 283): INFO Epoch: [22] [1050/2502] eta: 0:18:33 lr: 0.000005 loss_cls: 3.5476 (3.9000) grad_norm: 2.3701 (2.4178) time: 0.7615 data: 0.0003 max mem: 8426 +[2024-12-10 23:24:05 root] (utils.py 283): INFO Epoch: [22] [1060/2502] eta: 0:18:25 lr: 0.000005 loss_cls: 3.9130 (3.9014) grad_norm: 2.4205 (2.4177) time: 0.7596 data: 0.0003 max mem: 8426 +[2024-12-10 23:24:13 root] (utils.py 283): INFO Epoch: [22] [1070/2502] eta: 0:18:18 lr: 0.000005 loss_cls: 3.8861 (3.9013) grad_norm: 2.3630 (2.4171) time: 0.7591 data: 0.0003 max mem: 8426 +[2024-12-10 23:24:21 root] (utils.py 283): INFO Epoch: [22] [1080/2502] eta: 0:18:10 lr: 0.000005 loss_cls: 3.7594 (3.8979) grad_norm: 2.3949 (2.4170) time: 0.7609 data: 0.0003 max mem: 8426 +[2024-12-10 23:24:28 root] (utils.py 283): INFO Epoch: [22] [1090/2502] eta: 0:18:02 lr: 0.000005 loss_cls: 3.3865 (3.8971) grad_norm: 2.4005 (2.4172) time: 0.7639 data: 0.0003 max mem: 8426 +[2024-12-10 23:24:36 root] (utils.py 283): INFO Epoch: [22] [1100/2502] eta: 0:17:54 lr: 0.000005 loss_cls: 3.8596 (3.8974) grad_norm: 2.4205 (2.4175) time: 0.7659 data: 0.0003 max mem: 8426 +[2024-12-10 23:24:44 root] (utils.py 283): INFO Epoch: [22] [1110/2502] eta: 0:17:47 lr: 0.000005 loss_cls: 3.6334 (3.8957) grad_norm: 2.3943 (2.4168) time: 0.7649 data: 0.0003 max mem: 8426 +[2024-12-10 23:24:51 root] (utils.py 283): INFO Epoch: [22] [1120/2502] eta: 0:17:39 lr: 0.000005 loss_cls: 3.6334 (3.8950) grad_norm: 2.3943 (2.4170) time: 0.7622 data: 0.0003 max mem: 8426 +[2024-12-10 23:24:59 root] (utils.py 283): INFO Epoch: [22] [1130/2502] eta: 0:17:31 lr: 0.000005 loss_cls: 3.8501 (3.8948) grad_norm: 2.3782 (2.4164) time: 0.7596 data: 0.0002 max mem: 8426 +[2024-12-10 23:25:06 root] (utils.py 283): INFO Epoch: [22] [1140/2502] eta: 0:17:24 lr: 0.000005 loss_cls: 3.7569 (3.8941) grad_norm: 2.3704 (2.4167) time: 0.7666 data: 0.0002 max mem: 8426 +[2024-12-10 23:25:14 root] (utils.py 283): INFO Epoch: [22] [1150/2502] eta: 0:17:16 lr: 0.000005 loss_cls: 4.0782 (3.8963) grad_norm: 2.3718 (2.4162) time: 0.7716 data: 0.0002 max mem: 8426 +[2024-12-10 23:25:22 root] (utils.py 283): INFO Epoch: [22] [1160/2502] eta: 0:17:08 lr: 0.000005 loss_cls: 4.0700 (3.8949) grad_norm: 2.3973 (2.4169) time: 0.7683 data: 0.0002 max mem: 8426 +[2024-12-10 23:25:29 root] (utils.py 283): INFO Epoch: [22] [1170/2502] eta: 0:17:01 lr: 0.000005 loss_cls: 3.9870 (3.8960) grad_norm: 2.4140 (2.4166) time: 0.7637 data: 0.0003 max mem: 8426 +[2024-12-10 23:25:37 root] (utils.py 283): INFO Epoch: [22] [1180/2502] eta: 0:16:53 lr: 0.000005 loss_cls: 4.1433 (3.8951) grad_norm: 2.3839 (2.4171) time: 0.7640 data: 0.0003 max mem: 8426 +[2024-12-10 23:25:45 root] (utils.py 283): INFO Epoch: [22] [1190/2502] eta: 0:16:45 lr: 0.000005 loss_cls: 3.8792 (3.8939) grad_norm: 2.4190 (2.4169) time: 0.7690 data: 0.0003 max mem: 8426 +[2024-12-10 23:25:52 root] (utils.py 283): INFO Epoch: [22] [1200/2502] eta: 0:16:38 lr: 0.000005 loss_cls: 4.1091 (3.8964) grad_norm: 2.3617 (2.4165) time: 0.7674 data: 0.0002 max mem: 8426 +[2024-12-10 23:26:00 root] (utils.py 283): INFO Epoch: [22] [1210/2502] eta: 0:16:30 lr: 0.000005 loss_cls: 4.1272 (3.8961) grad_norm: 2.4044 (2.4171) time: 0.7646 data: 0.0003 max mem: 8426 +[2024-12-10 23:26:08 root] (utils.py 283): INFO Epoch: [22] [1220/2502] eta: 0:16:22 lr: 0.000005 loss_cls: 3.9760 (3.8937) grad_norm: 2.3673 (2.4162) time: 0.7645 data: 0.0003 max mem: 8426 +[2024-12-10 23:26:15 root] (utils.py 283): INFO Epoch: [22] [1230/2502] eta: 0:16:15 lr: 0.000005 loss_cls: 4.0018 (3.8955) grad_norm: 2.3673 (2.4163) time: 0.7662 data: 0.0003 max mem: 8426 +[2024-12-10 23:26:23 root] (utils.py 283): INFO Epoch: [22] [1240/2502] eta: 0:16:07 lr: 0.000005 loss_cls: 4.0714 (3.8960) grad_norm: 2.3949 (2.4159) time: 0.7707 data: 0.0002 max mem: 8426 +[2024-12-10 23:26:31 root] (utils.py 283): INFO Epoch: [22] [1250/2502] eta: 0:15:59 lr: 0.000005 loss_cls: 3.9282 (3.8951) grad_norm: 2.3997 (2.4159) time: 0.7697 data: 0.0002 max mem: 8426 +[2024-12-10 23:26:38 root] (utils.py 283): INFO Epoch: [22] [1260/2502] eta: 0:15:52 lr: 0.000005 loss_cls: 3.7144 (3.8959) grad_norm: 2.4107 (2.4161) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 23:26:46 root] (utils.py 283): INFO Epoch: [22] [1270/2502] eta: 0:15:44 lr: 0.000005 loss_cls: 4.1531 (3.8988) grad_norm: 2.3975 (2.4156) time: 0.7678 data: 0.0003 max mem: 8426 +[2024-12-10 23:26:54 root] (utils.py 283): INFO Epoch: [22] [1280/2502] eta: 0:15:36 lr: 0.000005 loss_cls: 4.4212 (3.9018) grad_norm: 2.3918 (2.4156) time: 0.7673 data: 0.0002 max mem: 8426 +[2024-12-10 23:27:01 root] (utils.py 283): INFO Epoch: [22] [1290/2502] eta: 0:15:29 lr: 0.000005 loss_cls: 4.1196 (3.9030) grad_norm: 2.3953 (2.4158) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 23:27:09 root] (utils.py 283): INFO Epoch: [22] [1300/2502] eta: 0:15:21 lr: 0.000005 loss_cls: 4.0423 (3.9007) grad_norm: 2.3325 (2.4149) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-10 23:27:17 root] (utils.py 283): INFO Epoch: [22] [1310/2502] eta: 0:15:13 lr: 0.000005 loss_cls: 3.8548 (3.9009) grad_norm: 2.3187 (2.4146) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 23:27:24 root] (utils.py 283): INFO Epoch: [22] [1320/2502] eta: 0:15:06 lr: 0.000005 loss_cls: 3.8835 (3.9027) grad_norm: 2.4299 (2.4149) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 23:27:32 root] (utils.py 283): INFO Epoch: [22] [1330/2502] eta: 0:14:58 lr: 0.000005 loss_cls: 4.2313 (3.9030) grad_norm: 2.3444 (2.4142) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 23:27:40 root] (utils.py 283): INFO Epoch: [22] [1340/2502] eta: 0:14:50 lr: 0.000005 loss_cls: 4.2705 (3.9064) grad_norm: 2.3444 (2.4142) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 23:27:47 root] (utils.py 283): INFO Epoch: [22] [1350/2502] eta: 0:14:43 lr: 0.000005 loss_cls: 4.2615 (3.9055) grad_norm: 2.3845 (2.4140) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 23:27:55 root] (utils.py 283): INFO Epoch: [22] [1360/2502] eta: 0:14:35 lr: 0.000005 loss_cls: 4.0579 (3.9074) grad_norm: 2.3822 (2.4138) time: 0.7677 data: 0.0003 max mem: 8426 +[2024-12-10 23:28:03 root] (utils.py 283): INFO Epoch: [22] [1370/2502] eta: 0:14:27 lr: 0.000005 loss_cls: 4.0579 (3.9058) grad_norm: 2.3801 (2.4135) time: 0.7667 data: 0.0003 max mem: 8426 +[2024-12-10 23:28:10 root] (utils.py 283): INFO Epoch: [22] [1380/2502] eta: 0:14:20 lr: 0.000005 loss_cls: 4.1950 (3.9085) grad_norm: 2.3868 (2.4135) time: 0.7655 data: 0.0003 max mem: 8426 +[2024-12-10 23:28:18 root] (utils.py 283): INFO Epoch: [22] [1390/2502] eta: 0:14:12 lr: 0.000005 loss_cls: 4.2703 (3.9070) grad_norm: 2.4193 (2.4146) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 23:28:26 root] (utils.py 283): INFO Epoch: [22] [1400/2502] eta: 0:14:04 lr: 0.000005 loss_cls: 3.7539 (3.9073) grad_norm: 2.4829 (2.4148) time: 0.7646 data: 0.0003 max mem: 8426 +[2024-12-10 23:28:33 root] (utils.py 283): INFO Epoch: [22] [1410/2502] eta: 0:13:57 lr: 0.000005 loss_cls: 3.9436 (3.9062) grad_norm: 2.4235 (2.4145) time: 0.7719 data: 0.0003 max mem: 8426 +[2024-12-10 23:28:41 root] (utils.py 283): INFO Epoch: [22] [1420/2502] eta: 0:13:49 lr: 0.000005 loss_cls: 3.9436 (3.9045) grad_norm: 2.3344 (2.4140) time: 0.7726 data: 0.0003 max mem: 8426 +[2024-12-10 23:28:49 root] (utils.py 283): INFO Epoch: [22] [1430/2502] eta: 0:13:41 lr: 0.000005 loss_cls: 3.9753 (3.9033) grad_norm: 2.3575 (2.4141) time: 0.7645 data: 0.0003 max mem: 8426 +[2024-12-10 23:28:56 root] (utils.py 283): INFO Epoch: [22] [1440/2502] eta: 0:13:34 lr: 0.000005 loss_cls: 4.0534 (3.9054) grad_norm: 2.3986 (2.4141) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 23:29:04 root] (utils.py 283): INFO Epoch: [22] [1450/2502] eta: 0:13:26 lr: 0.000005 loss_cls: 4.0168 (3.9038) grad_norm: 2.3958 (2.4144) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 23:29:12 root] (utils.py 283): INFO Epoch: [22] [1460/2502] eta: 0:13:18 lr: 0.000005 loss_cls: 4.0713 (3.9050) grad_norm: 2.3856 (2.4141) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-10 23:29:19 root] (utils.py 283): INFO Epoch: [22] [1470/2502] eta: 0:13:10 lr: 0.000005 loss_cls: 4.0713 (3.9044) grad_norm: 2.3721 (2.4138) time: 0.7612 data: 0.0002 max mem: 8426 +[2024-12-10 23:29:27 root] (utils.py 283): INFO Epoch: [22] [1480/2502] eta: 0:13:03 lr: 0.000005 loss_cls: 3.9305 (3.9061) grad_norm: 2.3893 (2.4138) time: 0.7698 data: 0.0002 max mem: 8426 +[2024-12-10 23:29:35 root] (utils.py 283): INFO Epoch: [22] [1490/2502] eta: 0:12:55 lr: 0.000005 loss_cls: 3.9305 (3.9041) grad_norm: 2.4009 (2.4138) time: 0.7870 data: 0.0003 max mem: 8426 +[2024-12-10 23:29:43 root] (utils.py 283): INFO Epoch: [22] [1500/2502] eta: 0:12:48 lr: 0.000005 loss_cls: 3.6901 (3.9041) grad_norm: 2.4065 (2.4137) time: 0.7904 data: 0.0003 max mem: 8426 +[2024-12-10 23:29:51 root] (utils.py 283): INFO Epoch: [22] [1510/2502] eta: 0:12:40 lr: 0.000005 loss_cls: 3.9750 (3.9046) grad_norm: 2.3078 (2.4132) time: 0.7840 data: 0.0003 max mem: 8426 +[2024-12-10 23:29:58 root] (utils.py 283): INFO Epoch: [22] [1520/2502] eta: 0:12:33 lr: 0.000005 loss_cls: 3.7963 (3.9035) grad_norm: 2.3527 (2.4132) time: 0.7841 data: 0.0003 max mem: 8426 +[2024-12-10 23:30:06 root] (utils.py 283): INFO Epoch: [22] [1530/2502] eta: 0:12:25 lr: 0.000005 loss_cls: 3.4858 (3.9013) grad_norm: 2.3915 (2.4132) time: 0.7865 data: 0.0003 max mem: 8426 +[2024-12-10 23:30:14 root] (utils.py 283): INFO Epoch: [22] [1540/2502] eta: 0:12:18 lr: 0.000005 loss_cls: 3.8882 (3.9010) grad_norm: 2.3905 (2.4130) time: 0.7887 data: 0.0003 max mem: 8426 +[2024-12-10 23:30:22 root] (utils.py 283): INFO Epoch: [22] [1550/2502] eta: 0:12:10 lr: 0.000005 loss_cls: 4.1158 (3.9023) grad_norm: 2.3777 (2.4133) time: 0.7872 data: 0.0003 max mem: 8426 +[2024-12-10 23:30:30 root] (utils.py 283): INFO Epoch: [22] [1560/2502] eta: 0:12:03 lr: 0.000005 loss_cls: 4.1662 (3.9023) grad_norm: 2.3759 (2.4131) time: 0.7842 data: 0.0002 max mem: 8426 +[2024-12-10 23:30:38 root] (utils.py 283): INFO Epoch: [22] [1570/2502] eta: 0:11:55 lr: 0.000005 loss_cls: 4.1996 (3.9036) grad_norm: 2.3609 (2.4134) time: 0.7830 data: 0.0002 max mem: 8426 +[2024-12-10 23:30:46 root] (utils.py 283): INFO Epoch: [22] [1580/2502] eta: 0:11:47 lr: 0.000005 loss_cls: 4.0936 (3.9032) grad_norm: 2.3501 (2.4133) time: 0.7837 data: 0.0002 max mem: 8426 +[2024-12-10 23:30:53 root] (utils.py 283): INFO Epoch: [22] [1590/2502] eta: 0:11:40 lr: 0.000005 loss_cls: 3.9211 (3.9023) grad_norm: 2.4044 (2.4132) time: 0.7838 data: 0.0002 max mem: 8426 +[2024-12-10 23:31:01 root] (utils.py 283): INFO Epoch: [22] [1600/2502] eta: 0:11:32 lr: 0.000005 loss_cls: 3.9213 (3.9031) grad_norm: 2.4136 (2.4133) time: 0.7820 data: 0.0002 max mem: 8426 +[2024-12-10 23:31:09 root] (utils.py 283): INFO Epoch: [22] [1610/2502] eta: 0:11:25 lr: 0.000005 loss_cls: 4.1048 (3.9036) grad_norm: 2.3925 (2.4128) time: 0.7827 data: 0.0002 max mem: 8426 +[2024-12-10 23:31:17 root] (utils.py 283): INFO Epoch: [22] [1620/2502] eta: 0:11:17 lr: 0.000005 loss_cls: 3.9708 (3.9033) grad_norm: 2.3550 (2.4129) time: 0.7847 data: 0.0002 max mem: 8426 +[2024-12-10 23:31:25 root] (utils.py 283): INFO Epoch: [22] [1630/2502] eta: 0:11:09 lr: 0.000005 loss_cls: 3.8780 (3.9043) grad_norm: 2.4145 (2.4129) time: 0.7833 data: 0.0002 max mem: 8426 +[2024-12-10 23:31:33 root] (utils.py 283): INFO Epoch: [22] [1640/2502] eta: 0:11:02 lr: 0.000005 loss_cls: 3.9420 (3.9041) grad_norm: 2.3724 (2.4128) time: 0.7833 data: 0.0002 max mem: 8426 +[2024-12-10 23:31:40 root] (utils.py 283): INFO Epoch: [22] [1650/2502] eta: 0:10:54 lr: 0.000005 loss_cls: 3.9917 (3.9037) grad_norm: 2.3724 (2.4124) time: 0.7853 data: 0.0002 max mem: 8426 +[2024-12-10 23:31:48 root] (utils.py 283): INFO Epoch: [22] [1660/2502] eta: 0:10:47 lr: 0.000005 loss_cls: 3.9632 (3.9032) grad_norm: 2.3773 (2.4122) time: 0.7862 data: 0.0003 max mem: 8426 +[2024-12-10 23:31:56 root] (utils.py 283): INFO Epoch: [22] [1670/2502] eta: 0:10:39 lr: 0.000005 loss_cls: 3.9632 (3.9031) grad_norm: 2.3625 (2.4123) time: 0.7859 data: 0.0003 max mem: 8426 +[2024-12-10 23:32:04 root] (utils.py 283): INFO Epoch: [22] [1680/2502] eta: 0:10:31 lr: 0.000005 loss_cls: 3.9849 (3.9039) grad_norm: 2.3625 (2.4123) time: 0.7847 data: 0.0002 max mem: 8426 +[2024-12-10 23:32:12 root] (utils.py 283): INFO Epoch: [22] [1690/2502] eta: 0:10:24 lr: 0.000005 loss_cls: 3.9449 (3.9032) grad_norm: 2.3543 (2.4122) time: 0.7756 data: 0.0002 max mem: 8426 +[2024-12-10 23:32:19 root] (utils.py 283): INFO Epoch: [22] [1700/2502] eta: 0:10:16 lr: 0.000005 loss_cls: 3.7908 (3.9023) grad_norm: 2.3543 (2.4123) time: 0.7668 data: 0.0002 max mem: 8426 +[2024-12-10 23:32:27 root] (utils.py 283): INFO Epoch: [22] [1710/2502] eta: 0:10:08 lr: 0.000005 loss_cls: 3.8173 (3.9013) grad_norm: 2.4328 (2.4123) time: 0.7635 data: 0.0003 max mem: 8426 +[2024-12-10 23:32:35 root] (utils.py 283): INFO Epoch: [22] [1720/2502] eta: 0:10:01 lr: 0.000005 loss_cls: 3.8849 (3.9011) grad_norm: 2.4185 (2.4122) time: 0.7607 data: 0.0002 max mem: 8426 +[2024-12-10 23:32:42 root] (utils.py 283): INFO Epoch: [22] [1730/2502] eta: 0:09:53 lr: 0.000005 loss_cls: 4.0073 (3.9013) grad_norm: 2.4046 (2.4119) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-10 23:32:50 root] (utils.py 283): INFO Epoch: [22] [1740/2502] eta: 0:09:45 lr: 0.000005 loss_cls: 4.1965 (3.9024) grad_norm: 2.3354 (2.4116) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-10 23:32:58 root] (utils.py 283): INFO Epoch: [22] [1750/2502] eta: 0:09:37 lr: 0.000005 loss_cls: 4.1965 (3.9031) grad_norm: 2.3350 (2.4115) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-10 23:33:05 root] (utils.py 283): INFO Epoch: [22] [1760/2502] eta: 0:09:30 lr: 0.000005 loss_cls: 4.0218 (3.9025) grad_norm: 2.4168 (2.4120) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-10 23:33:13 root] (utils.py 283): INFO Epoch: [22] [1770/2502] eta: 0:09:22 lr: 0.000005 loss_cls: 3.9013 (3.9013) grad_norm: 2.4708 (2.4125) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 23:33:21 root] (utils.py 283): INFO Epoch: [22] [1780/2502] eta: 0:09:14 lr: 0.000005 loss_cls: 3.9715 (3.9025) grad_norm: 2.4604 (2.4127) time: 0.7657 data: 0.0003 max mem: 8426 +[2024-12-10 23:33:28 root] (utils.py 283): INFO Epoch: [22] [1790/2502] eta: 0:09:07 lr: 0.000005 loss_cls: 4.1860 (3.9032) grad_norm: 2.4008 (2.4126) time: 0.7604 data: 0.0002 max mem: 8426 +[2024-12-10 23:33:36 root] (utils.py 283): INFO Epoch: [22] [1800/2502] eta: 0:08:59 lr: 0.000005 loss_cls: 4.2130 (3.9048) grad_norm: 2.4148 (2.4127) time: 0.7689 data: 0.0002 max mem: 8426 +[2024-12-10 23:33:44 root] (utils.py 283): INFO Epoch: [22] [1810/2502] eta: 0:08:51 lr: 0.000005 loss_cls: 4.2036 (3.9044) grad_norm: 2.4124 (2.4126) time: 0.7722 data: 0.0002 max mem: 8426 +[2024-12-10 23:33:51 root] (utils.py 283): INFO Epoch: [22] [1820/2502] eta: 0:08:44 lr: 0.000005 loss_cls: 3.7344 (3.9037) grad_norm: 2.3767 (2.4123) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 23:33:59 root] (utils.py 283): INFO Epoch: [22] [1830/2502] eta: 0:08:36 lr: 0.000005 loss_cls: 3.5693 (3.9020) grad_norm: 2.3617 (2.4121) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 23:34:06 root] (utils.py 283): INFO Epoch: [22] [1840/2502] eta: 0:08:28 lr: 0.000005 loss_cls: 3.9860 (3.9030) grad_norm: 2.3944 (2.4121) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 23:34:14 root] (utils.py 283): INFO Epoch: [22] [1850/2502] eta: 0:08:20 lr: 0.000005 loss_cls: 3.8364 (3.9010) grad_norm: 2.4096 (2.4120) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 23:34:22 root] (utils.py 283): INFO Epoch: [22] [1860/2502] eta: 0:08:13 lr: 0.000005 loss_cls: 3.9742 (3.9024) grad_norm: 2.3828 (2.4122) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-10 23:34:29 root] (utils.py 283): INFO Epoch: [22] [1870/2502] eta: 0:08:05 lr: 0.000005 loss_cls: 4.0262 (3.9027) grad_norm: 2.3619 (2.4123) time: 0.7604 data: 0.0002 max mem: 8426 +[2024-12-10 23:34:37 root] (utils.py 283): INFO Epoch: [22] [1880/2502] eta: 0:07:57 lr: 0.000005 loss_cls: 3.9721 (3.9032) grad_norm: 2.3619 (2.4124) time: 0.7595 data: 0.0002 max mem: 8426 +[2024-12-10 23:34:45 root] (utils.py 283): INFO Epoch: [22] [1890/2502] eta: 0:07:50 lr: 0.000005 loss_cls: 3.7685 (3.9015) grad_norm: 2.4041 (2.4124) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 23:34:52 root] (utils.py 283): INFO Epoch: [22] [1900/2502] eta: 0:07:42 lr: 0.000005 loss_cls: 3.8451 (3.9017) grad_norm: 2.4296 (2.4127) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 23:35:00 root] (utils.py 283): INFO Epoch: [22] [1910/2502] eta: 0:07:34 lr: 0.000005 loss_cls: 3.9712 (3.9007) grad_norm: 2.4366 (2.4128) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 23:35:08 root] (utils.py 283): INFO Epoch: [22] [1920/2502] eta: 0:07:27 lr: 0.000005 loss_cls: 3.9493 (3.9013) grad_norm: 2.4603 (2.4130) time: 0.7719 data: 0.0002 max mem: 8426 +[2024-12-10 23:35:15 root] (utils.py 283): INFO Epoch: [22] [1930/2502] eta: 0:07:19 lr: 0.000005 loss_cls: 4.2482 (3.9014) grad_norm: 2.4852 (2.4135) time: 0.7719 data: 0.0002 max mem: 8426 +[2024-12-10 23:35:23 root] (utils.py 283): INFO Epoch: [22] [1940/2502] eta: 0:07:11 lr: 0.000005 loss_cls: 4.1646 (3.9019) grad_norm: 2.4586 (2.4133) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-10 23:35:31 root] (utils.py 283): INFO Epoch: [22] [1950/2502] eta: 0:07:04 lr: 0.000005 loss_cls: 3.9276 (3.9017) grad_norm: 2.4448 (2.4133) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-10 23:35:38 root] (utils.py 283): INFO Epoch: [22] [1960/2502] eta: 0:06:56 lr: 0.000005 loss_cls: 3.8230 (3.9008) grad_norm: 2.4741 (2.4137) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-10 23:35:46 root] (utils.py 283): INFO Epoch: [22] [1970/2502] eta: 0:06:48 lr: 0.000005 loss_cls: 4.0393 (3.9018) grad_norm: 2.3937 (2.4133) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-10 23:35:53 root] (utils.py 283): INFO Epoch: [22] [1980/2502] eta: 0:06:40 lr: 0.000005 loss_cls: 4.0871 (3.9005) grad_norm: 2.3755 (2.4134) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 23:36:01 root] (utils.py 283): INFO Epoch: [22] [1990/2502] eta: 0:06:33 lr: 0.000005 loss_cls: 3.9060 (3.8998) grad_norm: 2.4132 (2.4137) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-10 23:36:09 root] (utils.py 283): INFO Epoch: [22] [2000/2502] eta: 0:06:25 lr: 0.000005 loss_cls: 4.0085 (3.8996) grad_norm: 2.3634 (2.4139) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 23:36:16 root] (utils.py 283): INFO Epoch: [22] [2010/2502] eta: 0:06:17 lr: 0.000005 loss_cls: 4.0085 (3.9010) grad_norm: 2.3651 (2.4137) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-10 23:36:24 root] (utils.py 283): INFO Epoch: [22] [2020/2502] eta: 0:06:10 lr: 0.000005 loss_cls: 4.1532 (3.9023) grad_norm: 2.3630 (2.4138) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 23:36:32 root] (utils.py 283): INFO Epoch: [22] [2030/2502] eta: 0:06:02 lr: 0.000005 loss_cls: 4.0034 (3.9007) grad_norm: 2.3458 (2.4132) time: 0.7670 data: 0.0003 max mem: 8426 +[2024-12-10 23:36:39 root] (utils.py 283): INFO Epoch: [22] [2040/2502] eta: 0:05:54 lr: 0.000005 loss_cls: 3.7392 (3.8996) grad_norm: 2.3089 (2.4129) time: 0.7671 data: 0.0002 max mem: 8426 +[2024-12-10 23:36:47 root] (utils.py 283): INFO Epoch: [22] [2050/2502] eta: 0:05:47 lr: 0.000005 loss_cls: 3.9295 (3.8997) grad_norm: 2.3056 (2.4126) time: 0.7701 data: 0.0002 max mem: 8426 +[2024-12-10 23:36:55 root] (utils.py 283): INFO Epoch: [22] [2060/2502] eta: 0:05:39 lr: 0.000005 loss_cls: 4.1250 (3.9005) grad_norm: 2.3568 (2.4127) time: 0.7694 data: 0.0003 max mem: 8426 +[2024-12-10 23:37:03 root] (utils.py 283): INFO Epoch: [22] [2070/2502] eta: 0:05:31 lr: 0.000005 loss_cls: 4.1250 (3.8995) grad_norm: 2.3879 (2.4126) time: 0.7738 data: 0.0003 max mem: 8426 +[2024-12-10 23:37:10 root] (utils.py 283): INFO Epoch: [22] [2080/2502] eta: 0:05:24 lr: 0.000005 loss_cls: 4.1265 (3.9004) grad_norm: 2.3765 (2.4125) time: 0.7740 data: 0.0002 max mem: 8426 +[2024-12-10 23:37:18 root] (utils.py 283): INFO Epoch: [22] [2090/2502] eta: 0:05:16 lr: 0.000005 loss_cls: 3.9326 (3.8990) grad_norm: 2.3765 (2.4123) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-10 23:37:26 root] (utils.py 283): INFO Epoch: [22] [2100/2502] eta: 0:05:08 lr: 0.000005 loss_cls: 3.7870 (3.8986) grad_norm: 2.4033 (2.4125) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-10 23:37:33 root] (utils.py 283): INFO Epoch: [22] [2110/2502] eta: 0:05:01 lr: 0.000005 loss_cls: 4.0469 (3.8993) grad_norm: 2.4344 (2.4126) time: 0.7686 data: 0.0002 max mem: 8426 +[2024-12-10 23:37:41 root] (utils.py 283): INFO Epoch: [22] [2120/2502] eta: 0:04:53 lr: 0.000005 loss_cls: 4.0469 (3.8995) grad_norm: 2.3984 (2.4124) time: 0.7665 data: 0.0002 max mem: 8426 +[2024-12-10 23:37:49 root] (utils.py 283): INFO Epoch: [22] [2130/2502] eta: 0:04:45 lr: 0.000005 loss_cls: 4.0832 (3.8993) grad_norm: 2.3137 (2.4121) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 23:37:56 root] (utils.py 283): INFO Epoch: [22] [2140/2502] eta: 0:04:38 lr: 0.000005 loss_cls: 3.8117 (3.8972) grad_norm: 2.3410 (2.4123) time: 0.7672 data: 0.0002 max mem: 8426 +[2024-12-10 23:38:04 root] (utils.py 283): INFO Epoch: [22] [2150/2502] eta: 0:04:30 lr: 0.000005 loss_cls: 3.5468 (3.8965) grad_norm: 2.4237 (2.4125) time: 0.7715 data: 0.0002 max mem: 8426 +[2024-12-10 23:38:12 root] (utils.py 283): INFO Epoch: [22] [2160/2502] eta: 0:04:22 lr: 0.000005 loss_cls: 4.0718 (3.8978) grad_norm: 2.4237 (2.4124) time: 0.7665 data: 0.0002 max mem: 8426 +[2024-12-10 23:38:19 root] (utils.py 283): INFO Epoch: [22] [2170/2502] eta: 0:04:14 lr: 0.000005 loss_cls: 4.0280 (3.8965) grad_norm: 2.4366 (2.4125) time: 0.7635 data: 0.0003 max mem: 8426 +[2024-12-10 23:38:27 root] (utils.py 283): INFO Epoch: [22] [2180/2502] eta: 0:04:07 lr: 0.000005 loss_cls: 3.7686 (3.8964) grad_norm: 2.4316 (2.4126) time: 0.7654 data: 0.0003 max mem: 8426 +[2024-12-10 23:38:35 root] (utils.py 283): INFO Epoch: [22] [2190/2502] eta: 0:03:59 lr: 0.000005 loss_cls: 3.8410 (3.8954) grad_norm: 2.4097 (2.4127) time: 0.7655 data: 0.0003 max mem: 8426 +[2024-12-10 23:38:42 root] (utils.py 283): INFO Epoch: [22] [2200/2502] eta: 0:03:51 lr: 0.000005 loss_cls: 3.2138 (3.8933) grad_norm: 2.4549 (2.4131) time: 0.7658 data: 0.0003 max mem: 8426 +[2024-12-10 23:38:50 root] (utils.py 283): INFO Epoch: [22] [2210/2502] eta: 0:03:44 lr: 0.000005 loss_cls: 3.5943 (3.8929) grad_norm: 2.4482 (2.4132) time: 0.7674 data: 0.0003 max mem: 8426 +[2024-12-10 23:38:58 root] (utils.py 283): INFO Epoch: [22] [2220/2502] eta: 0:03:36 lr: 0.000005 loss_cls: 4.0737 (3.8940) grad_norm: 2.3902 (2.4134) time: 0.7762 data: 0.0003 max mem: 8426 +[2024-12-10 23:39:06 root] (utils.py 283): INFO Epoch: [22] [2230/2502] eta: 0:03:28 lr: 0.000005 loss_cls: 4.1388 (3.8938) grad_norm: 2.4374 (2.4133) time: 0.7839 data: 0.0002 max mem: 8426 +[2024-12-10 23:39:13 root] (utils.py 283): INFO Epoch: [22] [2240/2502] eta: 0:03:21 lr: 0.000005 loss_cls: 3.8976 (3.8938) grad_norm: 2.4412 (2.4135) time: 0.7835 data: 0.0002 max mem: 8426 +[2024-12-10 23:39:21 root] (utils.py 283): INFO Epoch: [22] [2250/2502] eta: 0:03:13 lr: 0.000005 loss_cls: 3.8976 (3.8930) grad_norm: 2.4040 (2.4135) time: 0.7824 data: 0.0003 max mem: 8426 +[2024-12-10 23:39:29 root] (utils.py 283): INFO Epoch: [22] [2260/2502] eta: 0:03:05 lr: 0.000005 loss_cls: 4.0461 (3.8937) grad_norm: 2.3906 (2.4135) time: 0.7825 data: 0.0003 max mem: 8426 +[2024-12-10 23:39:37 root] (utils.py 283): INFO Epoch: [22] [2270/2502] eta: 0:02:58 lr: 0.000005 loss_cls: 4.2930 (3.8948) grad_norm: 2.4545 (2.4137) time: 0.7800 data: 0.0002 max mem: 8426 +[2024-12-10 23:39:44 root] (utils.py 283): INFO Epoch: [22] [2280/2502] eta: 0:02:50 lr: 0.000005 loss_cls: 4.2976 (3.8964) grad_norm: 2.4106 (2.4137) time: 0.7687 data: 0.0003 max mem: 8426 +[2024-12-10 23:39:52 root] (utils.py 283): INFO Epoch: [22] [2290/2502] eta: 0:02:42 lr: 0.000005 loss_cls: 4.0766 (3.8955) grad_norm: 2.3826 (2.4136) time: 0.7571 data: 0.0003 max mem: 8426 +[2024-12-10 23:40:00 root] (utils.py 283): INFO Epoch: [22] [2300/2502] eta: 0:02:35 lr: 0.000005 loss_cls: 3.9217 (3.8957) grad_norm: 2.3862 (2.4135) time: 0.7598 data: 0.0002 max mem: 8426 +[2024-12-10 23:40:07 root] (utils.py 283): INFO Epoch: [22] [2310/2502] eta: 0:02:27 lr: 0.000005 loss_cls: 3.8424 (3.8950) grad_norm: 2.3862 (2.4134) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 23:40:15 root] (utils.py 283): INFO Epoch: [22] [2320/2502] eta: 0:02:19 lr: 0.000005 loss_cls: 3.8424 (3.8945) grad_norm: 2.3937 (2.4136) time: 0.7668 data: 0.0002 max mem: 8426 +[2024-12-10 23:40:23 root] (utils.py 283): INFO Epoch: [22] [2330/2502] eta: 0:02:12 lr: 0.000005 loss_cls: 3.7704 (3.8937) grad_norm: 2.4037 (2.4136) time: 0.7700 data: 0.0002 max mem: 8426 +[2024-12-10 23:40:31 root] (utils.py 283): INFO Epoch: [22] [2340/2502] eta: 0:02:04 lr: 0.000005 loss_cls: 3.8072 (3.8939) grad_norm: 2.4275 (2.4136) time: 0.7807 data: 0.0002 max mem: 8426 +[2024-12-10 23:40:38 root] (utils.py 283): INFO Epoch: [22] [2350/2502] eta: 0:01:56 lr: 0.000005 loss_cls: 4.0673 (3.8936) grad_norm: 2.3834 (2.4135) time: 0.7855 data: 0.0002 max mem: 8426 +[2024-12-10 23:40:46 root] (utils.py 283): INFO Epoch: [22] [2360/2502] eta: 0:01:49 lr: 0.000005 loss_cls: 3.8609 (3.8928) grad_norm: 2.3569 (2.4135) time: 0.7808 data: 0.0002 max mem: 8426 +[2024-12-10 23:40:54 root] (utils.py 283): INFO Epoch: [22] [2370/2502] eta: 0:01:41 lr: 0.000005 loss_cls: 3.9918 (3.8941) grad_norm: 2.3569 (2.4134) time: 0.7831 data: 0.0002 max mem: 8426 +[2024-12-10 23:41:02 root] (utils.py 283): INFO Epoch: [22] [2380/2502] eta: 0:01:33 lr: 0.000005 loss_cls: 3.9918 (3.8939) grad_norm: 2.3845 (2.4137) time: 0.7771 data: 0.0002 max mem: 8426 +[2024-12-10 23:41:09 root] (utils.py 283): INFO Epoch: [22] [2390/2502] eta: 0:01:26 lr: 0.000005 loss_cls: 3.7098 (3.8930) grad_norm: 2.4045 (2.4137) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 23:41:17 root] (utils.py 283): INFO Epoch: [22] [2400/2502] eta: 0:01:18 lr: 0.000005 loss_cls: 3.6484 (3.8928) grad_norm: 2.4157 (2.4139) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 23:41:25 root] (utils.py 283): INFO Epoch: [22] [2410/2502] eta: 0:01:10 lr: 0.000005 loss_cls: 4.1204 (3.8939) grad_norm: 2.4375 (2.4142) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-10 23:41:32 root] (utils.py 283): INFO Epoch: [22] [2420/2502] eta: 0:01:03 lr: 0.000005 loss_cls: 3.8830 (3.8923) grad_norm: 2.4170 (2.4141) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-10 23:41:40 root] (utils.py 283): INFO Epoch: [22] [2430/2502] eta: 0:00:55 lr: 0.000005 loss_cls: 3.5697 (3.8911) grad_norm: 2.3821 (2.4143) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 23:41:48 root] (utils.py 283): INFO Epoch: [22] [2440/2502] eta: 0:00:47 lr: 0.000005 loss_cls: 3.7090 (3.8908) grad_norm: 2.4048 (2.4143) time: 0.7600 data: 0.0003 max mem: 8426 +[2024-12-10 23:41:55 root] (utils.py 283): INFO Epoch: [22] [2450/2502] eta: 0:00:39 lr: 0.000005 loss_cls: 3.8996 (3.8906) grad_norm: 2.3835 (2.4142) time: 0.7589 data: 0.0003 max mem: 8426 +[2024-12-10 23:42:03 root] (utils.py 283): INFO Epoch: [22] [2460/2502] eta: 0:00:32 lr: 0.000005 loss_cls: 3.6467 (3.8904) grad_norm: 2.4334 (2.4142) time: 0.7646 data: 0.0003 max mem: 8426 +[2024-12-10 23:42:10 root] (utils.py 283): INFO Epoch: [22] [2470/2502] eta: 0:00:24 lr: 0.000005 loss_cls: 4.0431 (3.8908) grad_norm: 2.4334 (2.4142) time: 0.7635 data: 0.0003 max mem: 8426 +[2024-12-10 23:42:18 root] (utils.py 283): INFO Epoch: [22] [2480/2502] eta: 0:00:16 lr: 0.000005 loss_cls: 4.0480 (3.8906) grad_norm: 2.3825 (2.4140) time: 0.7599 data: 0.0003 max mem: 8426 +[2024-12-10 23:42:26 root] (utils.py 283): INFO Epoch: [22] [2490/2502] eta: 0:00:09 lr: 0.000005 loss_cls: 3.9741 (3.8914) grad_norm: 2.3825 (2.4141) time: 0.7925 data: 0.0238 max mem: 8426 +[2024-12-10 23:42:34 root] (utils.py 283): INFO Epoch: [22] [2500/2502] eta: 0:00:01 lr: 0.000005 loss_cls: 3.9357 (3.8909) grad_norm: 2.4350 (2.4141) time: 0.7931 data: 0.0237 max mem: 8426 +[2024-12-10 23:42:35 root] (utils.py 283): INFO Epoch: [22] [2501/2502] eta: 0:00:00 lr: 0.000005 loss_cls: 3.9570 (3.8911) grad_norm: 2.3928 (2.4141) time: 0.7923 data: 0.0237 max mem: 8426 +[2024-12-10 23:42:35 root] (utils.py 297): INFO Epoch: [22] Total time: 0:32:03 (0.7686 s / it) +[2024-12-10 23:42:35 root] (engine.py 179): INFO Averaged stats:lr: 0.000005 loss_cls: 3.9570 (3.8866) grad_norm: 2.3928 (2.4141) +[2024-12-10 23:42:35 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.6358 (0.6358) acc1: 85.1562 (85.1562) acc3: 96.0938 (96.0938) acc5: 98.4375 (98.4375) time: 0.1273 data: 0.0003 max mem: 8426 +[2024-12-10 23:42:36 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7242 (0.8055) acc1: 85.1562 (82.4574) acc3: 96.0938 (93.9631) acc5: 96.8750 (96.5199) time: 0.1279 data: 0.0004 max mem: 8426 +[2024-12-10 23:42:38 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8566 (0.8616) acc1: 78.1250 (81.0640) acc3: 92.9688 (93.3036) acc5: 96.0938 (95.6845) time: 0.1279 data: 0.0004 max mem: 8426 +[2024-12-10 23:42:39 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9419 (0.8751) acc1: 78.1250 (80.1663) acc3: 92.9688 (93.3972) acc5: 96.0938 (95.7409) time: 0.1285 data: 0.0004 max mem: 8426 +[2024-12-10 23:42:40 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8149 (0.8627) acc1: 80.4688 (80.7736) acc3: 94.5312 (93.4642) acc5: 96.0938 (95.8651) time: 0.1289 data: 0.0004 max mem: 8426 +[2024-12-10 23:42:42 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0192 (0.9474) acc1: 75.7812 (78.7531) acc3: 88.2812 (91.9424) acc5: 92.9688 (94.8070) time: 0.1420 data: 0.0137 max mem: 8426 +[2024-12-10 23:42:44 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2464 (0.9935) acc1: 71.0938 (78.0482) acc3: 85.9375 (91.0733) acc5: 89.8438 (93.9805) time: 0.1718 data: 0.0436 max mem: 8426 +[2024-12-10 23:42:45 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1660 (1.0336) acc1: 75.7812 (77.2337) acc3: 86.7188 (90.5150) acc5: 89.8438 (93.5849) time: 0.1585 data: 0.0302 max mem: 8426 +[2024-12-10 23:42:46 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2127 (1.0672) acc1: 73.4375 (76.4660) acc3: 86.7188 (89.9402) acc5: 89.8438 (93.0266) time: 0.1286 data: 0.0007 max mem: 8426 +[2024-12-10 23:42:48 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2376 (1.0953) acc1: 71.0938 (75.7126) acc3: 85.9375 (89.5604) acc5: 89.8438 (92.7284) time: 0.1286 data: 0.0008 max mem: 8426 +[2024-12-10 23:42:48 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1513 (1.0836) acc1: 73.4375 (75.9520) acc3: 89.0625 (89.7600) acc5: 92.1875 (92.9280) time: 0.1268 data: 0.0007 max mem: 8426 +[2024-12-10 23:42:48 root] (utils.py 297): INFO Test: Total time: 0:00:13 (0.1371 s / it) +[2024-12-10 23:42:49 root] (engine.py 264): INFO * Acc@1 75.884 Acc@3 89.674 Acc@5 92.936 loss 1.086 flops 1.285 layer_flops 1.251 +[2024-12-10 23:42:49 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 75.9% +[2024-12-10 23:42:49 root] (main.py 576): INFO Max accuracy: 75.88% +[2024-12-10 23:42:50 root] (utils.py 283): INFO Epoch: [23] [ 0/2502] eta: 0:32:28 lr: 0.000004 loss_cls: 3.1261 (3.1261) grad_norm: 2.5117 (2.5117) time: 0.7788 data: 0.0005 max mem: 8426 +[2024-12-10 23:42:58 root] (utils.py 283): INFO Epoch: [23] [ 10/2502] eta: 0:31:53 lr: 0.000004 loss_cls: 3.7850 (3.7367) grad_norm: 2.4033 (2.4430) time: 0.7680 data: 0.0003 max mem: 8426 +[2024-12-10 23:43:06 root] (utils.py 283): INFO Epoch: [23] [ 20/2502] eta: 0:32:14 lr: 0.000004 loss_cls: 3.8598 (3.7715) grad_norm: 2.3740 (2.4112) time: 0.7796 data: 0.0003 max mem: 8426 +[2024-12-10 23:43:14 root] (utils.py 283): INFO Epoch: [23] [ 30/2502] eta: 0:32:15 lr: 0.000004 loss_cls: 4.1734 (3.8389) grad_norm: 2.3980 (2.4254) time: 0.7914 data: 0.0003 max mem: 8426 +[2024-12-10 23:43:21 root] (utils.py 283): INFO Epoch: [23] [ 40/2502] eta: 0:31:59 lr: 0.000004 loss_cls: 4.2248 (3.9233) grad_norm: 2.4475 (2.4320) time: 0.7800 data: 0.0003 max mem: 8426 +[2024-12-10 23:43:29 root] (utils.py 283): INFO Epoch: [23] [ 50/2502] eta: 0:31:48 lr: 0.000004 loss_cls: 4.1256 (3.9438) grad_norm: 2.4830 (2.4423) time: 0.7713 data: 0.0003 max mem: 8426 +[2024-12-10 23:43:37 root] (utils.py 283): INFO Epoch: [23] [ 60/2502] eta: 0:31:39 lr: 0.000004 loss_cls: 4.2651 (3.9917) grad_norm: 2.3964 (2.4345) time: 0.7744 data: 0.0003 max mem: 8426 +[2024-12-10 23:43:44 root] (utils.py 283): INFO Epoch: [23] [ 70/2502] eta: 0:31:28 lr: 0.000004 loss_cls: 4.1211 (4.0021) grad_norm: 2.3876 (2.4340) time: 0.7722 data: 0.0002 max mem: 8426 +[2024-12-10 23:43:52 root] (utils.py 283): INFO Epoch: [23] [ 80/2502] eta: 0:31:23 lr: 0.000004 loss_cls: 4.0469 (3.9986) grad_norm: 2.4342 (2.4287) time: 0.7763 data: 0.0002 max mem: 8426 +[2024-12-10 23:44:00 root] (utils.py 283): INFO Epoch: [23] [ 90/2502] eta: 0:31:11 lr: 0.000004 loss_cls: 4.1702 (4.0165) grad_norm: 2.4218 (2.4281) time: 0.7738 data: 0.0003 max mem: 8426 +[2024-12-10 23:44:07 root] (utils.py 283): INFO Epoch: [23] [ 100/2502] eta: 0:31:00 lr: 0.000004 loss_cls: 4.0673 (3.9977) grad_norm: 2.4190 (2.4277) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 23:44:15 root] (utils.py 283): INFO Epoch: [23] [ 110/2502] eta: 0:30:50 lr: 0.000004 loss_cls: 3.9593 (3.9668) grad_norm: 2.4526 (2.4326) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-10 23:44:23 root] (utils.py 283): INFO Epoch: [23] [ 120/2502] eta: 0:30:40 lr: 0.000004 loss_cls: 3.8135 (3.9549) grad_norm: 2.4048 (2.4229) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-10 23:44:30 root] (utils.py 283): INFO Epoch: [23] [ 130/2502] eta: 0:30:31 lr: 0.000004 loss_cls: 3.9590 (3.9523) grad_norm: 2.3351 (2.4215) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-10 23:44:38 root] (utils.py 283): INFO Epoch: [23] [ 140/2502] eta: 0:30:23 lr: 0.000004 loss_cls: 4.1769 (3.9641) grad_norm: 2.3928 (2.4269) time: 0.7665 data: 0.0002 max mem: 8426 +[2024-12-10 23:44:46 root] (utils.py 283): INFO Epoch: [23] [ 150/2502] eta: 0:30:14 lr: 0.000004 loss_cls: 4.1108 (3.9666) grad_norm: 2.3928 (2.4223) time: 0.7674 data: 0.0003 max mem: 8426 +[2024-12-10 23:44:53 root] (utils.py 283): INFO Epoch: [23] [ 160/2502] eta: 0:30:06 lr: 0.000004 loss_cls: 3.9648 (3.9584) grad_norm: 2.3435 (2.4155) time: 0.7684 data: 0.0003 max mem: 8426 +[2024-12-10 23:45:01 root] (utils.py 283): INFO Epoch: [23] [ 170/2502] eta: 0:29:57 lr: 0.000004 loss_cls: 3.9648 (3.9498) grad_norm: 2.3628 (2.4163) time: 0.7662 data: 0.0003 max mem: 8426 +[2024-12-10 23:45:09 root] (utils.py 283): INFO Epoch: [23] [ 180/2502] eta: 0:29:49 lr: 0.000004 loss_cls: 3.7680 (3.9408) grad_norm: 2.3919 (2.4152) time: 0.7628 data: 0.0003 max mem: 8426 +[2024-12-10 23:45:17 root] (utils.py 283): INFO Epoch: [23] [ 190/2502] eta: 0:29:42 lr: 0.000004 loss_cls: 3.7680 (3.9313) grad_norm: 2.4054 (2.4157) time: 0.7714 data: 0.0003 max mem: 8426 +[2024-12-10 23:45:24 root] (utils.py 283): INFO Epoch: [23] [ 200/2502] eta: 0:29:34 lr: 0.000004 loss_cls: 3.9567 (3.9371) grad_norm: 2.4428 (2.4198) time: 0.7746 data: 0.0003 max mem: 8426 +[2024-12-10 23:45:32 root] (utils.py 283): INFO Epoch: [23] [ 210/2502] eta: 0:29:25 lr: 0.000004 loss_cls: 4.0634 (3.9310) grad_norm: 2.4808 (2.4233) time: 0.7650 data: 0.0003 max mem: 8426 +[2024-12-10 23:45:39 root] (utils.py 283): INFO Epoch: [23] [ 220/2502] eta: 0:29:16 lr: 0.000004 loss_cls: 4.1540 (3.9398) grad_norm: 2.4798 (2.4242) time: 0.7606 data: 0.0003 max mem: 8426 +[2024-12-10 23:45:47 root] (utils.py 283): INFO Epoch: [23] [ 230/2502] eta: 0:29:08 lr: 0.000004 loss_cls: 4.0551 (3.9407) grad_norm: 2.3906 (2.4221) time: 0.7623 data: 0.0003 max mem: 8426 +[2024-12-10 23:45:55 root] (utils.py 283): INFO Epoch: [23] [ 240/2502] eta: 0:29:01 lr: 0.000004 loss_cls: 3.8612 (3.9391) grad_norm: 2.3906 (2.4225) time: 0.7694 data: 0.0002 max mem: 8426 +[2024-12-10 23:46:02 root] (utils.py 283): INFO Epoch: [23] [ 250/2502] eta: 0:28:53 lr: 0.000004 loss_cls: 3.8612 (3.9383) grad_norm: 2.4053 (2.4219) time: 0.7690 data: 0.0002 max mem: 8426 +[2024-12-10 23:46:10 root] (utils.py 283): INFO Epoch: [23] [ 260/2502] eta: 0:28:45 lr: 0.000004 loss_cls: 3.9400 (3.9386) grad_norm: 2.4009 (2.4210) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-10 23:46:18 root] (utils.py 283): INFO Epoch: [23] [ 270/2502] eta: 0:28:37 lr: 0.000004 loss_cls: 3.8890 (3.9260) grad_norm: 2.3967 (2.4218) time: 0.7665 data: 0.0003 max mem: 8426 +[2024-12-10 23:46:25 root] (utils.py 283): INFO Epoch: [23] [ 280/2502] eta: 0:28:29 lr: 0.000004 loss_cls: 3.5694 (3.9203) grad_norm: 2.3665 (2.4209) time: 0.7686 data: 0.0003 max mem: 8426 +[2024-12-10 23:46:33 root] (utils.py 283): INFO Epoch: [23] [ 290/2502] eta: 0:28:22 lr: 0.000004 loss_cls: 3.9619 (3.9193) grad_norm: 2.3402 (2.4204) time: 0.7711 data: 0.0002 max mem: 8426 +[2024-12-10 23:46:41 root] (utils.py 283): INFO Epoch: [23] [ 300/2502] eta: 0:28:14 lr: 0.000004 loss_cls: 4.1068 (3.9184) grad_norm: 2.4233 (2.4220) time: 0.7731 data: 0.0003 max mem: 8426 +[2024-12-10 23:46:49 root] (utils.py 283): INFO Epoch: [23] [ 310/2502] eta: 0:28:07 lr: 0.000004 loss_cls: 3.8698 (3.9076) grad_norm: 2.3951 (2.4197) time: 0.7761 data: 0.0003 max mem: 8426 +[2024-12-10 23:46:57 root] (utils.py 283): INFO Epoch: [23] [ 320/2502] eta: 0:28:00 lr: 0.000004 loss_cls: 3.6281 (3.9007) grad_norm: 2.3879 (2.4218) time: 0.7797 data: 0.0003 max mem: 8426 +[2024-12-10 23:47:04 root] (utils.py 283): INFO Epoch: [23] [ 330/2502] eta: 0:27:53 lr: 0.000004 loss_cls: 3.4861 (3.8890) grad_norm: 2.4152 (2.4225) time: 0.7819 data: 0.0003 max mem: 8426 +[2024-12-10 23:47:12 root] (utils.py 283): INFO Epoch: [23] [ 340/2502] eta: 0:27:46 lr: 0.000004 loss_cls: 3.6677 (3.8888) grad_norm: 2.3929 (2.4235) time: 0.7831 data: 0.0003 max mem: 8426 +[2024-12-10 23:47:20 root] (utils.py 283): INFO Epoch: [23] [ 350/2502] eta: 0:27:39 lr: 0.000004 loss_cls: 3.8034 (3.8909) grad_norm: 2.3753 (2.4229) time: 0.7817 data: 0.0003 max mem: 8426 +[2024-12-10 23:47:28 root] (utils.py 283): INFO Epoch: [23] [ 360/2502] eta: 0:27:32 lr: 0.000004 loss_cls: 3.9676 (3.8877) grad_norm: 2.3822 (2.4220) time: 0.7804 data: 0.0003 max mem: 8426 +[2024-12-10 23:47:36 root] (utils.py 283): INFO Epoch: [23] [ 370/2502] eta: 0:27:25 lr: 0.000004 loss_cls: 3.6953 (3.8806) grad_norm: 2.3841 (2.4215) time: 0.7801 data: 0.0003 max mem: 8426 +[2024-12-10 23:47:43 root] (utils.py 283): INFO Epoch: [23] [ 380/2502] eta: 0:27:18 lr: 0.000004 loss_cls: 3.8293 (3.8801) grad_norm: 2.3841 (2.4200) time: 0.7802 data: 0.0003 max mem: 8426 +[2024-12-10 23:47:51 root] (utils.py 283): INFO Epoch: [23] [ 390/2502] eta: 0:27:10 lr: 0.000004 loss_cls: 3.8603 (3.8766) grad_norm: 2.3380 (2.4174) time: 0.7808 data: 0.0003 max mem: 8426 +[2024-12-10 23:47:59 root] (utils.py 283): INFO Epoch: [23] [ 400/2502] eta: 0:27:03 lr: 0.000004 loss_cls: 4.1910 (3.8868) grad_norm: 2.3380 (2.4167) time: 0.7788 data: 0.0003 max mem: 8426 +[2024-12-10 23:48:07 root] (utils.py 283): INFO Epoch: [23] [ 410/2502] eta: 0:26:55 lr: 0.000004 loss_cls: 4.2332 (3.8880) grad_norm: 2.3696 (2.4164) time: 0.7692 data: 0.0003 max mem: 8426 +[2024-12-10 23:48:14 root] (utils.py 283): INFO Epoch: [23] [ 420/2502] eta: 0:26:47 lr: 0.000004 loss_cls: 3.9499 (3.8891) grad_norm: 2.4108 (2.4171) time: 0.7661 data: 0.0003 max mem: 8426 +[2024-12-10 23:48:22 root] (utils.py 283): INFO Epoch: [23] [ 430/2502] eta: 0:26:39 lr: 0.000004 loss_cls: 4.1772 (3.8903) grad_norm: 2.4741 (2.4181) time: 0.7704 data: 0.0003 max mem: 8426 +[2024-12-10 23:48:30 root] (utils.py 283): INFO Epoch: [23] [ 440/2502] eta: 0:26:31 lr: 0.000004 loss_cls: 3.7412 (3.8855) grad_norm: 2.3798 (2.4156) time: 0.7672 data: 0.0003 max mem: 8426 +[2024-12-10 23:48:37 root] (utils.py 283): INFO Epoch: [23] [ 450/2502] eta: 0:26:23 lr: 0.000004 loss_cls: 3.7066 (3.8836) grad_norm: 2.3114 (2.4136) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-10 23:48:45 root] (utils.py 283): INFO Epoch: [23] [ 460/2502] eta: 0:26:15 lr: 0.000004 loss_cls: 3.6297 (3.8802) grad_norm: 2.3688 (2.4128) time: 0.7681 data: 0.0002 max mem: 8426 +[2024-12-10 23:48:53 root] (utils.py 283): INFO Epoch: [23] [ 470/2502] eta: 0:26:08 lr: 0.000004 loss_cls: 3.7939 (3.8824) grad_norm: 2.3381 (2.4111) time: 0.7705 data: 0.0002 max mem: 8426 +[2024-12-10 23:49:00 root] (utils.py 283): INFO Epoch: [23] [ 480/2502] eta: 0:25:59 lr: 0.000004 loss_cls: 4.0393 (3.8790) grad_norm: 2.3110 (2.4105) time: 0.7656 data: 0.0003 max mem: 8426 +[2024-12-10 23:49:08 root] (utils.py 283): INFO Epoch: [23] [ 490/2502] eta: 0:25:51 lr: 0.000004 loss_cls: 3.9809 (3.8818) grad_norm: 2.3563 (2.4100) time: 0.7616 data: 0.0003 max mem: 8426 +[2024-12-10 23:49:16 root] (utils.py 283): INFO Epoch: [23] [ 500/2502] eta: 0:25:44 lr: 0.000004 loss_cls: 4.1516 (3.8910) grad_norm: 2.3954 (2.4104) time: 0.7692 data: 0.0003 max mem: 8426 +[2024-12-10 23:49:23 root] (utils.py 283): INFO Epoch: [23] [ 510/2502] eta: 0:25:36 lr: 0.000004 loss_cls: 4.1933 (3.8936) grad_norm: 2.3713 (2.4088) time: 0.7678 data: 0.0002 max mem: 8426 +[2024-12-10 23:49:31 root] (utils.py 283): INFO Epoch: [23] [ 520/2502] eta: 0:25:28 lr: 0.000004 loss_cls: 4.0619 (3.8971) grad_norm: 2.2959 (2.4081) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 23:49:39 root] (utils.py 283): INFO Epoch: [23] [ 530/2502] eta: 0:25:20 lr: 0.000004 loss_cls: 4.1058 (3.8998) grad_norm: 2.3412 (2.4075) time: 0.7662 data: 0.0002 max mem: 8426 +[2024-12-10 23:49:46 root] (utils.py 283): INFO Epoch: [23] [ 540/2502] eta: 0:25:12 lr: 0.000004 loss_cls: 3.9195 (3.8963) grad_norm: 2.3747 (2.4081) time: 0.7703 data: 0.0002 max mem: 8426 +[2024-12-10 23:49:54 root] (utils.py 283): INFO Epoch: [23] [ 550/2502] eta: 0:25:04 lr: 0.000004 loss_cls: 3.6931 (3.8947) grad_norm: 2.4672 (2.4092) time: 0.7675 data: 0.0002 max mem: 8426 +[2024-12-10 23:50:02 root] (utils.py 283): INFO Epoch: [23] [ 560/2502] eta: 0:24:56 lr: 0.000004 loss_cls: 3.9536 (3.8989) grad_norm: 2.4208 (2.4082) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 23:50:09 root] (utils.py 283): INFO Epoch: [23] [ 570/2502] eta: 0:24:48 lr: 0.000004 loss_cls: 3.9536 (3.8996) grad_norm: 2.3740 (2.4088) time: 0.7639 data: 0.0003 max mem: 8426 +[2024-12-10 23:50:17 root] (utils.py 283): INFO Epoch: [23] [ 580/2502] eta: 0:24:40 lr: 0.000004 loss_cls: 3.7841 (3.8977) grad_norm: 2.3982 (2.4092) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-10 23:50:25 root] (utils.py 283): INFO Epoch: [23] [ 590/2502] eta: 0:24:33 lr: 0.000004 loss_cls: 3.7841 (3.8964) grad_norm: 2.3857 (2.4087) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-10 23:50:32 root] (utils.py 283): INFO Epoch: [23] [ 600/2502] eta: 0:24:25 lr: 0.000004 loss_cls: 3.7047 (3.8915) grad_norm: 2.3523 (2.4087) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-10 23:50:40 root] (utils.py 283): INFO Epoch: [23] [ 610/2502] eta: 0:24:17 lr: 0.000004 loss_cls: 3.5578 (3.8878) grad_norm: 2.4444 (2.4095) time: 0.7634 data: 0.0003 max mem: 8426 +[2024-12-10 23:50:48 root] (utils.py 283): INFO Epoch: [23] [ 620/2502] eta: 0:24:09 lr: 0.000004 loss_cls: 3.8099 (3.8860) grad_norm: 2.3990 (2.4091) time: 0.7634 data: 0.0003 max mem: 8426 +[2024-12-10 23:50:55 root] (utils.py 283): INFO Epoch: [23] [ 630/2502] eta: 0:24:01 lr: 0.000004 loss_cls: 3.7301 (3.8825) grad_norm: 2.3681 (2.4084) time: 0.7682 data: 0.0003 max mem: 8426 +[2024-12-10 23:51:03 root] (utils.py 283): INFO Epoch: [23] [ 640/2502] eta: 0:23:53 lr: 0.000004 loss_cls: 3.8798 (3.8877) grad_norm: 2.3681 (2.4088) time: 0.7663 data: 0.0003 max mem: 8426 +[2024-12-10 23:51:11 root] (utils.py 283): INFO Epoch: [23] [ 650/2502] eta: 0:23:45 lr: 0.000004 loss_cls: 4.1635 (3.8854) grad_norm: 2.3460 (2.4079) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-10 23:51:18 root] (utils.py 283): INFO Epoch: [23] [ 660/2502] eta: 0:23:38 lr: 0.000004 loss_cls: 4.1635 (3.8894) grad_norm: 2.3727 (2.4083) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-10 23:51:26 root] (utils.py 283): INFO Epoch: [23] [ 670/2502] eta: 0:23:30 lr: 0.000004 loss_cls: 4.0094 (3.8872) grad_norm: 2.4689 (2.4090) time: 0.7662 data: 0.0003 max mem: 8426 +[2024-12-10 23:51:34 root] (utils.py 283): INFO Epoch: [23] [ 680/2502] eta: 0:23:22 lr: 0.000004 loss_cls: 3.9674 (3.8891) grad_norm: 2.4139 (2.4083) time: 0.7634 data: 0.0003 max mem: 8426 +[2024-12-10 23:51:41 root] (utils.py 283): INFO Epoch: [23] [ 690/2502] eta: 0:23:14 lr: 0.000004 loss_cls: 4.0628 (3.8910) grad_norm: 2.4169 (2.4090) time: 0.7625 data: 0.0003 max mem: 8426 +[2024-12-10 23:51:49 root] (utils.py 283): INFO Epoch: [23] [ 700/2502] eta: 0:23:07 lr: 0.000004 loss_cls: 4.0150 (3.8888) grad_norm: 2.4180 (2.4088) time: 0.7707 data: 0.0003 max mem: 8426 +[2024-12-10 23:51:57 root] (utils.py 283): INFO Epoch: [23] [ 710/2502] eta: 0:22:59 lr: 0.000004 loss_cls: 3.9027 (3.8883) grad_norm: 2.4180 (2.4097) time: 0.7816 data: 0.0002 max mem: 8426 +[2024-12-10 23:52:05 root] (utils.py 283): INFO Epoch: [23] [ 720/2502] eta: 0:22:52 lr: 0.000004 loss_cls: 4.0887 (3.8894) grad_norm: 2.4436 (2.4104) time: 0.7807 data: 0.0003 max mem: 8426 +[2024-12-10 23:52:12 root] (utils.py 283): INFO Epoch: [23] [ 730/2502] eta: 0:22:44 lr: 0.000004 loss_cls: 4.1019 (3.8912) grad_norm: 2.4374 (2.4118) time: 0.7682 data: 0.0002 max mem: 8426 +[2024-12-10 23:52:20 root] (utils.py 283): INFO Epoch: [23] [ 740/2502] eta: 0:22:36 lr: 0.000004 loss_cls: 4.1795 (3.8901) grad_norm: 2.4208 (2.4112) time: 0.7608 data: 0.0002 max mem: 8426 +[2024-12-10 23:52:27 root] (utils.py 283): INFO Epoch: [23] [ 750/2502] eta: 0:22:28 lr: 0.000004 loss_cls: 3.6431 (3.8855) grad_norm: 2.3526 (2.4116) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-10 23:52:35 root] (utils.py 283): INFO Epoch: [23] [ 760/2502] eta: 0:22:20 lr: 0.000004 loss_cls: 3.8063 (3.8873) grad_norm: 2.4085 (2.4114) time: 0.7677 data: 0.0002 max mem: 8426 +[2024-12-10 23:52:43 root] (utils.py 283): INFO Epoch: [23] [ 770/2502] eta: 0:22:13 lr: 0.000004 loss_cls: 4.1204 (3.8898) grad_norm: 2.4095 (2.4116) time: 0.7684 data: 0.0002 max mem: 8426 +[2024-12-10 23:52:50 root] (utils.py 283): INFO Epoch: [23] [ 780/2502] eta: 0:22:05 lr: 0.000004 loss_cls: 4.1233 (3.8911) grad_norm: 2.4489 (2.4115) time: 0.7673 data: 0.0002 max mem: 8426 +[2024-12-10 23:52:58 root] (utils.py 283): INFO Epoch: [23] [ 790/2502] eta: 0:21:57 lr: 0.000004 loss_cls: 4.2201 (3.8934) grad_norm: 2.4445 (2.4117) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 23:53:06 root] (utils.py 283): INFO Epoch: [23] [ 800/2502] eta: 0:21:49 lr: 0.000004 loss_cls: 3.8534 (3.8914) grad_norm: 2.4445 (2.4125) time: 0.7674 data: 0.0003 max mem: 8426 +[2024-12-10 23:53:13 root] (utils.py 283): INFO Epoch: [23] [ 810/2502] eta: 0:21:42 lr: 0.000004 loss_cls: 3.6717 (3.8874) grad_norm: 2.4183 (2.4119) time: 0.7659 data: 0.0003 max mem: 8426 +[2024-12-10 23:53:21 root] (utils.py 283): INFO Epoch: [23] [ 820/2502] eta: 0:21:34 lr: 0.000004 loss_cls: 3.6979 (3.8868) grad_norm: 2.3992 (2.4121) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-10 23:53:29 root] (utils.py 283): INFO Epoch: [23] [ 830/2502] eta: 0:21:26 lr: 0.000004 loss_cls: 3.7700 (3.8829) grad_norm: 2.3994 (2.4121) time: 0.7607 data: 0.0002 max mem: 8426 +[2024-12-10 23:53:36 root] (utils.py 283): INFO Epoch: [23] [ 840/2502] eta: 0:21:18 lr: 0.000004 loss_cls: 3.7394 (3.8845) grad_norm: 2.4628 (2.4130) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-10 23:53:44 root] (utils.py 283): INFO Epoch: [23] [ 850/2502] eta: 0:21:10 lr: 0.000004 loss_cls: 3.7394 (3.8817) grad_norm: 2.4754 (2.4139) time: 0.7603 data: 0.0003 max mem: 8426 +[2024-12-10 23:53:52 root] (utils.py 283): INFO Epoch: [23] [ 860/2502] eta: 0:21:03 lr: 0.000004 loss_cls: 3.5367 (3.8806) grad_norm: 2.4683 (2.4145) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-10 23:53:59 root] (utils.py 283): INFO Epoch: [23] [ 870/2502] eta: 0:20:55 lr: 0.000004 loss_cls: 3.8504 (3.8797) grad_norm: 2.3902 (2.4142) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-10 23:54:07 root] (utils.py 283): INFO Epoch: [23] [ 880/2502] eta: 0:20:47 lr: 0.000004 loss_cls: 3.8504 (3.8788) grad_norm: 2.3671 (2.4141) time: 0.7595 data: 0.0002 max mem: 8426 +[2024-12-10 23:54:14 root] (utils.py 283): INFO Epoch: [23] [ 890/2502] eta: 0:20:39 lr: 0.000004 loss_cls: 4.1827 (3.8856) grad_norm: 2.3788 (2.4141) time: 0.7618 data: 0.0002 max mem: 8426 +[2024-12-10 23:54:22 root] (utils.py 283): INFO Epoch: [23] [ 900/2502] eta: 0:20:31 lr: 0.000004 loss_cls: 4.3257 (3.8869) grad_norm: 2.4509 (2.4142) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 23:54:30 root] (utils.py 283): INFO Epoch: [23] [ 910/2502] eta: 0:20:23 lr: 0.000004 loss_cls: 3.9680 (3.8858) grad_norm: 2.3935 (2.4140) time: 0.7661 data: 0.0002 max mem: 8426 +[2024-12-10 23:54:37 root] (utils.py 283): INFO Epoch: [23] [ 920/2502] eta: 0:20:16 lr: 0.000004 loss_cls: 4.0990 (3.8884) grad_norm: 2.3926 (2.4144) time: 0.7683 data: 0.0003 max mem: 8426 +[2024-12-10 23:54:45 root] (utils.py 283): INFO Epoch: [23] [ 930/2502] eta: 0:20:08 lr: 0.000004 loss_cls: 4.0990 (3.8893) grad_norm: 2.3926 (2.4150) time: 0.7702 data: 0.0003 max mem: 8426 +[2024-12-10 23:54:53 root] (utils.py 283): INFO Epoch: [23] [ 940/2502] eta: 0:20:00 lr: 0.000004 loss_cls: 3.8753 (3.8889) grad_norm: 2.4227 (2.4153) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 23:55:00 root] (utils.py 283): INFO Epoch: [23] [ 950/2502] eta: 0:19:53 lr: 0.000004 loss_cls: 4.0688 (3.8904) grad_norm: 2.3797 (2.4149) time: 0.7615 data: 0.0002 max mem: 8426 +[2024-12-10 23:55:08 root] (utils.py 283): INFO Epoch: [23] [ 960/2502] eta: 0:19:45 lr: 0.000004 loss_cls: 4.1985 (3.8933) grad_norm: 2.3362 (2.4148) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-10 23:55:16 root] (utils.py 283): INFO Epoch: [23] [ 970/2502] eta: 0:19:37 lr: 0.000004 loss_cls: 4.0437 (3.8919) grad_norm: 2.4353 (2.4151) time: 0.7639 data: 0.0003 max mem: 8426 +[2024-12-10 23:55:23 root] (utils.py 283): INFO Epoch: [23] [ 980/2502] eta: 0:19:29 lr: 0.000004 loss_cls: 3.6610 (3.8896) grad_norm: 2.4536 (2.4152) time: 0.7615 data: 0.0003 max mem: 8426 +[2024-12-10 23:55:31 root] (utils.py 283): INFO Epoch: [23] [ 990/2502] eta: 0:19:21 lr: 0.000004 loss_cls: 3.8339 (3.8891) grad_norm: 2.5243 (2.4159) time: 0.7630 data: 0.0002 max mem: 8426 +[2024-12-10 23:55:39 root] (utils.py 283): INFO Epoch: [23] [1000/2502] eta: 0:19:14 lr: 0.000004 loss_cls: 3.8761 (3.8873) grad_norm: 2.4327 (2.4155) time: 0.7654 data: 0.0002 max mem: 8426 +[2024-12-10 23:55:46 root] (utils.py 283): INFO Epoch: [23] [1010/2502] eta: 0:19:06 lr: 0.000004 loss_cls: 3.8519 (3.8860) grad_norm: 2.3903 (2.4155) time: 0.7713 data: 0.0002 max mem: 8426 +[2024-12-10 23:55:54 root] (utils.py 283): INFO Epoch: [23] [1020/2502] eta: 0:18:58 lr: 0.000004 loss_cls: 3.8347 (3.8836) grad_norm: 2.3957 (2.4157) time: 0.7680 data: 0.0003 max mem: 8426 +[2024-12-10 23:56:02 root] (utils.py 283): INFO Epoch: [23] [1030/2502] eta: 0:18:51 lr: 0.000004 loss_cls: 3.7954 (3.8828) grad_norm: 2.3929 (2.4154) time: 0.7614 data: 0.0003 max mem: 8426 +[2024-12-10 23:56:09 root] (utils.py 283): INFO Epoch: [23] [1040/2502] eta: 0:18:43 lr: 0.000004 loss_cls: 4.0267 (3.8830) grad_norm: 2.4069 (2.4157) time: 0.7699 data: 0.0002 max mem: 8426 +[2024-12-10 23:56:17 root] (utils.py 283): INFO Epoch: [23] [1050/2502] eta: 0:18:35 lr: 0.000004 loss_cls: 4.0267 (3.8818) grad_norm: 2.4782 (2.4158) time: 0.7683 data: 0.0002 max mem: 8426 +[2024-12-10 23:56:25 root] (utils.py 283): INFO Epoch: [23] [1060/2502] eta: 0:18:27 lr: 0.000004 loss_cls: 3.5103 (3.8806) grad_norm: 2.4393 (2.4159) time: 0.7605 data: 0.0002 max mem: 8426 +[2024-12-10 23:56:32 root] (utils.py 283): INFO Epoch: [23] [1070/2502] eta: 0:18:20 lr: 0.000004 loss_cls: 3.9730 (3.8802) grad_norm: 2.4216 (2.4158) time: 0.7621 data: 0.0003 max mem: 8426 +[2024-12-10 23:56:40 root] (utils.py 283): INFO Epoch: [23] [1080/2502] eta: 0:18:12 lr: 0.000004 loss_cls: 3.8286 (3.8762) grad_norm: 2.4070 (2.4159) time: 0.7604 data: 0.0003 max mem: 8426 +[2024-12-10 23:56:47 root] (utils.py 283): INFO Epoch: [23] [1090/2502] eta: 0:18:04 lr: 0.000004 loss_cls: 3.6682 (3.8755) grad_norm: 2.3891 (2.4160) time: 0.7633 data: 0.0003 max mem: 8426 +[2024-12-10 23:56:55 root] (utils.py 283): INFO Epoch: [23] [1100/2502] eta: 0:17:56 lr: 0.000004 loss_cls: 3.8916 (3.8755) grad_norm: 2.3891 (2.4157) time: 0.7636 data: 0.0003 max mem: 8426 +[2024-12-10 23:57:03 root] (utils.py 283): INFO Epoch: [23] [1110/2502] eta: 0:17:49 lr: 0.000004 loss_cls: 3.9651 (3.8761) grad_norm: 2.4147 (2.4158) time: 0.7600 data: 0.0003 max mem: 8426 +[2024-12-10 23:57:10 root] (utils.py 283): INFO Epoch: [23] [1120/2502] eta: 0:17:41 lr: 0.000004 loss_cls: 4.1505 (3.8770) grad_norm: 2.4247 (2.4163) time: 0.7605 data: 0.0002 max mem: 8426 +[2024-12-10 23:57:18 root] (utils.py 283): INFO Epoch: [23] [1130/2502] eta: 0:17:33 lr: 0.000004 loss_cls: 4.1350 (3.8791) grad_norm: 2.4834 (2.4170) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-10 23:57:26 root] (utils.py 283): INFO Epoch: [23] [1140/2502] eta: 0:17:25 lr: 0.000004 loss_cls: 4.2265 (3.8803) grad_norm: 2.4454 (2.4173) time: 0.7653 data: 0.0003 max mem: 8426 +[2024-12-10 23:57:33 root] (utils.py 283): INFO Epoch: [23] [1150/2502] eta: 0:17:18 lr: 0.000004 loss_cls: 4.2265 (3.8806) grad_norm: 2.4603 (2.4177) time: 0.7730 data: 0.0002 max mem: 8426 +[2024-12-10 23:57:41 root] (utils.py 283): INFO Epoch: [23] [1160/2502] eta: 0:17:10 lr: 0.000004 loss_cls: 4.1081 (3.8819) grad_norm: 2.4552 (2.4178) time: 0.7811 data: 0.0002 max mem: 8426 +[2024-12-10 23:57:49 root] (utils.py 283): INFO Epoch: [23] [1170/2502] eta: 0:17:03 lr: 0.000004 loss_cls: 3.9650 (3.8824) grad_norm: 2.4552 (2.4181) time: 0.7771 data: 0.0002 max mem: 8426 +[2024-12-10 23:57:57 root] (utils.py 283): INFO Epoch: [23] [1180/2502] eta: 0:16:55 lr: 0.000004 loss_cls: 3.9650 (3.8816) grad_norm: 2.4697 (2.4186) time: 0.7688 data: 0.0002 max mem: 8426 +[2024-12-10 23:58:04 root] (utils.py 283): INFO Epoch: [23] [1190/2502] eta: 0:16:47 lr: 0.000004 loss_cls: 3.8740 (3.8807) grad_norm: 2.4597 (2.4191) time: 0.7650 data: 0.0002 max mem: 8426 +[2024-12-10 23:58:12 root] (utils.py 283): INFO Epoch: [23] [1200/2502] eta: 0:16:40 lr: 0.000004 loss_cls: 3.8740 (3.8813) grad_norm: 2.4180 (2.4188) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-10 23:58:20 root] (utils.py 283): INFO Epoch: [23] [1210/2502] eta: 0:16:32 lr: 0.000004 loss_cls: 3.9813 (3.8828) grad_norm: 2.4111 (2.4190) time: 0.7631 data: 0.0003 max mem: 8426 +[2024-12-10 23:58:27 root] (utils.py 283): INFO Epoch: [23] [1220/2502] eta: 0:16:24 lr: 0.000004 loss_cls: 4.0358 (3.8831) grad_norm: 2.4325 (2.4195) time: 0.7633 data: 0.0003 max mem: 8426 +[2024-12-10 23:58:35 root] (utils.py 283): INFO Epoch: [23] [1230/2502] eta: 0:16:16 lr: 0.000004 loss_cls: 3.9606 (3.8832) grad_norm: 2.4325 (2.4198) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-10 23:58:42 root] (utils.py 283): INFO Epoch: [23] [1240/2502] eta: 0:16:09 lr: 0.000004 loss_cls: 4.0265 (3.8850) grad_norm: 2.4443 (2.4203) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-10 23:58:50 root] (utils.py 283): INFO Epoch: [23] [1250/2502] eta: 0:16:01 lr: 0.000004 loss_cls: 4.2004 (3.8870) grad_norm: 2.4706 (2.4207) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-10 23:58:58 root] (utils.py 283): INFO Epoch: [23] [1260/2502] eta: 0:15:53 lr: 0.000004 loss_cls: 4.3366 (3.8886) grad_norm: 2.4828 (2.4219) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-10 23:59:05 root] (utils.py 283): INFO Epoch: [23] [1270/2502] eta: 0:15:45 lr: 0.000004 loss_cls: 3.9041 (3.8867) grad_norm: 2.4990 (2.4223) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-10 23:59:13 root] (utils.py 283): INFO Epoch: [23] [1280/2502] eta: 0:15:38 lr: 0.000004 loss_cls: 3.7818 (3.8864) grad_norm: 2.4666 (2.4224) time: 0.7582 data: 0.0002 max mem: 8426 +[2024-12-10 23:59:21 root] (utils.py 283): INFO Epoch: [23] [1290/2502] eta: 0:15:30 lr: 0.000004 loss_cls: 3.7818 (3.8848) grad_norm: 2.4598 (2.4227) time: 0.7614 data: 0.0002 max mem: 8426 +[2024-12-10 23:59:28 root] (utils.py 283): INFO Epoch: [23] [1300/2502] eta: 0:15:22 lr: 0.000004 loss_cls: 3.8645 (3.8850) grad_norm: 2.4172 (2.4230) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-10 23:59:36 root] (utils.py 283): INFO Epoch: [23] [1310/2502] eta: 0:15:15 lr: 0.000004 loss_cls: 3.8914 (3.8847) grad_norm: 2.3992 (2.4228) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-10 23:59:43 root] (utils.py 283): INFO Epoch: [23] [1320/2502] eta: 0:15:07 lr: 0.000004 loss_cls: 3.9742 (3.8858) grad_norm: 2.3965 (2.4226) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-10 23:59:51 root] (utils.py 283): INFO Epoch: [23] [1330/2502] eta: 0:14:59 lr: 0.000004 loss_cls: 3.9742 (3.8872) grad_norm: 2.4040 (2.4231) time: 0.7659 data: 0.0002 max mem: 8426 +[2024-12-10 23:59:59 root] (utils.py 283): INFO Epoch: [23] [1340/2502] eta: 0:14:51 lr: 0.000004 loss_cls: 3.9144 (3.8882) grad_norm: 2.4420 (2.4235) time: 0.7647 data: 0.0003 max mem: 8426 +[2024-12-11 00:00:06 root] (utils.py 283): INFO Epoch: [23] [1350/2502] eta: 0:14:44 lr: 0.000004 loss_cls: 3.8108 (3.8875) grad_norm: 2.4420 (2.4239) time: 0.7666 data: 0.0003 max mem: 8426 +[2024-12-11 00:00:14 root] (utils.py 283): INFO Epoch: [23] [1360/2502] eta: 0:14:36 lr: 0.000004 loss_cls: 3.7386 (3.8859) grad_norm: 2.4112 (2.4237) time: 0.7677 data: 0.0002 max mem: 8426 +[2024-12-11 00:00:22 root] (utils.py 283): INFO Epoch: [23] [1370/2502] eta: 0:14:28 lr: 0.000004 loss_cls: 3.5451 (3.8840) grad_norm: 2.4583 (2.4245) time: 0.7661 data: 0.0002 max mem: 8426 +[2024-12-11 00:00:29 root] (utils.py 283): INFO Epoch: [23] [1380/2502] eta: 0:14:21 lr: 0.000004 loss_cls: 3.8497 (3.8855) grad_norm: 2.4607 (2.4241) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-11 00:00:37 root] (utils.py 283): INFO Epoch: [23] [1390/2502] eta: 0:14:13 lr: 0.000004 loss_cls: 3.9935 (3.8847) grad_norm: 2.3567 (2.4237) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-11 00:00:45 root] (utils.py 283): INFO Epoch: [23] [1400/2502] eta: 0:14:05 lr: 0.000004 loss_cls: 3.5624 (3.8822) grad_norm: 2.3887 (2.4239) time: 0.7643 data: 0.0003 max mem: 8426 +[2024-12-11 00:00:53 root] (utils.py 283): INFO Epoch: [23] [1410/2502] eta: 0:13:58 lr: 0.000004 loss_cls: 4.0023 (3.8848) grad_norm: 2.4056 (2.4237) time: 0.7784 data: 0.0003 max mem: 8426 +[2024-12-11 00:01:00 root] (utils.py 283): INFO Epoch: [23] [1420/2502] eta: 0:13:50 lr: 0.000004 loss_cls: 4.2102 (3.8870) grad_norm: 2.4056 (2.4236) time: 0.7758 data: 0.0002 max mem: 8426 +[2024-12-11 00:01:08 root] (utils.py 283): INFO Epoch: [23] [1430/2502] eta: 0:13:42 lr: 0.000004 loss_cls: 4.0124 (3.8853) grad_norm: 2.4538 (2.4243) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-11 00:01:16 root] (utils.py 283): INFO Epoch: [23] [1440/2502] eta: 0:13:35 lr: 0.000004 loss_cls: 3.7971 (3.8866) grad_norm: 2.3975 (2.4244) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-11 00:01:23 root] (utils.py 283): INFO Epoch: [23] [1450/2502] eta: 0:13:27 lr: 0.000004 loss_cls: 4.0126 (3.8859) grad_norm: 2.3879 (2.4242) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-11 00:01:31 root] (utils.py 283): INFO Epoch: [23] [1460/2502] eta: 0:13:19 lr: 0.000004 loss_cls: 4.0276 (3.8850) grad_norm: 2.3789 (2.4241) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-11 00:01:39 root] (utils.py 283): INFO Epoch: [23] [1470/2502] eta: 0:13:12 lr: 0.000004 loss_cls: 3.8782 (3.8846) grad_norm: 2.3535 (2.4238) time: 0.7697 data: 0.0003 max mem: 8426 +[2024-12-11 00:01:46 root] (utils.py 283): INFO Epoch: [23] [1480/2502] eta: 0:13:04 lr: 0.000004 loss_cls: 3.7577 (3.8840) grad_norm: 2.3658 (2.4235) time: 0.7688 data: 0.0002 max mem: 8426 +[2024-12-11 00:01:54 root] (utils.py 283): INFO Epoch: [23] [1490/2502] eta: 0:12:56 lr: 0.000004 loss_cls: 3.7926 (3.8838) grad_norm: 2.3474 (2.4231) time: 0.7718 data: 0.0003 max mem: 8426 +[2024-12-11 00:02:02 root] (utils.py 283): INFO Epoch: [23] [1500/2502] eta: 0:12:49 lr: 0.000004 loss_cls: 4.1768 (3.8861) grad_norm: 2.4613 (2.4234) time: 0.7742 data: 0.0003 max mem: 8426 +[2024-12-11 00:02:09 root] (utils.py 283): INFO Epoch: [23] [1510/2502] eta: 0:12:41 lr: 0.000004 loss_cls: 4.0607 (3.8838) grad_norm: 2.4390 (2.4233) time: 0.7654 data: 0.0003 max mem: 8426 +[2024-12-11 00:02:17 root] (utils.py 283): INFO Epoch: [23] [1520/2502] eta: 0:12:33 lr: 0.000004 loss_cls: 3.9292 (3.8834) grad_norm: 2.3992 (2.4231) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-11 00:02:24 root] (utils.py 283): INFO Epoch: [23] [1530/2502] eta: 0:12:26 lr: 0.000004 loss_cls: 4.0943 (3.8857) grad_norm: 2.4603 (2.4239) time: 0.7603 data: 0.0003 max mem: 8426 +[2024-12-11 00:02:32 root] (utils.py 283): INFO Epoch: [23] [1540/2502] eta: 0:12:18 lr: 0.000004 loss_cls: 4.2873 (3.8863) grad_norm: 2.4797 (2.4239) time: 0.7605 data: 0.0003 max mem: 8426 +[2024-12-11 00:02:40 root] (utils.py 283): INFO Epoch: [23] [1550/2502] eta: 0:12:10 lr: 0.000004 loss_cls: 4.1689 (3.8856) grad_norm: 2.4270 (2.4240) time: 0.7622 data: 0.0003 max mem: 8426 +[2024-12-11 00:02:47 root] (utils.py 283): INFO Epoch: [23] [1560/2502] eta: 0:12:02 lr: 0.000004 loss_cls: 3.9805 (3.8854) grad_norm: 2.3637 (2.4237) time: 0.7640 data: 0.0003 max mem: 8426 +[2024-12-11 00:02:55 root] (utils.py 283): INFO Epoch: [23] [1570/2502] eta: 0:11:55 lr: 0.000004 loss_cls: 3.9805 (3.8863) grad_norm: 2.3618 (2.4237) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-11 00:03:03 root] (utils.py 283): INFO Epoch: [23] [1580/2502] eta: 0:11:47 lr: 0.000004 loss_cls: 3.9500 (3.8867) grad_norm: 2.3644 (2.4233) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-11 00:03:10 root] (utils.py 283): INFO Epoch: [23] [1590/2502] eta: 0:11:39 lr: 0.000004 loss_cls: 4.0522 (3.8868) grad_norm: 2.3510 (2.4228) time: 0.7649 data: 0.0003 max mem: 8426 +[2024-12-11 00:03:18 root] (utils.py 283): INFO Epoch: [23] [1600/2502] eta: 0:11:32 lr: 0.000004 loss_cls: 3.8491 (3.8856) grad_norm: 2.3541 (2.4225) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-11 00:03:26 root] (utils.py 283): INFO Epoch: [23] [1610/2502] eta: 0:11:24 lr: 0.000004 loss_cls: 3.8049 (3.8859) grad_norm: 2.3870 (2.4228) time: 0.7659 data: 0.0002 max mem: 8426 +[2024-12-11 00:03:33 root] (utils.py 283): INFO Epoch: [23] [1620/2502] eta: 0:11:16 lr: 0.000004 loss_cls: 3.8588 (3.8855) grad_norm: 2.4452 (2.4230) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-11 00:03:41 root] (utils.py 283): INFO Epoch: [23] [1630/2502] eta: 0:11:09 lr: 0.000004 loss_cls: 3.8588 (3.8864) grad_norm: 2.4351 (2.4234) time: 0.7571 data: 0.0002 max mem: 8426 +[2024-12-11 00:03:48 root] (utils.py 283): INFO Epoch: [23] [1640/2502] eta: 0:11:01 lr: 0.000004 loss_cls: 3.7287 (3.8840) grad_norm: 2.4120 (2.4238) time: 0.7599 data: 0.0002 max mem: 8426 +[2024-12-11 00:03:56 root] (utils.py 283): INFO Epoch: [23] [1650/2502] eta: 0:10:53 lr: 0.000004 loss_cls: 3.7992 (3.8845) grad_norm: 2.4282 (2.4237) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-11 00:04:04 root] (utils.py 283): INFO Epoch: [23] [1660/2502] eta: 0:10:45 lr: 0.000004 loss_cls: 4.1954 (3.8854) grad_norm: 2.4282 (2.4234) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-11 00:04:11 root] (utils.py 283): INFO Epoch: [23] [1670/2502] eta: 0:10:38 lr: 0.000004 loss_cls: 4.1972 (3.8874) grad_norm: 2.3866 (2.4232) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-11 00:04:19 root] (utils.py 283): INFO Epoch: [23] [1680/2502] eta: 0:10:30 lr: 0.000004 loss_cls: 4.2345 (3.8890) grad_norm: 2.4182 (2.4233) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-11 00:04:27 root] (utils.py 283): INFO Epoch: [23] [1690/2502] eta: 0:10:22 lr: 0.000004 loss_cls: 4.0487 (3.8885) grad_norm: 2.4416 (2.4230) time: 0.7609 data: 0.0003 max mem: 8426 +[2024-12-11 00:04:34 root] (utils.py 283): INFO Epoch: [23] [1700/2502] eta: 0:10:15 lr: 0.000004 loss_cls: 3.7022 (3.8874) grad_norm: 2.4253 (2.4235) time: 0.7634 data: 0.0003 max mem: 8426 +[2024-12-11 00:04:42 root] (utils.py 283): INFO Epoch: [23] [1710/2502] eta: 0:10:07 lr: 0.000004 loss_cls: 3.7788 (3.8864) grad_norm: 2.3923 (2.4235) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-11 00:04:50 root] (utils.py 283): INFO Epoch: [23] [1720/2502] eta: 0:09:59 lr: 0.000004 loss_cls: 4.0268 (3.8865) grad_norm: 2.3922 (2.4236) time: 0.7653 data: 0.0003 max mem: 8426 +[2024-12-11 00:04:57 root] (utils.py 283): INFO Epoch: [23] [1730/2502] eta: 0:09:52 lr: 0.000004 loss_cls: 3.6453 (3.8846) grad_norm: 2.3909 (2.4233) time: 0.7643 data: 0.0003 max mem: 8426 +[2024-12-11 00:05:05 root] (utils.py 283): INFO Epoch: [23] [1740/2502] eta: 0:09:44 lr: 0.000004 loss_cls: 3.5727 (3.8836) grad_norm: 2.3909 (2.4236) time: 0.7661 data: 0.0002 max mem: 8426 +[2024-12-11 00:05:13 root] (utils.py 283): INFO Epoch: [23] [1750/2502] eta: 0:09:36 lr: 0.000004 loss_cls: 3.9018 (3.8832) grad_norm: 2.4910 (2.4240) time: 0.7696 data: 0.0002 max mem: 8426 +[2024-12-11 00:05:20 root] (utils.py 283): INFO Epoch: [23] [1760/2502] eta: 0:09:29 lr: 0.000004 loss_cls: 4.0070 (3.8837) grad_norm: 2.4527 (2.4240) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-11 00:05:28 root] (utils.py 283): INFO Epoch: [23] [1770/2502] eta: 0:09:21 lr: 0.000004 loss_cls: 3.9318 (3.8834) grad_norm: 2.3598 (2.4234) time: 0.7632 data: 0.0002 max mem: 8426 +[2024-12-11 00:05:36 root] (utils.py 283): INFO Epoch: [23] [1780/2502] eta: 0:09:13 lr: 0.000004 loss_cls: 3.9620 (3.8845) grad_norm: 2.3839 (2.4235) time: 0.7652 data: 0.0003 max mem: 8426 +[2024-12-11 00:05:43 root] (utils.py 283): INFO Epoch: [23] [1790/2502] eta: 0:09:06 lr: 0.000004 loss_cls: 4.2516 (3.8846) grad_norm: 2.4314 (2.4237) time: 0.7641 data: 0.0003 max mem: 8426 +[2024-12-11 00:05:51 root] (utils.py 283): INFO Epoch: [23] [1800/2502] eta: 0:08:58 lr: 0.000004 loss_cls: 3.9890 (3.8856) grad_norm: 2.4290 (2.4238) time: 0.7674 data: 0.0002 max mem: 8426 +[2024-12-11 00:05:59 root] (utils.py 283): INFO Epoch: [23] [1810/2502] eta: 0:08:50 lr: 0.000004 loss_cls: 4.3143 (3.8879) grad_norm: 2.3521 (2.4236) time: 0.7704 data: 0.0002 max mem: 8426 +[2024-12-11 00:06:06 root] (utils.py 283): INFO Epoch: [23] [1820/2502] eta: 0:08:43 lr: 0.000004 loss_cls: 4.3182 (3.8893) grad_norm: 2.3309 (2.4232) time: 0.7665 data: 0.0002 max mem: 8426 +[2024-12-11 00:06:14 root] (utils.py 283): INFO Epoch: [23] [1830/2502] eta: 0:08:35 lr: 0.000004 loss_cls: 4.1360 (3.8893) grad_norm: 2.3351 (2.4230) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-11 00:06:21 root] (utils.py 283): INFO Epoch: [23] [1840/2502] eta: 0:08:27 lr: 0.000004 loss_cls: 3.9349 (3.8896) grad_norm: 2.3417 (2.4226) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-11 00:06:29 root] (utils.py 283): INFO Epoch: [23] [1850/2502] eta: 0:08:20 lr: 0.000004 loss_cls: 3.9349 (3.8903) grad_norm: 2.3665 (2.4226) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-11 00:06:37 root] (utils.py 283): INFO Epoch: [23] [1860/2502] eta: 0:08:12 lr: 0.000004 loss_cls: 3.9001 (3.8903) grad_norm: 2.2955 (2.4219) time: 0.7674 data: 0.0002 max mem: 8426 +[2024-12-11 00:06:44 root] (utils.py 283): INFO Epoch: [23] [1870/2502] eta: 0:08:04 lr: 0.000004 loss_cls: 4.1476 (3.8913) grad_norm: 2.2545 (2.4215) time: 0.7679 data: 0.0002 max mem: 8426 +[2024-12-11 00:06:52 root] (utils.py 283): INFO Epoch: [23] [1880/2502] eta: 0:07:57 lr: 0.000004 loss_cls: 4.1476 (3.8917) grad_norm: 2.3531 (2.4212) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-11 00:07:00 root] (utils.py 283): INFO Epoch: [23] [1890/2502] eta: 0:07:49 lr: 0.000004 loss_cls: 3.8943 (3.8914) grad_norm: 2.3662 (2.4210) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-11 00:07:07 root] (utils.py 283): INFO Epoch: [23] [1900/2502] eta: 0:07:41 lr: 0.000004 loss_cls: 3.9209 (3.8927) grad_norm: 2.3561 (2.4209) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-11 00:07:15 root] (utils.py 283): INFO Epoch: [23] [1910/2502] eta: 0:07:34 lr: 0.000004 loss_cls: 3.7244 (3.8909) grad_norm: 2.3464 (2.4203) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-11 00:07:23 root] (utils.py 283): INFO Epoch: [23] [1920/2502] eta: 0:07:26 lr: 0.000004 loss_cls: 3.7025 (3.8897) grad_norm: 2.3484 (2.4202) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-11 00:07:30 root] (utils.py 283): INFO Epoch: [23] [1930/2502] eta: 0:07:18 lr: 0.000004 loss_cls: 3.8289 (3.8885) grad_norm: 2.3769 (2.4202) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-11 00:07:38 root] (utils.py 283): INFO Epoch: [23] [1940/2502] eta: 0:07:10 lr: 0.000004 loss_cls: 3.9323 (3.8885) grad_norm: 2.4182 (2.4201) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-11 00:07:46 root] (utils.py 283): INFO Epoch: [23] [1950/2502] eta: 0:07:03 lr: 0.000004 loss_cls: 4.1295 (3.8892) grad_norm: 2.4539 (2.4204) time: 0.7642 data: 0.0003 max mem: 8426 +[2024-12-11 00:07:53 root] (utils.py 283): INFO Epoch: [23] [1960/2502] eta: 0:06:55 lr: 0.000004 loss_cls: 4.2218 (3.8904) grad_norm: 2.4514 (2.4205) time: 0.7659 data: 0.0003 max mem: 8426 +[2024-12-11 00:08:01 root] (utils.py 283): INFO Epoch: [23] [1970/2502] eta: 0:06:47 lr: 0.000004 loss_cls: 4.1550 (3.8912) grad_norm: 2.3770 (2.4203) time: 0.7636 data: 0.0003 max mem: 8426 +[2024-12-11 00:08:09 root] (utils.py 283): INFO Epoch: [23] [1980/2502] eta: 0:06:40 lr: 0.000004 loss_cls: 4.0786 (3.8922) grad_norm: 2.3868 (2.4207) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-11 00:08:16 root] (utils.py 283): INFO Epoch: [23] [1990/2502] eta: 0:06:32 lr: 0.000004 loss_cls: 4.0271 (3.8919) grad_norm: 2.4082 (2.4206) time: 0.7721 data: 0.0002 max mem: 8426 +[2024-12-11 00:08:24 root] (utils.py 283): INFO Epoch: [23] [2000/2502] eta: 0:06:24 lr: 0.000004 loss_cls: 3.8244 (3.8911) grad_norm: 2.4082 (2.4210) time: 0.7719 data: 0.0002 max mem: 8426 +[2024-12-11 00:08:32 root] (utils.py 283): INFO Epoch: [23] [2010/2502] eta: 0:06:17 lr: 0.000004 loss_cls: 4.1587 (3.8932) grad_norm: 2.4708 (2.4209) time: 0.7683 data: 0.0002 max mem: 8426 +[2024-12-11 00:08:39 root] (utils.py 283): INFO Epoch: [23] [2020/2502] eta: 0:06:09 lr: 0.000004 loss_cls: 4.1956 (3.8928) grad_norm: 2.3822 (2.4211) time: 0.7684 data: 0.0002 max mem: 8426 +[2024-12-11 00:08:47 root] (utils.py 283): INFO Epoch: [23] [2030/2502] eta: 0:06:01 lr: 0.000004 loss_cls: 3.9166 (3.8926) grad_norm: 2.4353 (2.4213) time: 0.7701 data: 0.0002 max mem: 8426 +[2024-12-11 00:08:55 root] (utils.py 283): INFO Epoch: [23] [2040/2502] eta: 0:05:54 lr: 0.000004 loss_cls: 3.9540 (3.8923) grad_norm: 2.3779 (2.4210) time: 0.7684 data: 0.0002 max mem: 8426 +[2024-12-11 00:09:02 root] (utils.py 283): INFO Epoch: [23] [2050/2502] eta: 0:05:46 lr: 0.000004 loss_cls: 3.9842 (3.8931) grad_norm: 2.3893 (2.4214) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-11 00:09:10 root] (utils.py 283): INFO Epoch: [23] [2060/2502] eta: 0:05:38 lr: 0.000004 loss_cls: 4.0742 (3.8929) grad_norm: 2.4651 (2.4213) time: 0.7653 data: 0.0003 max mem: 8426 +[2024-12-11 00:09:18 root] (utils.py 283): INFO Epoch: [23] [2070/2502] eta: 0:05:31 lr: 0.000004 loss_cls: 4.1144 (3.8927) grad_norm: 2.4333 (2.4214) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-11 00:09:25 root] (utils.py 283): INFO Epoch: [23] [2080/2502] eta: 0:05:23 lr: 0.000004 loss_cls: 4.1144 (3.8936) grad_norm: 2.4065 (2.4215) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-11 00:09:33 root] (utils.py 283): INFO Epoch: [23] [2090/2502] eta: 0:05:15 lr: 0.000004 loss_cls: 4.1044 (3.8932) grad_norm: 2.4421 (2.4219) time: 0.7667 data: 0.0002 max mem: 8426 +[2024-12-11 00:09:41 root] (utils.py 283): INFO Epoch: [23] [2100/2502] eta: 0:05:08 lr: 0.000004 loss_cls: 3.8123 (3.8933) grad_norm: 2.5016 (2.4221) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-11 00:09:48 root] (utils.py 283): INFO Epoch: [23] [2110/2502] eta: 0:05:00 lr: 0.000004 loss_cls: 3.6852 (3.8917) grad_norm: 2.4250 (2.4219) time: 0.7612 data: 0.0002 max mem: 8426 +[2024-12-11 00:09:56 root] (utils.py 283): INFO Epoch: [23] [2120/2502] eta: 0:04:52 lr: 0.000004 loss_cls: 3.5014 (3.8904) grad_norm: 2.3821 (2.4219) time: 0.7596 data: 0.0002 max mem: 8426 +[2024-12-11 00:10:03 root] (utils.py 283): INFO Epoch: [23] [2130/2502] eta: 0:04:45 lr: 0.000004 loss_cls: 3.7843 (3.8900) grad_norm: 2.3924 (2.4218) time: 0.7623 data: 0.0002 max mem: 8426 +[2024-12-11 00:10:11 root] (utils.py 283): INFO Epoch: [23] [2140/2502] eta: 0:04:37 lr: 0.000004 loss_cls: 3.7043 (3.8894) grad_norm: 2.4161 (2.4220) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-11 00:10:19 root] (utils.py 283): INFO Epoch: [23] [2150/2502] eta: 0:04:29 lr: 0.000004 loss_cls: 3.5432 (3.8877) grad_norm: 2.4486 (2.4220) time: 0.7682 data: 0.0002 max mem: 8426 +[2024-12-11 00:10:26 root] (utils.py 283): INFO Epoch: [23] [2160/2502] eta: 0:04:22 lr: 0.000004 loss_cls: 3.5553 (3.8868) grad_norm: 2.4184 (2.4222) time: 0.7683 data: 0.0002 max mem: 8426 +[2024-12-11 00:10:34 root] (utils.py 283): INFO Epoch: [23] [2170/2502] eta: 0:04:14 lr: 0.000004 loss_cls: 3.8366 (3.8868) grad_norm: 2.3928 (2.4219) time: 0.7672 data: 0.0002 max mem: 8426 +[2024-12-11 00:10:42 root] (utils.py 283): INFO Epoch: [23] [2180/2502] eta: 0:04:06 lr: 0.000004 loss_cls: 3.8932 (3.8870) grad_norm: 2.4549 (2.4220) time: 0.7671 data: 0.0003 max mem: 8426 +[2024-12-11 00:10:49 root] (utils.py 283): INFO Epoch: [23] [2190/2502] eta: 0:03:59 lr: 0.000004 loss_cls: 3.3610 (3.8845) grad_norm: 2.4570 (2.4221) time: 0.7654 data: 0.0003 max mem: 8426 +[2024-12-11 00:10:57 root] (utils.py 283): INFO Epoch: [23] [2200/2502] eta: 0:03:51 lr: 0.000004 loss_cls: 3.7206 (3.8858) grad_norm: 2.4861 (2.4225) time: 0.7642 data: 0.0003 max mem: 8426 +[2024-12-11 00:11:05 root] (utils.py 283): INFO Epoch: [23] [2210/2502] eta: 0:03:43 lr: 0.000004 loss_cls: 4.1700 (3.8859) grad_norm: 2.4808 (2.4224) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-11 00:11:12 root] (utils.py 283): INFO Epoch: [23] [2220/2502] eta: 0:03:36 lr: 0.000004 loss_cls: 3.9588 (3.8853) grad_norm: 2.3747 (2.4223) time: 0.7685 data: 0.0002 max mem: 8426 +[2024-12-11 00:11:20 root] (utils.py 283): INFO Epoch: [23] [2230/2502] eta: 0:03:28 lr: 0.000004 loss_cls: 3.9156 (3.8851) grad_norm: 2.4195 (2.4224) time: 0.7739 data: 0.0002 max mem: 8426 +[2024-12-11 00:11:28 root] (utils.py 283): INFO Epoch: [23] [2240/2502] eta: 0:03:20 lr: 0.000004 loss_cls: 3.8274 (3.8844) grad_norm: 2.4195 (2.4221) time: 0.7683 data: 0.0002 max mem: 8426 +[2024-12-11 00:11:36 root] (utils.py 283): INFO Epoch: [23] [2250/2502] eta: 0:03:13 lr: 0.000004 loss_cls: 3.7333 (3.8837) grad_norm: 2.3574 (2.4220) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-11 00:11:43 root] (utils.py 283): INFO Epoch: [23] [2260/2502] eta: 0:03:05 lr: 0.000004 loss_cls: 4.1286 (3.8854) grad_norm: 2.4205 (2.4222) time: 0.7721 data: 0.0002 max mem: 8426 +[2024-12-11 00:11:51 root] (utils.py 283): INFO Epoch: [23] [2270/2502] eta: 0:02:57 lr: 0.000004 loss_cls: 4.2211 (3.8855) grad_norm: 2.4730 (2.4224) time: 0.7704 data: 0.0002 max mem: 8426 +[2024-12-11 00:11:59 root] (utils.py 283): INFO Epoch: [23] [2280/2502] eta: 0:02:50 lr: 0.000004 loss_cls: 4.0232 (3.8855) grad_norm: 2.4935 (2.4225) time: 0.7659 data: 0.0003 max mem: 8426 +[2024-12-11 00:12:06 root] (utils.py 283): INFO Epoch: [23] [2290/2502] eta: 0:02:42 lr: 0.000004 loss_cls: 4.0690 (3.8857) grad_norm: 2.4344 (2.4227) time: 0.7638 data: 0.0003 max mem: 8426 +[2024-12-11 00:12:14 root] (utils.py 283): INFO Epoch: [23] [2300/2502] eta: 0:02:34 lr: 0.000004 loss_cls: 3.7357 (3.8842) grad_norm: 2.4932 (2.4233) time: 0.7612 data: 0.0002 max mem: 8426 +[2024-12-11 00:12:21 root] (utils.py 283): INFO Epoch: [23] [2310/2502] eta: 0:02:27 lr: 0.000004 loss_cls: 3.7357 (3.8839) grad_norm: 2.4788 (2.4232) time: 0.7608 data: 0.0002 max mem: 8426 +[2024-12-11 00:12:29 root] (utils.py 283): INFO Epoch: [23] [2320/2502] eta: 0:02:19 lr: 0.000004 loss_cls: 4.1988 (3.8852) grad_norm: 2.4191 (2.4233) time: 0.7585 data: 0.0002 max mem: 8426 +[2024-12-11 00:12:37 root] (utils.py 283): INFO Epoch: [23] [2330/2502] eta: 0:02:11 lr: 0.000004 loss_cls: 4.0965 (3.8845) grad_norm: 2.4208 (2.4233) time: 0.7574 data: 0.0002 max mem: 8426 +[2024-12-11 00:12:44 root] (utils.py 283): INFO Epoch: [23] [2340/2502] eta: 0:02:04 lr: 0.000004 loss_cls: 3.9287 (3.8843) grad_norm: 2.3878 (2.4234) time: 0.7589 data: 0.0002 max mem: 8426 +[2024-12-11 00:12:52 root] (utils.py 283): INFO Epoch: [23] [2350/2502] eta: 0:01:56 lr: 0.000004 loss_cls: 4.0544 (3.8849) grad_norm: 2.3985 (2.4235) time: 0.7609 data: 0.0002 max mem: 8426 +[2024-12-11 00:12:59 root] (utils.py 283): INFO Epoch: [23] [2360/2502] eta: 0:01:48 lr: 0.000004 loss_cls: 4.1804 (3.8853) grad_norm: 2.4032 (2.4234) time: 0.7629 data: 0.0003 max mem: 8426 +[2024-12-11 00:13:07 root] (utils.py 283): INFO Epoch: [23] [2370/2502] eta: 0:01:41 lr: 0.000004 loss_cls: 3.9576 (3.8844) grad_norm: 2.3924 (2.4233) time: 0.7658 data: 0.0003 max mem: 8426 +[2024-12-11 00:13:15 root] (utils.py 283): INFO Epoch: [23] [2380/2502] eta: 0:01:33 lr: 0.000004 loss_cls: 3.8350 (3.8841) grad_norm: 2.3846 (2.4232) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-11 00:13:22 root] (utils.py 283): INFO Epoch: [23] [2390/2502] eta: 0:01:25 lr: 0.000004 loss_cls: 3.8964 (3.8838) grad_norm: 2.3348 (2.4230) time: 0.7599 data: 0.0002 max mem: 8426 +[2024-12-11 00:13:30 root] (utils.py 283): INFO Epoch: [23] [2400/2502] eta: 0:01:18 lr: 0.000004 loss_cls: 3.9408 (3.8847) grad_norm: 2.3416 (2.4228) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-11 00:13:38 root] (utils.py 283): INFO Epoch: [23] [2410/2502] eta: 0:01:10 lr: 0.000004 loss_cls: 4.0944 (3.8852) grad_norm: 2.4055 (2.4228) time: 0.7703 data: 0.0002 max mem: 8426 +[2024-12-11 00:13:45 root] (utils.py 283): INFO Epoch: [23] [2420/2502] eta: 0:01:02 lr: 0.000004 loss_cls: 4.1082 (3.8859) grad_norm: 2.4438 (2.4230) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-11 00:13:53 root] (utils.py 283): INFO Epoch: [23] [2430/2502] eta: 0:00:55 lr: 0.000004 loss_cls: 4.1531 (3.8864) grad_norm: 2.4036 (2.4228) time: 0.7607 data: 0.0002 max mem: 8426 +[2024-12-11 00:14:01 root] (utils.py 283): INFO Epoch: [23] [2440/2502] eta: 0:00:47 lr: 0.000004 loss_cls: 4.1153 (3.8862) grad_norm: 2.4036 (2.4230) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-11 00:14:08 root] (utils.py 283): INFO Epoch: [23] [2450/2502] eta: 0:00:39 lr: 0.000004 loss_cls: 4.1153 (3.8871) grad_norm: 2.4266 (2.4230) time: 0.7606 data: 0.0002 max mem: 8426 +[2024-12-11 00:14:16 root] (utils.py 283): INFO Epoch: [23] [2460/2502] eta: 0:00:32 lr: 0.000004 loss_cls: 4.1249 (3.8861) grad_norm: 2.3940 (2.4232) time: 0.7597 data: 0.0002 max mem: 8426 +[2024-12-11 00:14:24 root] (utils.py 283): INFO Epoch: [23] [2470/2502] eta: 0:00:24 lr: 0.000004 loss_cls: 4.0914 (3.8864) grad_norm: 2.4552 (2.4233) time: 0.7685 data: 0.0002 max mem: 8426 +[2024-12-11 00:14:31 root] (utils.py 283): INFO Epoch: [23] [2480/2502] eta: 0:00:16 lr: 0.000004 loss_cls: 4.0615 (3.8864) grad_norm: 2.4552 (2.4234) time: 0.7806 data: 0.0003 max mem: 8426 +[2024-12-11 00:14:40 root] (utils.py 283): INFO Epoch: [23] [2490/2502] eta: 0:00:09 lr: 0.000004 loss_cls: 4.0290 (3.8866) grad_norm: 2.4468 (2.4233) time: 0.8061 data: 0.0234 max mem: 8426 +[2024-12-11 00:14:48 root] (utils.py 283): INFO Epoch: [23] [2500/2502] eta: 0:00:01 lr: 0.000004 loss_cls: 4.0409 (3.8878) grad_norm: 2.4598 (2.4235) time: 0.8050 data: 0.0234 max mem: 8426 +[2024-12-11 00:14:48 root] (utils.py 283): INFO Epoch: [23] [2501/2502] eta: 0:00:00 lr: 0.000004 loss_cls: 4.0409 (3.8877) grad_norm: 2.4781 (2.4235) time: 0.8039 data: 0.0234 max mem: 8426 +[2024-12-11 00:14:48 root] (utils.py 297): INFO Epoch: [23] Total time: 0:31:59 (0.7670 s / it) +[2024-12-11 00:14:48 root] (engine.py 179): INFO Averaged stats:lr: 0.000004 loss_cls: 4.0409 (3.8900) grad_norm: 2.4781 (2.4235) +[2024-12-11 00:14:49 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.6435 (0.6435) acc1: 87.5000 (87.5000) acc3: 96.8750 (96.8750) acc5: 98.4375 (98.4375) time: 0.1275 data: 0.0004 max mem: 8426 +[2024-12-11 00:14:50 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7225 (0.8070) acc1: 85.9375 (82.8835) acc3: 95.3125 (93.8210) acc5: 96.8750 (96.5909) time: 0.1278 data: 0.0004 max mem: 8426 +[2024-12-11 00:14:51 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8616 (0.8607) acc1: 79.6875 (81.3244) acc3: 92.9688 (93.1176) acc5: 95.3125 (95.5729) time: 0.1282 data: 0.0004 max mem: 8426 +[2024-12-11 00:14:53 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9464 (0.8751) acc1: 78.9062 (80.5444) acc3: 92.9688 (93.3216) acc5: 95.3125 (95.7409) time: 0.1286 data: 0.0004 max mem: 8426 +[2024-12-11 00:14:54 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7995 (0.8632) acc1: 80.4688 (80.9832) acc3: 94.5312 (93.3880) acc5: 96.8750 (95.8651) time: 0.1402 data: 0.0120 max mem: 8426 +[2024-12-11 00:14:56 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0401 (0.9477) acc1: 75.0000 (78.9675) acc3: 88.2812 (91.8811) acc5: 92.1875 (94.7610) time: 0.1401 data: 0.0120 max mem: 8426 +[2024-12-11 00:14:57 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1948 (0.9923) acc1: 72.6562 (78.2018) acc3: 85.1562 (90.9580) acc5: 89.0625 (93.9677) time: 0.1300 data: 0.0019 max mem: 8426 +[2024-12-11 00:14:59 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1868 (1.0319) acc1: 75.0000 (77.3217) acc3: 85.1562 (90.4159) acc5: 89.0625 (93.5079) time: 0.1486 data: 0.0196 max mem: 8426 +[2024-12-11 00:15:00 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1983 (1.0671) acc1: 73.4375 (76.4757) acc3: 86.7188 (89.8534) acc5: 89.8438 (92.9977) time: 0.1632 data: 0.0345 max mem: 8426 +[2024-12-11 00:15:02 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2055 (1.0947) acc1: 71.0938 (75.7383) acc3: 85.9375 (89.4918) acc5: 89.8438 (92.7112) time: 0.1593 data: 0.0310 max mem: 8426 +[2024-12-11 00:15:03 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1299 (1.0834) acc1: 75.0000 (76.0160) acc3: 88.2812 (89.6800) acc5: 90.6250 (92.8960) time: 0.1589 data: 0.0304 max mem: 8426 +[2024-12-11 00:15:03 root] (utils.py 297): INFO Test: Total time: 0:00:14 (0.1446 s / it) +[2024-12-11 00:15:03 root] (engine.py 264): INFO * Acc@1 75.840 Acc@3 89.762 Acc@5 92.918 loss 1.084 flops 1.285 layer_flops 1.251 +[2024-12-11 00:15:03 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 75.8% +[2024-12-11 00:15:03 root] (main.py 576): INFO Max accuracy: 75.88% +[2024-12-11 00:15:04 root] (utils.py 283): INFO Epoch: [24] [ 0/2502] eta: 0:32:13 lr: 0.000003 loss_cls: 4.2821 (4.2821) grad_norm: 2.3369 (2.3369) time: 0.7729 data: 0.0004 max mem: 8426 +[2024-12-11 00:15:11 root] (utils.py 283): INFO Epoch: [24] [ 10/2502] eta: 0:31:39 lr: 0.000003 loss_cls: 4.2584 (4.2165) grad_norm: 2.3857 (2.4078) time: 0.7622 data: 0.0003 max mem: 8426 +[2024-12-11 00:15:19 root] (utils.py 283): INFO Epoch: [24] [ 20/2502] eta: 0:31:35 lr: 0.000003 loss_cls: 3.8869 (4.0489) grad_norm: 2.4050 (2.4384) time: 0.7633 data: 0.0002 max mem: 8426 +[2024-12-11 00:15:27 root] (utils.py 283): INFO Epoch: [24] [ 30/2502] eta: 0:31:26 lr: 0.000003 loss_cls: 3.8749 (3.9457) grad_norm: 2.4103 (2.4447) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-11 00:15:34 root] (utils.py 283): INFO Epoch: [24] [ 40/2502] eta: 0:31:19 lr: 0.000003 loss_cls: 3.9689 (3.9579) grad_norm: 2.3881 (2.4459) time: 0.7629 data: 0.0002 max mem: 8426 +[2024-12-11 00:15:42 root] (utils.py 283): INFO Epoch: [24] [ 50/2502] eta: 0:31:12 lr: 0.000003 loss_cls: 4.0965 (3.9478) grad_norm: 2.3982 (2.4442) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-11 00:15:50 root] (utils.py 283): INFO Epoch: [24] [ 60/2502] eta: 0:31:08 lr: 0.000003 loss_cls: 4.0254 (3.9321) grad_norm: 2.3989 (2.4342) time: 0.7686 data: 0.0002 max mem: 8426 +[2024-12-11 00:15:57 root] (utils.py 283): INFO Epoch: [24] [ 70/2502] eta: 0:31:00 lr: 0.000003 loss_cls: 4.0307 (3.9485) grad_norm: 2.4261 (2.4412) time: 0.7679 data: 0.0002 max mem: 8426 +[2024-12-11 00:16:05 root] (utils.py 283): INFO Epoch: [24] [ 80/2502] eta: 0:30:51 lr: 0.000003 loss_cls: 4.0692 (3.9519) grad_norm: 2.4648 (2.4459) time: 0.7628 data: 0.0003 max mem: 8426 +[2024-12-11 00:16:13 root] (utils.py 283): INFO Epoch: [24] [ 90/2502] eta: 0:30:43 lr: 0.000003 loss_cls: 4.0759 (3.9675) grad_norm: 2.4795 (2.4480) time: 0.7626 data: 0.0003 max mem: 8426 +[2024-12-11 00:16:20 root] (utils.py 283): INFO Epoch: [24] [ 100/2502] eta: 0:30:35 lr: 0.000003 loss_cls: 4.0048 (3.9573) grad_norm: 2.3933 (2.4408) time: 0.7634 data: 0.0003 max mem: 8426 +[2024-12-11 00:16:28 root] (utils.py 283): INFO Epoch: [24] [ 110/2502] eta: 0:30:27 lr: 0.000003 loss_cls: 3.8369 (3.9562) grad_norm: 2.3407 (2.4321) time: 0.7631 data: 0.0003 max mem: 8426 +[2024-12-11 00:16:35 root] (utils.py 283): INFO Epoch: [24] [ 120/2502] eta: 0:30:20 lr: 0.000003 loss_cls: 3.8370 (3.9525) grad_norm: 2.3472 (2.4312) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-11 00:16:43 root] (utils.py 283): INFO Epoch: [24] [ 130/2502] eta: 0:30:13 lr: 0.000003 loss_cls: 4.1003 (3.9508) grad_norm: 2.4441 (2.4345) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-11 00:16:51 root] (utils.py 283): INFO Epoch: [24] [ 140/2502] eta: 0:30:05 lr: 0.000003 loss_cls: 4.1586 (3.9545) grad_norm: 2.4873 (2.4368) time: 0.7642 data: 0.0003 max mem: 8426 +[2024-12-11 00:16:58 root] (utils.py 283): INFO Epoch: [24] [ 150/2502] eta: 0:29:57 lr: 0.000003 loss_cls: 3.8250 (3.9253) grad_norm: 2.3659 (2.4318) time: 0.7632 data: 0.0003 max mem: 8426 +[2024-12-11 00:17:06 root] (utils.py 283): INFO Epoch: [24] [ 160/2502] eta: 0:29:49 lr: 0.000003 loss_cls: 3.8041 (3.9386) grad_norm: 2.4299 (2.4349) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-11 00:17:14 root] (utils.py 283): INFO Epoch: [24] [ 170/2502] eta: 0:29:42 lr: 0.000003 loss_cls: 4.1273 (3.9418) grad_norm: 2.4235 (2.4333) time: 0.7652 data: 0.0003 max mem: 8426 +[2024-12-11 00:17:21 root] (utils.py 283): INFO Epoch: [24] [ 180/2502] eta: 0:29:35 lr: 0.000003 loss_cls: 3.9877 (3.9317) grad_norm: 2.3771 (2.4320) time: 0.7687 data: 0.0003 max mem: 8426 +[2024-12-11 00:17:29 root] (utils.py 283): INFO Epoch: [24] [ 190/2502] eta: 0:29:28 lr: 0.000003 loss_cls: 3.7204 (3.9359) grad_norm: 2.3818 (2.4290) time: 0.7716 data: 0.0002 max mem: 8426 +[2024-12-11 00:17:37 root] (utils.py 283): INFO Epoch: [24] [ 200/2502] eta: 0:29:20 lr: 0.000003 loss_cls: 3.9879 (3.9339) grad_norm: 2.3902 (2.4286) time: 0.7670 data: 0.0003 max mem: 8426 +[2024-12-11 00:17:44 root] (utils.py 283): INFO Epoch: [24] [ 210/2502] eta: 0:29:13 lr: 0.000003 loss_cls: 3.9879 (3.9286) grad_norm: 2.4387 (2.4335) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-11 00:17:52 root] (utils.py 283): INFO Epoch: [24] [ 220/2502] eta: 0:29:04 lr: 0.000003 loss_cls: 4.0232 (3.9246) grad_norm: 2.4860 (2.4346) time: 0.7615 data: 0.0002 max mem: 8426 +[2024-12-11 00:18:00 root] (utils.py 283): INFO Epoch: [24] [ 230/2502] eta: 0:28:57 lr: 0.000003 loss_cls: 4.0232 (3.9227) grad_norm: 2.4136 (2.4351) time: 0.7617 data: 0.0002 max mem: 8426 +[2024-12-11 00:18:07 root] (utils.py 283): INFO Epoch: [24] [ 240/2502] eta: 0:28:48 lr: 0.000003 loss_cls: 3.8370 (3.9139) grad_norm: 2.3681 (2.4316) time: 0.7606 data: 0.0002 max mem: 8426 +[2024-12-11 00:18:15 root] (utils.py 283): INFO Epoch: [24] [ 250/2502] eta: 0:28:41 lr: 0.000003 loss_cls: 4.0772 (3.9209) grad_norm: 2.3298 (2.4298) time: 0.7604 data: 0.0003 max mem: 8426 +[2024-12-11 00:18:23 root] (utils.py 283): INFO Epoch: [24] [ 260/2502] eta: 0:28:34 lr: 0.000003 loss_cls: 4.1090 (3.9144) grad_norm: 2.3838 (2.4277) time: 0.7688 data: 0.0003 max mem: 8426 +[2024-12-11 00:18:30 root] (utils.py 283): INFO Epoch: [24] [ 270/2502] eta: 0:28:28 lr: 0.000003 loss_cls: 3.8438 (3.9099) grad_norm: 2.3997 (2.4272) time: 0.7794 data: 0.0002 max mem: 8426 +[2024-12-11 00:18:38 root] (utils.py 283): INFO Epoch: [24] [ 280/2502] eta: 0:28:20 lr: 0.000003 loss_cls: 3.8438 (3.9070) grad_norm: 2.4289 (2.4265) time: 0.7746 data: 0.0002 max mem: 8426 +[2024-12-11 00:18:46 root] (utils.py 283): INFO Epoch: [24] [ 290/2502] eta: 0:28:12 lr: 0.000003 loss_cls: 4.0244 (3.9011) grad_norm: 2.3882 (2.4268) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-11 00:18:53 root] (utils.py 283): INFO Epoch: [24] [ 300/2502] eta: 0:28:05 lr: 0.000003 loss_cls: 4.0277 (3.9072) grad_norm: 2.3862 (2.4255) time: 0.7697 data: 0.0002 max mem: 8426 +[2024-12-11 00:19:01 root] (utils.py 283): INFO Epoch: [24] [ 310/2502] eta: 0:27:57 lr: 0.000003 loss_cls: 4.1083 (3.9063) grad_norm: 2.4203 (2.4256) time: 0.7676 data: 0.0002 max mem: 8426 +[2024-12-11 00:19:09 root] (utils.py 283): INFO Epoch: [24] [ 320/2502] eta: 0:27:50 lr: 0.000003 loss_cls: 3.8845 (3.9099) grad_norm: 2.4063 (2.4242) time: 0.7632 data: 0.0002 max mem: 8426 +[2024-12-11 00:19:16 root] (utils.py 283): INFO Epoch: [24] [ 330/2502] eta: 0:27:42 lr: 0.000003 loss_cls: 3.7724 (3.9019) grad_norm: 2.4063 (2.4257) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-11 00:19:24 root] (utils.py 283): INFO Epoch: [24] [ 340/2502] eta: 0:27:34 lr: 0.000003 loss_cls: 3.8106 (3.9060) grad_norm: 2.4640 (2.4249) time: 0.7651 data: 0.0003 max mem: 8426 +[2024-12-11 00:19:32 root] (utils.py 283): INFO Epoch: [24] [ 350/2502] eta: 0:27:27 lr: 0.000003 loss_cls: 4.0951 (3.9053) grad_norm: 2.3266 (2.4216) time: 0.7696 data: 0.0003 max mem: 8426 +[2024-12-11 00:19:39 root] (utils.py 283): INFO Epoch: [24] [ 360/2502] eta: 0:27:19 lr: 0.000003 loss_cls: 4.0951 (3.9058) grad_norm: 2.3538 (2.4210) time: 0.7666 data: 0.0003 max mem: 8426 +[2024-12-11 00:19:47 root] (utils.py 283): INFO Epoch: [24] [ 370/2502] eta: 0:27:12 lr: 0.000003 loss_cls: 3.9479 (3.9023) grad_norm: 2.3543 (2.4198) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-11 00:19:55 root] (utils.py 283): INFO Epoch: [24] [ 380/2502] eta: 0:27:03 lr: 0.000003 loss_cls: 3.7658 (3.8965) grad_norm: 2.3347 (2.4195) time: 0.7605 data: 0.0002 max mem: 8426 +[2024-12-11 00:20:02 root] (utils.py 283): INFO Epoch: [24] [ 390/2502] eta: 0:26:55 lr: 0.000003 loss_cls: 3.9417 (3.9040) grad_norm: 2.3849 (2.4212) time: 0.7581 data: 0.0002 max mem: 8426 +[2024-12-11 00:20:10 root] (utils.py 283): INFO Epoch: [24] [ 400/2502] eta: 0:26:48 lr: 0.000003 loss_cls: 4.2263 (3.9060) grad_norm: 2.4358 (2.4245) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-11 00:20:17 root] (utils.py 283): INFO Epoch: [24] [ 410/2502] eta: 0:26:40 lr: 0.000003 loss_cls: 4.0838 (3.9049) grad_norm: 2.4390 (2.4251) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-11 00:20:25 root] (utils.py 283): INFO Epoch: [24] [ 420/2502] eta: 0:26:33 lr: 0.000003 loss_cls: 4.0838 (3.9063) grad_norm: 2.4390 (2.4258) time: 0.7692 data: 0.0002 max mem: 8426 +[2024-12-11 00:20:33 root] (utils.py 283): INFO Epoch: [24] [ 430/2502] eta: 0:26:25 lr: 0.000003 loss_cls: 3.9816 (3.9004) grad_norm: 2.4357 (2.4265) time: 0.7703 data: 0.0003 max mem: 8426 +[2024-12-11 00:20:40 root] (utils.py 283): INFO Epoch: [24] [ 440/2502] eta: 0:26:17 lr: 0.000003 loss_cls: 3.8383 (3.8993) grad_norm: 2.4344 (2.4268) time: 0.7628 data: 0.0003 max mem: 8426 +[2024-12-11 00:20:48 root] (utils.py 283): INFO Epoch: [24] [ 450/2502] eta: 0:26:10 lr: 0.000003 loss_cls: 3.6902 (3.8958) grad_norm: 2.4117 (2.4255) time: 0.7606 data: 0.0003 max mem: 8426 +[2024-12-11 00:20:56 root] (utils.py 283): INFO Epoch: [24] [ 460/2502] eta: 0:26:02 lr: 0.000003 loss_cls: 3.6902 (3.8912) grad_norm: 2.3403 (2.4245) time: 0.7611 data: 0.0002 max mem: 8426 +[2024-12-11 00:21:03 root] (utils.py 283): INFO Epoch: [24] [ 470/2502] eta: 0:25:54 lr: 0.000003 loss_cls: 4.0174 (3.8959) grad_norm: 2.4390 (2.4259) time: 0.7597 data: 0.0002 max mem: 8426 +[2024-12-11 00:21:11 root] (utils.py 283): INFO Epoch: [24] [ 480/2502] eta: 0:25:46 lr: 0.000003 loss_cls: 4.1114 (3.8994) grad_norm: 2.4953 (2.4285) time: 0.7591 data: 0.0003 max mem: 8426 +[2024-12-11 00:21:19 root] (utils.py 283): INFO Epoch: [24] [ 490/2502] eta: 0:25:39 lr: 0.000003 loss_cls: 4.1849 (3.9036) grad_norm: 2.4614 (2.4288) time: 0.7689 data: 0.0003 max mem: 8426 +[2024-12-11 00:21:26 root] (utils.py 283): INFO Epoch: [24] [ 500/2502] eta: 0:25:31 lr: 0.000003 loss_cls: 4.2853 (3.9120) grad_norm: 2.4338 (2.4289) time: 0.7716 data: 0.0002 max mem: 8426 +[2024-12-11 00:21:34 root] (utils.py 283): INFO Epoch: [24] [ 510/2502] eta: 0:25:24 lr: 0.000003 loss_cls: 4.1714 (3.9132) grad_norm: 2.4353 (2.4290) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-11 00:21:42 root] (utils.py 283): INFO Epoch: [24] [ 520/2502] eta: 0:25:16 lr: 0.000003 loss_cls: 3.7673 (3.9071) grad_norm: 2.4099 (2.4284) time: 0.7645 data: 0.0002 max mem: 8426 +[2024-12-11 00:21:49 root] (utils.py 283): INFO Epoch: [24] [ 530/2502] eta: 0:25:09 lr: 0.000003 loss_cls: 3.6814 (3.9037) grad_norm: 2.3350 (2.4272) time: 0.7711 data: 0.0002 max mem: 8426 +[2024-12-11 00:21:57 root] (utils.py 283): INFO Epoch: [24] [ 540/2502] eta: 0:25:02 lr: 0.000003 loss_cls: 4.0340 (3.9096) grad_norm: 2.3439 (2.4265) time: 0.7810 data: 0.0002 max mem: 8426 +[2024-12-11 00:22:05 root] (utils.py 283): INFO Epoch: [24] [ 550/2502] eta: 0:24:54 lr: 0.000003 loss_cls: 4.1240 (3.9083) grad_norm: 2.3837 (2.4269) time: 0.7756 data: 0.0002 max mem: 8426 +[2024-12-11 00:22:13 root] (utils.py 283): INFO Epoch: [24] [ 560/2502] eta: 0:24:46 lr: 0.000003 loss_cls: 3.8768 (3.9078) grad_norm: 2.4521 (2.4270) time: 0.7665 data: 0.0002 max mem: 8426 +[2024-12-11 00:22:20 root] (utils.py 283): INFO Epoch: [24] [ 570/2502] eta: 0:24:39 lr: 0.000003 loss_cls: 3.9366 (3.9096) grad_norm: 2.4521 (2.4271) time: 0.7692 data: 0.0002 max mem: 8426 +[2024-12-11 00:22:28 root] (utils.py 283): INFO Epoch: [24] [ 580/2502] eta: 0:24:31 lr: 0.000003 loss_cls: 3.8130 (3.9052) grad_norm: 2.3671 (2.4259) time: 0.7692 data: 0.0002 max mem: 8426 +[2024-12-11 00:22:36 root] (utils.py 283): INFO Epoch: [24] [ 590/2502] eta: 0:24:24 lr: 0.000003 loss_cls: 3.7286 (3.9083) grad_norm: 2.3558 (2.4258) time: 0.7687 data: 0.0002 max mem: 8426 +[2024-12-11 00:22:43 root] (utils.py 283): INFO Epoch: [24] [ 600/2502] eta: 0:24:16 lr: 0.000003 loss_cls: 4.3222 (3.9100) grad_norm: 2.3990 (2.4253) time: 0.7695 data: 0.0003 max mem: 8426 +[2024-12-11 00:22:51 root] (utils.py 283): INFO Epoch: [24] [ 610/2502] eta: 0:24:08 lr: 0.000003 loss_cls: 3.9837 (3.9097) grad_norm: 2.4068 (2.4255) time: 0.7635 data: 0.0003 max mem: 8426 +[2024-12-11 00:22:59 root] (utils.py 283): INFO Epoch: [24] [ 620/2502] eta: 0:24:01 lr: 0.000003 loss_cls: 3.9037 (3.9092) grad_norm: 2.3994 (2.4250) time: 0.7633 data: 0.0003 max mem: 8426 +[2024-12-11 00:23:06 root] (utils.py 283): INFO Epoch: [24] [ 630/2502] eta: 0:23:53 lr: 0.000003 loss_cls: 4.1629 (3.9090) grad_norm: 2.3565 (2.4240) time: 0.7695 data: 0.0002 max mem: 8426 +[2024-12-11 00:23:14 root] (utils.py 283): INFO Epoch: [24] [ 640/2502] eta: 0:23:45 lr: 0.000003 loss_cls: 4.1655 (3.9125) grad_norm: 2.3607 (2.4232) time: 0.7670 data: 0.0002 max mem: 8426 +[2024-12-11 00:23:22 root] (utils.py 283): INFO Epoch: [24] [ 650/2502] eta: 0:23:38 lr: 0.000003 loss_cls: 4.1655 (3.9132) grad_norm: 2.3909 (2.4229) time: 0.7600 data: 0.0002 max mem: 8426 +[2024-12-11 00:23:29 root] (utils.py 283): INFO Epoch: [24] [ 660/2502] eta: 0:23:30 lr: 0.000003 loss_cls: 4.0241 (3.9102) grad_norm: 2.4240 (2.4234) time: 0.7618 data: 0.0002 max mem: 8426 +[2024-12-11 00:23:37 root] (utils.py 283): INFO Epoch: [24] [ 670/2502] eta: 0:23:22 lr: 0.000003 loss_cls: 3.9350 (3.9102) grad_norm: 2.3932 (2.4225) time: 0.7611 data: 0.0002 max mem: 8426 +[2024-12-11 00:23:44 root] (utils.py 283): INFO Epoch: [24] [ 680/2502] eta: 0:23:14 lr: 0.000003 loss_cls: 3.9350 (3.9106) grad_norm: 2.4140 (2.4232) time: 0.7594 data: 0.0002 max mem: 8426 +[2024-12-11 00:23:52 root] (utils.py 283): INFO Epoch: [24] [ 690/2502] eta: 0:23:07 lr: 0.000003 loss_cls: 4.2222 (3.9144) grad_norm: 2.4594 (2.4237) time: 0.7620 data: 0.0002 max mem: 8426 +[2024-12-11 00:24:00 root] (utils.py 283): INFO Epoch: [24] [ 700/2502] eta: 0:22:59 lr: 0.000003 loss_cls: 4.2399 (3.9161) grad_norm: 2.4379 (2.4245) time: 0.7666 data: 0.0003 max mem: 8426 +[2024-12-11 00:24:07 root] (utils.py 283): INFO Epoch: [24] [ 710/2502] eta: 0:22:51 lr: 0.000003 loss_cls: 4.2074 (3.9195) grad_norm: 2.4271 (2.4250) time: 0.7682 data: 0.0003 max mem: 8426 +[2024-12-11 00:24:15 root] (utils.py 283): INFO Epoch: [24] [ 720/2502] eta: 0:22:44 lr: 0.000003 loss_cls: 3.8650 (3.9142) grad_norm: 2.4083 (2.4254) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-11 00:24:23 root] (utils.py 283): INFO Epoch: [24] [ 730/2502] eta: 0:22:36 lr: 0.000003 loss_cls: 3.4749 (3.9126) grad_norm: 2.4029 (2.4246) time: 0.7612 data: 0.0003 max mem: 8426 +[2024-12-11 00:24:30 root] (utils.py 283): INFO Epoch: [24] [ 740/2502] eta: 0:22:28 lr: 0.000003 loss_cls: 3.9198 (3.9115) grad_norm: 2.3713 (2.4236) time: 0.7610 data: 0.0003 max mem: 8426 +[2024-12-11 00:24:38 root] (utils.py 283): INFO Epoch: [24] [ 750/2502] eta: 0:22:21 lr: 0.000003 loss_cls: 4.1838 (3.9122) grad_norm: 2.3713 (2.4235) time: 0.7646 data: 0.0003 max mem: 8426 +[2024-12-11 00:24:46 root] (utils.py 283): INFO Epoch: [24] [ 760/2502] eta: 0:22:13 lr: 0.000003 loss_cls: 4.2161 (3.9142) grad_norm: 2.4231 (2.4230) time: 0.7706 data: 0.0003 max mem: 8426 +[2024-12-11 00:24:53 root] (utils.py 283): INFO Epoch: [24] [ 770/2502] eta: 0:22:06 lr: 0.000003 loss_cls: 4.0608 (3.9121) grad_norm: 2.4342 (2.4235) time: 0.7727 data: 0.0003 max mem: 8426 +[2024-12-11 00:25:01 root] (utils.py 283): INFO Epoch: [24] [ 780/2502] eta: 0:21:58 lr: 0.000003 loss_cls: 4.0608 (3.9146) grad_norm: 2.4467 (2.4235) time: 0.7661 data: 0.0002 max mem: 8426 +[2024-12-11 00:25:09 root] (utils.py 283): INFO Epoch: [24] [ 790/2502] eta: 0:21:50 lr: 0.000003 loss_cls: 4.1522 (3.9183) grad_norm: 2.4474 (2.4239) time: 0.7638 data: 0.0003 max mem: 8426 +[2024-12-11 00:25:16 root] (utils.py 283): INFO Epoch: [24] [ 800/2502] eta: 0:21:43 lr: 0.000003 loss_cls: 4.0475 (3.9195) grad_norm: 2.4418 (2.4236) time: 0.7658 data: 0.0002 max mem: 8426 +[2024-12-11 00:25:24 root] (utils.py 283): INFO Epoch: [24] [ 810/2502] eta: 0:21:35 lr: 0.000003 loss_cls: 3.7138 (3.9141) grad_norm: 2.4618 (2.4244) time: 0.7656 data: 0.0003 max mem: 8426 +[2024-12-11 00:25:32 root] (utils.py 283): INFO Epoch: [24] [ 820/2502] eta: 0:21:27 lr: 0.000003 loss_cls: 3.5661 (3.9110) grad_norm: 2.4236 (2.4240) time: 0.7647 data: 0.0003 max mem: 8426 +[2024-12-11 00:25:39 root] (utils.py 283): INFO Epoch: [24] [ 830/2502] eta: 0:21:19 lr: 0.000003 loss_cls: 3.8590 (3.9115) grad_norm: 2.3984 (2.4239) time: 0.7633 data: 0.0003 max mem: 8426 +[2024-12-11 00:25:47 root] (utils.py 283): INFO Epoch: [24] [ 840/2502] eta: 0:21:12 lr: 0.000003 loss_cls: 3.9641 (3.9118) grad_norm: 2.3840 (2.4237) time: 0.7614 data: 0.0003 max mem: 8426 +[2024-12-11 00:25:55 root] (utils.py 283): INFO Epoch: [24] [ 850/2502] eta: 0:21:04 lr: 0.000003 loss_cls: 3.8828 (3.9083) grad_norm: 2.3770 (2.4236) time: 0.7662 data: 0.0003 max mem: 8426 +[2024-12-11 00:26:02 root] (utils.py 283): INFO Epoch: [24] [ 860/2502] eta: 0:20:57 lr: 0.000003 loss_cls: 4.1011 (3.9110) grad_norm: 2.3929 (2.4237) time: 0.7782 data: 0.0003 max mem: 8426 +[2024-12-11 00:26:10 root] (utils.py 283): INFO Epoch: [24] [ 870/2502] eta: 0:20:50 lr: 0.000003 loss_cls: 4.0878 (3.9104) grad_norm: 2.4351 (2.4242) time: 0.7876 data: 0.0002 max mem: 8426 +[2024-12-11 00:26:18 root] (utils.py 283): INFO Epoch: [24] [ 880/2502] eta: 0:20:42 lr: 0.000003 loss_cls: 4.0739 (3.9124) grad_norm: 2.4490 (2.4245) time: 0.7879 data: 0.0002 max mem: 8426 +[2024-12-11 00:26:26 root] (utils.py 283): INFO Epoch: [24] [ 890/2502] eta: 0:20:35 lr: 0.000003 loss_cls: 3.7485 (3.9073) grad_norm: 2.4220 (2.4233) time: 0.7827 data: 0.0003 max mem: 8426 +[2024-12-11 00:26:34 root] (utils.py 283): INFO Epoch: [24] [ 900/2502] eta: 0:20:28 lr: 0.000003 loss_cls: 3.5988 (3.9067) grad_norm: 2.3604 (2.4235) time: 0.7817 data: 0.0002 max mem: 8426 +[2024-12-11 00:26:42 root] (utils.py 283): INFO Epoch: [24] [ 910/2502] eta: 0:20:20 lr: 0.000003 loss_cls: 4.0500 (3.9075) grad_norm: 2.5000 (2.4237) time: 0.7817 data: 0.0003 max mem: 8426 +[2024-12-11 00:26:49 root] (utils.py 283): INFO Epoch: [24] [ 920/2502] eta: 0:20:13 lr: 0.000003 loss_cls: 4.0500 (3.9073) grad_norm: 2.3896 (2.4243) time: 0.7800 data: 0.0003 max mem: 8426 +[2024-12-11 00:26:57 root] (utils.py 283): INFO Epoch: [24] [ 930/2502] eta: 0:20:05 lr: 0.000003 loss_cls: 4.3013 (3.9098) grad_norm: 2.3885 (2.4238) time: 0.7803 data: 0.0002 max mem: 8426 +[2024-12-11 00:27:05 root] (utils.py 283): INFO Epoch: [24] [ 940/2502] eta: 0:19:58 lr: 0.000003 loss_cls: 4.2845 (3.9124) grad_norm: 2.3741 (2.4236) time: 0.7826 data: 0.0002 max mem: 8426 +[2024-12-11 00:27:13 root] (utils.py 283): INFO Epoch: [24] [ 950/2502] eta: 0:19:50 lr: 0.000003 loss_cls: 3.8972 (3.9111) grad_norm: 2.3871 (2.4233) time: 0.7831 data: 0.0003 max mem: 8426 +[2024-12-11 00:27:21 root] (utils.py 283): INFO Epoch: [24] [ 960/2502] eta: 0:19:43 lr: 0.000003 loss_cls: 3.5917 (3.9088) grad_norm: 2.4388 (2.4242) time: 0.7822 data: 0.0003 max mem: 8426 +[2024-12-11 00:27:28 root] (utils.py 283): INFO Epoch: [24] [ 970/2502] eta: 0:19:36 lr: 0.000003 loss_cls: 3.5917 (3.9081) grad_norm: 2.4165 (2.4234) time: 0.7813 data: 0.0002 max mem: 8426 +[2024-12-11 00:27:36 root] (utils.py 283): INFO Epoch: [24] [ 980/2502] eta: 0:19:28 lr: 0.000003 loss_cls: 4.0349 (3.9077) grad_norm: 2.4165 (2.4243) time: 0.7812 data: 0.0003 max mem: 8426 +[2024-12-11 00:27:44 root] (utils.py 283): INFO Epoch: [24] [ 990/2502] eta: 0:19:20 lr: 0.000003 loss_cls: 4.1117 (3.9110) grad_norm: 2.4929 (2.4246) time: 0.7732 data: 0.0003 max mem: 8426 +[2024-12-11 00:27:52 root] (utils.py 283): INFO Epoch: [24] [1000/2502] eta: 0:19:13 lr: 0.000003 loss_cls: 4.0961 (3.9098) grad_norm: 2.3243 (2.4234) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-11 00:27:59 root] (utils.py 283): INFO Epoch: [24] [1010/2502] eta: 0:19:05 lr: 0.000003 loss_cls: 3.9280 (3.9096) grad_norm: 2.3599 (2.4232) time: 0.7627 data: 0.0003 max mem: 8426 +[2024-12-11 00:28:07 root] (utils.py 283): INFO Epoch: [24] [1020/2502] eta: 0:18:57 lr: 0.000003 loss_cls: 3.8605 (3.9079) grad_norm: 2.4112 (2.4233) time: 0.7654 data: 0.0003 max mem: 8426 +[2024-12-11 00:28:14 root] (utils.py 283): INFO Epoch: [24] [1030/2502] eta: 0:18:49 lr: 0.000003 loss_cls: 3.7268 (3.9070) grad_norm: 2.4262 (2.4238) time: 0.7638 data: 0.0003 max mem: 8426 +[2024-12-11 00:28:22 root] (utils.py 283): INFO Epoch: [24] [1040/2502] eta: 0:18:42 lr: 0.000003 loss_cls: 3.6380 (3.9067) grad_norm: 2.4378 (2.4242) time: 0.7668 data: 0.0002 max mem: 8426 +[2024-12-11 00:28:30 root] (utils.py 283): INFO Epoch: [24] [1050/2502] eta: 0:18:34 lr: 0.000003 loss_cls: 3.8390 (3.9050) grad_norm: 2.4431 (2.4246) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-11 00:28:37 root] (utils.py 283): INFO Epoch: [24] [1060/2502] eta: 0:18:26 lr: 0.000003 loss_cls: 3.9016 (3.9055) grad_norm: 2.4417 (2.4244) time: 0.7613 data: 0.0002 max mem: 8426 +[2024-12-11 00:28:45 root] (utils.py 283): INFO Epoch: [24] [1070/2502] eta: 0:18:19 lr: 0.000003 loss_cls: 3.9465 (3.9063) grad_norm: 2.4445 (2.4252) time: 0.7638 data: 0.0003 max mem: 8426 +[2024-12-11 00:28:53 root] (utils.py 283): INFO Epoch: [24] [1080/2502] eta: 0:18:11 lr: 0.000003 loss_cls: 4.0431 (3.9060) grad_norm: 2.4480 (2.4253) time: 0.7630 data: 0.0002 max mem: 8426 +[2024-12-11 00:29:00 root] (utils.py 283): INFO Epoch: [24] [1090/2502] eta: 0:18:03 lr: 0.000003 loss_cls: 4.0431 (3.9043) grad_norm: 2.3704 (2.4249) time: 0.7670 data: 0.0002 max mem: 8426 +[2024-12-11 00:29:08 root] (utils.py 283): INFO Epoch: [24] [1100/2502] eta: 0:17:56 lr: 0.000003 loss_cls: 4.0461 (3.9061) grad_norm: 2.3806 (2.4249) time: 0.7702 data: 0.0002 max mem: 8426 +[2024-12-11 00:29:16 root] (utils.py 283): INFO Epoch: [24] [1110/2502] eta: 0:17:48 lr: 0.000003 loss_cls: 4.1844 (3.9059) grad_norm: 2.3847 (2.4248) time: 0.7662 data: 0.0003 max mem: 8426 +[2024-12-11 00:29:23 root] (utils.py 283): INFO Epoch: [24] [1120/2502] eta: 0:17:40 lr: 0.000003 loss_cls: 3.7629 (3.9016) grad_norm: 2.4204 (2.4249) time: 0.7630 data: 0.0002 max mem: 8426 +[2024-12-11 00:29:31 root] (utils.py 283): INFO Epoch: [24] [1130/2502] eta: 0:17:32 lr: 0.000003 loss_cls: 3.7787 (3.9016) grad_norm: 2.4421 (2.4255) time: 0.7628 data: 0.0002 max mem: 8426 +[2024-12-11 00:29:39 root] (utils.py 283): INFO Epoch: [24] [1140/2502] eta: 0:17:25 lr: 0.000003 loss_cls: 3.9255 (3.8994) grad_norm: 2.4561 (2.4256) time: 0.7685 data: 0.0003 max mem: 8426 +[2024-12-11 00:29:46 root] (utils.py 283): INFO Epoch: [24] [1150/2502] eta: 0:17:17 lr: 0.000003 loss_cls: 3.6919 (3.8989) grad_norm: 2.4035 (2.4254) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-11 00:29:54 root] (utils.py 283): INFO Epoch: [24] [1160/2502] eta: 0:17:09 lr: 0.000003 loss_cls: 3.7617 (3.8985) grad_norm: 2.4225 (2.4258) time: 0.7634 data: 0.0002 max mem: 8426 +[2024-12-11 00:30:02 root] (utils.py 283): INFO Epoch: [24] [1170/2502] eta: 0:17:02 lr: 0.000003 loss_cls: 3.9817 (3.9002) grad_norm: 2.4348 (2.4260) time: 0.7720 data: 0.0002 max mem: 8426 +[2024-12-11 00:30:10 root] (utils.py 283): INFO Epoch: [24] [1180/2502] eta: 0:16:54 lr: 0.000003 loss_cls: 4.1037 (3.9010) grad_norm: 2.4477 (2.4260) time: 0.7784 data: 0.0002 max mem: 8426 +[2024-12-11 00:30:17 root] (utils.py 283): INFO Epoch: [24] [1190/2502] eta: 0:16:46 lr: 0.000003 loss_cls: 3.9633 (3.9017) grad_norm: 2.4357 (2.4258) time: 0.7697 data: 0.0003 max mem: 8426 +[2024-12-11 00:30:25 root] (utils.py 283): INFO Epoch: [24] [1200/2502] eta: 0:16:39 lr: 0.000003 loss_cls: 3.9510 (3.9018) grad_norm: 2.4437 (2.4262) time: 0.7608 data: 0.0002 max mem: 8426 +[2024-12-11 00:30:32 root] (utils.py 283): INFO Epoch: [24] [1210/2502] eta: 0:16:31 lr: 0.000003 loss_cls: 3.9323 (3.9019) grad_norm: 2.3542 (2.4258) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-11 00:30:40 root] (utils.py 283): INFO Epoch: [24] [1220/2502] eta: 0:16:23 lr: 0.000003 loss_cls: 3.9743 (3.9026) grad_norm: 2.3531 (2.4255) time: 0.7684 data: 0.0002 max mem: 8426 +[2024-12-11 00:30:48 root] (utils.py 283): INFO Epoch: [24] [1230/2502] eta: 0:16:16 lr: 0.000003 loss_cls: 3.9901 (3.9040) grad_norm: 2.4032 (2.4253) time: 0.7665 data: 0.0002 max mem: 8426 +[2024-12-11 00:30:55 root] (utils.py 283): INFO Epoch: [24] [1240/2502] eta: 0:16:08 lr: 0.000003 loss_cls: 3.7122 (3.8998) grad_norm: 2.4038 (2.4254) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-11 00:31:03 root] (utils.py 283): INFO Epoch: [24] [1250/2502] eta: 0:16:00 lr: 0.000003 loss_cls: 3.4895 (3.9003) grad_norm: 2.3993 (2.4250) time: 0.7704 data: 0.0002 max mem: 8426 +[2024-12-11 00:31:11 root] (utils.py 283): INFO Epoch: [24] [1260/2502] eta: 0:15:53 lr: 0.000003 loss_cls: 4.0211 (3.9008) grad_norm: 2.3797 (2.4247) time: 0.7665 data: 0.0002 max mem: 8426 +[2024-12-11 00:31:18 root] (utils.py 283): INFO Epoch: [24] [1270/2502] eta: 0:15:45 lr: 0.000003 loss_cls: 3.9673 (3.9004) grad_norm: 2.3797 (2.4245) time: 0.7609 data: 0.0002 max mem: 8426 +[2024-12-11 00:31:26 root] (utils.py 283): INFO Epoch: [24] [1280/2502] eta: 0:15:37 lr: 0.000003 loss_cls: 3.8677 (3.8998) grad_norm: 2.3955 (2.4241) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-11 00:31:34 root] (utils.py 283): INFO Epoch: [24] [1290/2502] eta: 0:15:29 lr: 0.000003 loss_cls: 3.8677 (3.9003) grad_norm: 2.4326 (2.4251) time: 0.7647 data: 0.0003 max mem: 8426 +[2024-12-11 00:31:41 root] (utils.py 283): INFO Epoch: [24] [1300/2502] eta: 0:15:22 lr: 0.000003 loss_cls: 4.2049 (3.9031) grad_norm: 2.4576 (2.4252) time: 0.7641 data: 0.0003 max mem: 8426 +[2024-12-11 00:31:49 root] (utils.py 283): INFO Epoch: [24] [1310/2502] eta: 0:15:14 lr: 0.000003 loss_cls: 3.9765 (3.8997) grad_norm: 2.4625 (2.4258) time: 0.7616 data: 0.0002 max mem: 8426 +[2024-12-11 00:31:57 root] (utils.py 283): INFO Epoch: [24] [1320/2502] eta: 0:15:06 lr: 0.000003 loss_cls: 3.9336 (3.9008) grad_norm: 2.4388 (2.4260) time: 0.7608 data: 0.0002 max mem: 8426 +[2024-12-11 00:32:04 root] (utils.py 283): INFO Epoch: [24] [1330/2502] eta: 0:14:59 lr: 0.000003 loss_cls: 4.0394 (3.9008) grad_norm: 2.5103 (2.4265) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-11 00:32:12 root] (utils.py 283): INFO Epoch: [24] [1340/2502] eta: 0:14:51 lr: 0.000003 loss_cls: 3.8668 (3.8998) grad_norm: 2.4887 (2.4264) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-11 00:32:19 root] (utils.py 283): INFO Epoch: [24] [1350/2502] eta: 0:14:43 lr: 0.000003 loss_cls: 3.9880 (3.8992) grad_norm: 2.3802 (2.4263) time: 0.7622 data: 0.0002 max mem: 8426 +[2024-12-11 00:32:27 root] (utils.py 283): INFO Epoch: [24] [1360/2502] eta: 0:14:35 lr: 0.000003 loss_cls: 3.8163 (3.8982) grad_norm: 2.3675 (2.4261) time: 0.7601 data: 0.0002 max mem: 8426 +[2024-12-11 00:32:35 root] (utils.py 283): INFO Epoch: [24] [1370/2502] eta: 0:14:28 lr: 0.000003 loss_cls: 3.7445 (3.8981) grad_norm: 2.3531 (2.4257) time: 0.7611 data: 0.0002 max mem: 8426 +[2024-12-11 00:32:42 root] (utils.py 283): INFO Epoch: [24] [1380/2502] eta: 0:14:20 lr: 0.000003 loss_cls: 3.7340 (3.8971) grad_norm: 2.3331 (2.4255) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-11 00:32:50 root] (utils.py 283): INFO Epoch: [24] [1390/2502] eta: 0:14:12 lr: 0.000003 loss_cls: 3.7345 (3.8962) grad_norm: 2.3589 (2.4253) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-11 00:32:58 root] (utils.py 283): INFO Epoch: [24] [1400/2502] eta: 0:14:05 lr: 0.000003 loss_cls: 3.7764 (3.8958) grad_norm: 2.3821 (2.4254) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-11 00:33:05 root] (utils.py 283): INFO Epoch: [24] [1410/2502] eta: 0:13:57 lr: 0.000003 loss_cls: 4.1222 (3.8969) grad_norm: 2.4117 (2.4256) time: 0.7685 data: 0.0002 max mem: 8426 +[2024-12-11 00:33:13 root] (utils.py 283): INFO Epoch: [24] [1420/2502] eta: 0:13:49 lr: 0.000003 loss_cls: 3.8750 (3.8946) grad_norm: 2.4440 (2.4261) time: 0.7684 data: 0.0003 max mem: 8426 +[2024-12-11 00:33:21 root] (utils.py 283): INFO Epoch: [24] [1430/2502] eta: 0:13:42 lr: 0.000003 loss_cls: 3.8750 (3.8958) grad_norm: 2.4990 (2.4265) time: 0.7683 data: 0.0002 max mem: 8426 +[2024-12-11 00:33:28 root] (utils.py 283): INFO Epoch: [24] [1440/2502] eta: 0:13:34 lr: 0.000003 loss_cls: 4.1561 (3.8957) grad_norm: 2.4698 (2.4270) time: 0.7694 data: 0.0002 max mem: 8426 +[2024-12-11 00:33:36 root] (utils.py 283): INFO Epoch: [24] [1450/2502] eta: 0:13:26 lr: 0.000003 loss_cls: 3.5789 (3.8943) grad_norm: 2.4768 (2.4275) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-11 00:33:44 root] (utils.py 283): INFO Epoch: [24] [1460/2502] eta: 0:13:19 lr: 0.000003 loss_cls: 3.7327 (3.8922) grad_norm: 2.3939 (2.4271) time: 0.7651 data: 0.0003 max mem: 8426 +[2024-12-11 00:33:51 root] (utils.py 283): INFO Epoch: [24] [1470/2502] eta: 0:13:11 lr: 0.000003 loss_cls: 3.8952 (3.8916) grad_norm: 2.3683 (2.4268) time: 0.7636 data: 0.0003 max mem: 8426 +[2024-12-11 00:33:59 root] (utils.py 283): INFO Epoch: [24] [1480/2502] eta: 0:13:03 lr: 0.000003 loss_cls: 3.9895 (3.8926) grad_norm: 2.3731 (2.4267) time: 0.7606 data: 0.0003 max mem: 8426 +[2024-12-11 00:34:07 root] (utils.py 283): INFO Epoch: [24] [1490/2502] eta: 0:12:56 lr: 0.000003 loss_cls: 3.9912 (3.8929) grad_norm: 2.3731 (2.4270) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-11 00:34:14 root] (utils.py 283): INFO Epoch: [24] [1500/2502] eta: 0:12:48 lr: 0.000003 loss_cls: 3.9905 (3.8916) grad_norm: 2.3839 (2.4268) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-11 00:34:22 root] (utils.py 283): INFO Epoch: [24] [1510/2502] eta: 0:12:40 lr: 0.000003 loss_cls: 3.9329 (3.8911) grad_norm: 2.3658 (2.4266) time: 0.7647 data: 0.0003 max mem: 8426 +[2024-12-11 00:34:29 root] (utils.py 283): INFO Epoch: [24] [1520/2502] eta: 0:12:33 lr: 0.000003 loss_cls: 3.9329 (3.8904) grad_norm: 2.3658 (2.4267) time: 0.7654 data: 0.0003 max mem: 8426 +[2024-12-11 00:34:37 root] (utils.py 283): INFO Epoch: [24] [1530/2502] eta: 0:12:25 lr: 0.000003 loss_cls: 4.0106 (3.8909) grad_norm: 2.3373 (2.4262) time: 0.7632 data: 0.0003 max mem: 8426 +[2024-12-11 00:34:45 root] (utils.py 283): INFO Epoch: [24] [1540/2502] eta: 0:12:17 lr: 0.000003 loss_cls: 4.0869 (3.8907) grad_norm: 2.3706 (2.4263) time: 0.7650 data: 0.0003 max mem: 8426 +[2024-12-11 00:34:52 root] (utils.py 283): INFO Epoch: [24] [1550/2502] eta: 0:12:09 lr: 0.000003 loss_cls: 3.8343 (3.8898) grad_norm: 2.4022 (2.4260) time: 0.7670 data: 0.0003 max mem: 8426 +[2024-12-11 00:35:00 root] (utils.py 283): INFO Epoch: [24] [1560/2502] eta: 0:12:02 lr: 0.000003 loss_cls: 3.8343 (3.8898) grad_norm: 2.3623 (2.4260) time: 0.7624 data: 0.0003 max mem: 8426 +[2024-12-11 00:35:08 root] (utils.py 283): INFO Epoch: [24] [1570/2502] eta: 0:11:54 lr: 0.000003 loss_cls: 3.7551 (3.8883) grad_norm: 2.3986 (2.4260) time: 0.7691 data: 0.0003 max mem: 8426 +[2024-12-11 00:35:15 root] (utils.py 283): INFO Epoch: [24] [1580/2502] eta: 0:11:47 lr: 0.000003 loss_cls: 3.7008 (3.8889) grad_norm: 2.3986 (2.4261) time: 0.7711 data: 0.0003 max mem: 8426 +[2024-12-11 00:35:23 root] (utils.py 283): INFO Epoch: [24] [1590/2502] eta: 0:11:39 lr: 0.000003 loss_cls: 4.1036 (3.8897) grad_norm: 2.4009 (2.4260) time: 0.7655 data: 0.0003 max mem: 8426 +[2024-12-11 00:35:31 root] (utils.py 283): INFO Epoch: [24] [1600/2502] eta: 0:11:31 lr: 0.000003 loss_cls: 3.9675 (3.8888) grad_norm: 2.4009 (2.4261) time: 0.7679 data: 0.0002 max mem: 8426 +[2024-12-11 00:35:38 root] (utils.py 283): INFO Epoch: [24] [1610/2502] eta: 0:11:23 lr: 0.000003 loss_cls: 3.9675 (3.8895) grad_norm: 2.4142 (2.4262) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-11 00:35:46 root] (utils.py 283): INFO Epoch: [24] [1620/2502] eta: 0:11:16 lr: 0.000003 loss_cls: 4.1451 (3.8902) grad_norm: 2.4791 (2.4267) time: 0.7659 data: 0.0002 max mem: 8426 +[2024-12-11 00:35:54 root] (utils.py 283): INFO Epoch: [24] [1630/2502] eta: 0:11:08 lr: 0.000003 loss_cls: 4.1960 (3.8907) grad_norm: 2.3955 (2.4265) time: 0.7737 data: 0.0002 max mem: 8426 +[2024-12-11 00:36:02 root] (utils.py 283): INFO Epoch: [24] [1640/2502] eta: 0:11:01 lr: 0.000003 loss_cls: 3.7508 (3.8894) grad_norm: 2.4014 (2.4265) time: 0.7752 data: 0.0002 max mem: 8426 +[2024-12-11 00:36:09 root] (utils.py 283): INFO Epoch: [24] [1650/2502] eta: 0:10:53 lr: 0.000003 loss_cls: 4.0857 (3.8909) grad_norm: 2.4026 (2.4262) time: 0.7685 data: 0.0002 max mem: 8426 +[2024-12-11 00:36:17 root] (utils.py 283): INFO Epoch: [24] [1660/2502] eta: 0:10:45 lr: 0.000003 loss_cls: 4.1356 (3.8909) grad_norm: 2.4208 (2.4264) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-11 00:36:25 root] (utils.py 283): INFO Epoch: [24] [1670/2502] eta: 0:10:38 lr: 0.000003 loss_cls: 4.1541 (3.8904) grad_norm: 2.4208 (2.4262) time: 0.7659 data: 0.0003 max mem: 8426 +[2024-12-11 00:36:32 root] (utils.py 283): INFO Epoch: [24] [1680/2502] eta: 0:10:30 lr: 0.000003 loss_cls: 3.9768 (3.8897) grad_norm: 2.3897 (2.4261) time: 0.7700 data: 0.0003 max mem: 8426 +[2024-12-11 00:36:40 root] (utils.py 283): INFO Epoch: [24] [1690/2502] eta: 0:10:22 lr: 0.000003 loss_cls: 4.0967 (3.8916) grad_norm: 2.4415 (2.4263) time: 0.7795 data: 0.0002 max mem: 8426 +[2024-12-11 00:36:48 root] (utils.py 283): INFO Epoch: [24] [1700/2502] eta: 0:10:15 lr: 0.000003 loss_cls: 4.1470 (3.8920) grad_norm: 2.3834 (2.4260) time: 0.7783 data: 0.0002 max mem: 8426 +[2024-12-11 00:36:56 root] (utils.py 283): INFO Epoch: [24] [1710/2502] eta: 0:10:07 lr: 0.000003 loss_cls: 4.0355 (3.8922) grad_norm: 2.4020 (2.4265) time: 0.7681 data: 0.0002 max mem: 8426 +[2024-12-11 00:37:03 root] (utils.py 283): INFO Epoch: [24] [1720/2502] eta: 0:09:59 lr: 0.000003 loss_cls: 3.7892 (3.8905) grad_norm: 2.4596 (2.4266) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-11 00:37:11 root] (utils.py 283): INFO Epoch: [24] [1730/2502] eta: 0:09:52 lr: 0.000003 loss_cls: 3.5731 (3.8895) grad_norm: 2.4140 (2.4266) time: 0.7632 data: 0.0002 max mem: 8426 +[2024-12-11 00:37:18 root] (utils.py 283): INFO Epoch: [24] [1740/2502] eta: 0:09:44 lr: 0.000003 loss_cls: 4.0847 (3.8906) grad_norm: 2.4140 (2.4266) time: 0.7620 data: 0.0003 max mem: 8426 +[2024-12-11 00:37:26 root] (utils.py 283): INFO Epoch: [24] [1750/2502] eta: 0:09:36 lr: 0.000003 loss_cls: 4.1526 (3.8897) grad_norm: 2.4049 (2.4264) time: 0.7601 data: 0.0002 max mem: 8426 +[2024-12-11 00:37:34 root] (utils.py 283): INFO Epoch: [24] [1760/2502] eta: 0:09:29 lr: 0.000003 loss_cls: 3.9333 (3.8899) grad_norm: 2.3987 (2.4263) time: 0.7591 data: 0.0002 max mem: 8426 +[2024-12-11 00:37:41 root] (utils.py 283): INFO Epoch: [24] [1770/2502] eta: 0:09:21 lr: 0.000003 loss_cls: 4.0974 (3.8901) grad_norm: 2.3741 (2.4262) time: 0.7582 data: 0.0002 max mem: 8426 +[2024-12-11 00:37:49 root] (utils.py 283): INFO Epoch: [24] [1780/2502] eta: 0:09:13 lr: 0.000003 loss_cls: 4.0974 (3.8903) grad_norm: 2.4310 (2.4262) time: 0.7607 data: 0.0002 max mem: 8426 +[2024-12-11 00:37:57 root] (utils.py 283): INFO Epoch: [24] [1790/2502] eta: 0:09:05 lr: 0.000003 loss_cls: 3.9397 (3.8892) grad_norm: 2.4281 (2.4262) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-11 00:38:04 root] (utils.py 283): INFO Epoch: [24] [1800/2502] eta: 0:08:58 lr: 0.000003 loss_cls: 3.8553 (3.8877) grad_norm: 2.4273 (2.4266) time: 0.7688 data: 0.0002 max mem: 8426 +[2024-12-11 00:38:12 root] (utils.py 283): INFO Epoch: [24] [1810/2502] eta: 0:08:50 lr: 0.000003 loss_cls: 3.6694 (3.8868) grad_norm: 2.4422 (2.4266) time: 0.7679 data: 0.0002 max mem: 8426 +[2024-12-11 00:38:20 root] (utils.py 283): INFO Epoch: [24] [1820/2502] eta: 0:08:42 lr: 0.000003 loss_cls: 4.1186 (3.8882) grad_norm: 2.4167 (2.4270) time: 0.7679 data: 0.0003 max mem: 8426 +[2024-12-11 00:38:27 root] (utils.py 283): INFO Epoch: [24] [1830/2502] eta: 0:08:35 lr: 0.000003 loss_cls: 4.0973 (3.8881) grad_norm: 2.4038 (2.4268) time: 0.7660 data: 0.0003 max mem: 8426 +[2024-12-11 00:38:35 root] (utils.py 283): INFO Epoch: [24] [1840/2502] eta: 0:08:27 lr: 0.000003 loss_cls: 3.9801 (3.8881) grad_norm: 2.4038 (2.4266) time: 0.7601 data: 0.0003 max mem: 8426 +[2024-12-11 00:38:42 root] (utils.py 283): INFO Epoch: [24] [1850/2502] eta: 0:08:19 lr: 0.000003 loss_cls: 4.0692 (3.8885) grad_norm: 2.3841 (2.4263) time: 0.7625 data: 0.0002 max mem: 8426 +[2024-12-11 00:38:50 root] (utils.py 283): INFO Epoch: [24] [1860/2502] eta: 0:08:12 lr: 0.000003 loss_cls: 3.9835 (3.8886) grad_norm: 2.3755 (2.4261) time: 0.7669 data: 0.0003 max mem: 8426 +[2024-12-11 00:38:58 root] (utils.py 283): INFO Epoch: [24] [1870/2502] eta: 0:08:04 lr: 0.000003 loss_cls: 4.1103 (3.8903) grad_norm: 2.4386 (2.4266) time: 0.7760 data: 0.0003 max mem: 8426 +[2024-12-11 00:39:06 root] (utils.py 283): INFO Epoch: [24] [1880/2502] eta: 0:07:57 lr: 0.000003 loss_cls: 4.2299 (3.8913) grad_norm: 2.3919 (2.4264) time: 0.7810 data: 0.0002 max mem: 8426 +[2024-12-11 00:39:14 root] (utils.py 283): INFO Epoch: [24] [1890/2502] eta: 0:07:49 lr: 0.000003 loss_cls: 4.2299 (3.8927) grad_norm: 2.3906 (2.4262) time: 0.7810 data: 0.0002 max mem: 8426 +[2024-12-11 00:39:21 root] (utils.py 283): INFO Epoch: [24] [1900/2502] eta: 0:07:41 lr: 0.000003 loss_cls: 4.2356 (3.8933) grad_norm: 2.4401 (2.4263) time: 0.7813 data: 0.0002 max mem: 8426 +[2024-12-11 00:39:29 root] (utils.py 283): INFO Epoch: [24] [1910/2502] eta: 0:07:34 lr: 0.000003 loss_cls: 4.1401 (3.8927) grad_norm: 2.3368 (2.4259) time: 0.7812 data: 0.0002 max mem: 8426 +[2024-12-11 00:39:37 root] (utils.py 283): INFO Epoch: [24] [1920/2502] eta: 0:07:26 lr: 0.000003 loss_cls: 3.9726 (3.8923) grad_norm: 2.4223 (2.4261) time: 0.7828 data: 0.0002 max mem: 8426 +[2024-12-11 00:39:45 root] (utils.py 283): INFO Epoch: [24] [1930/2502] eta: 0:07:18 lr: 0.000003 loss_cls: 3.8823 (3.8916) grad_norm: 2.4242 (2.4258) time: 0.7830 data: 0.0002 max mem: 8426 +[2024-12-11 00:39:53 root] (utils.py 283): INFO Epoch: [24] [1940/2502] eta: 0:07:11 lr: 0.000003 loss_cls: 3.8469 (3.8908) grad_norm: 2.3278 (2.4258) time: 0.7817 data: 0.0002 max mem: 8426 +[2024-12-11 00:40:00 root] (utils.py 283): INFO Epoch: [24] [1950/2502] eta: 0:07:03 lr: 0.000003 loss_cls: 4.0364 (3.8919) grad_norm: 2.3765 (2.4257) time: 0.7811 data: 0.0002 max mem: 8426 +[2024-12-11 00:40:08 root] (utils.py 283): INFO Epoch: [24] [1960/2502] eta: 0:06:56 lr: 0.000003 loss_cls: 4.0872 (3.8921) grad_norm: 2.3464 (2.4255) time: 0.7816 data: 0.0002 max mem: 8426 +[2024-12-11 00:40:16 root] (utils.py 283): INFO Epoch: [24] [1970/2502] eta: 0:06:48 lr: 0.000003 loss_cls: 4.0742 (3.8922) grad_norm: 2.3336 (2.4252) time: 0.7821 data: 0.0002 max mem: 8426 +[2024-12-11 00:40:24 root] (utils.py 283): INFO Epoch: [24] [1980/2502] eta: 0:06:40 lr: 0.000003 loss_cls: 4.0961 (3.8919) grad_norm: 2.4156 (2.4253) time: 0.7824 data: 0.0002 max mem: 8426 +[2024-12-11 00:40:32 root] (utils.py 283): INFO Epoch: [24] [1990/2502] eta: 0:06:33 lr: 0.000003 loss_cls: 3.8447 (3.8920) grad_norm: 2.4202 (2.4252) time: 0.7820 data: 0.0002 max mem: 8426 +[2024-12-11 00:40:40 root] (utils.py 283): INFO Epoch: [24] [2000/2502] eta: 0:06:25 lr: 0.000003 loss_cls: 3.9738 (3.8923) grad_norm: 2.4036 (2.4252) time: 0.7852 data: 0.0002 max mem: 8426 +[2024-12-11 00:40:47 root] (utils.py 283): INFO Epoch: [24] [2010/2502] eta: 0:06:17 lr: 0.000003 loss_cls: 4.0571 (3.8924) grad_norm: 2.3567 (2.4249) time: 0.7857 data: 0.0002 max mem: 8426 +[2024-12-11 00:40:55 root] (utils.py 283): INFO Epoch: [24] [2020/2502] eta: 0:06:10 lr: 0.000003 loss_cls: 4.0046 (3.8915) grad_norm: 2.4146 (2.4250) time: 0.7818 data: 0.0002 max mem: 8426 +[2024-12-11 00:41:03 root] (utils.py 283): INFO Epoch: [24] [2030/2502] eta: 0:06:02 lr: 0.000003 loss_cls: 3.7978 (3.8915) grad_norm: 2.3912 (2.4249) time: 0.7714 data: 0.0002 max mem: 8426 +[2024-12-11 00:41:11 root] (utils.py 283): INFO Epoch: [24] [2040/2502] eta: 0:05:54 lr: 0.000003 loss_cls: 4.0875 (3.8912) grad_norm: 2.3757 (2.4247) time: 0.7616 data: 0.0003 max mem: 8426 +[2024-12-11 00:41:18 root] (utils.py 283): INFO Epoch: [24] [2050/2502] eta: 0:05:47 lr: 0.000003 loss_cls: 3.7616 (3.8899) grad_norm: 2.3757 (2.4247) time: 0.7620 data: 0.0003 max mem: 8426 +[2024-12-11 00:41:26 root] (utils.py 283): INFO Epoch: [24] [2060/2502] eta: 0:05:39 lr: 0.000003 loss_cls: 3.6203 (3.8884) grad_norm: 2.4055 (2.4250) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-11 00:41:33 root] (utils.py 283): INFO Epoch: [24] [2070/2502] eta: 0:05:31 lr: 0.000003 loss_cls: 3.6203 (3.8881) grad_norm: 2.4094 (2.4251) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-11 00:41:41 root] (utils.py 283): INFO Epoch: [24] [2080/2502] eta: 0:05:24 lr: 0.000003 loss_cls: 3.8429 (3.8875) grad_norm: 2.4094 (2.4249) time: 0.7646 data: 0.0002 max mem: 8426 +[2024-12-11 00:41:49 root] (utils.py 283): INFO Epoch: [24] [2090/2502] eta: 0:05:16 lr: 0.000003 loss_cls: 3.8074 (3.8870) grad_norm: 2.3908 (2.4249) time: 0.7661 data: 0.0002 max mem: 8426 +[2024-12-11 00:41:57 root] (utils.py 283): INFO Epoch: [24] [2100/2502] eta: 0:05:08 lr: 0.000003 loss_cls: 3.8625 (3.8870) grad_norm: 2.3562 (2.4244) time: 0.7755 data: 0.0002 max mem: 8426 +[2024-12-11 00:42:04 root] (utils.py 283): INFO Epoch: [24] [2110/2502] eta: 0:05:01 lr: 0.000003 loss_cls: 3.7878 (3.8863) grad_norm: 2.3684 (2.4244) time: 0.7813 data: 0.0002 max mem: 8426 +[2024-12-11 00:42:12 root] (utils.py 283): INFO Epoch: [24] [2120/2502] eta: 0:04:53 lr: 0.000003 loss_cls: 3.9707 (3.8866) grad_norm: 2.3954 (2.4241) time: 0.7803 data: 0.0003 max mem: 8426 +[2024-12-11 00:42:20 root] (utils.py 283): INFO Epoch: [24] [2130/2502] eta: 0:04:45 lr: 0.000003 loss_cls: 4.0968 (3.8881) grad_norm: 2.4239 (2.4243) time: 0.7844 data: 0.0003 max mem: 8426 +[2024-12-11 00:42:28 root] (utils.py 283): INFO Epoch: [24] [2140/2502] eta: 0:04:38 lr: 0.000003 loss_cls: 4.0912 (3.8887) grad_norm: 2.4425 (2.4243) time: 0.7907 data: 0.0003 max mem: 8426 +[2024-12-11 00:42:36 root] (utils.py 283): INFO Epoch: [24] [2150/2502] eta: 0:04:30 lr: 0.000003 loss_cls: 3.9480 (3.8886) grad_norm: 2.4202 (2.4244) time: 0.7904 data: 0.0002 max mem: 8426 +[2024-12-11 00:42:44 root] (utils.py 283): INFO Epoch: [24] [2160/2502] eta: 0:04:22 lr: 0.000003 loss_cls: 3.8813 (3.8879) grad_norm: 2.4202 (2.4246) time: 0.7841 data: 0.0002 max mem: 8426 +[2024-12-11 00:42:52 root] (utils.py 283): INFO Epoch: [24] [2170/2502] eta: 0:04:15 lr: 0.000003 loss_cls: 3.8442 (3.8872) grad_norm: 2.4629 (2.4250) time: 0.7831 data: 0.0002 max mem: 8426 +[2024-12-11 00:42:59 root] (utils.py 283): INFO Epoch: [24] [2180/2502] eta: 0:04:07 lr: 0.000003 loss_cls: 3.9060 (3.8869) grad_norm: 2.4641 (2.4253) time: 0.7818 data: 0.0002 max mem: 8426 +[2024-12-11 00:43:07 root] (utils.py 283): INFO Epoch: [24] [2190/2502] eta: 0:03:59 lr: 0.000003 loss_cls: 3.9130 (3.8865) grad_norm: 2.5029 (2.4256) time: 0.7806 data: 0.0002 max mem: 8426 +[2024-12-11 00:43:15 root] (utils.py 283): INFO Epoch: [24] [2200/2502] eta: 0:03:52 lr: 0.000003 loss_cls: 3.8569 (3.8869) grad_norm: 2.4205 (2.4255) time: 0.7765 data: 0.0002 max mem: 8426 +[2024-12-11 00:43:23 root] (utils.py 283): INFO Epoch: [24] [2210/2502] eta: 0:03:44 lr: 0.000003 loss_cls: 3.7728 (3.8859) grad_norm: 2.4205 (2.4257) time: 0.7725 data: 0.0002 max mem: 8426 +[2024-12-11 00:43:30 root] (utils.py 283): INFO Epoch: [24] [2220/2502] eta: 0:03:36 lr: 0.000003 loss_cls: 3.7818 (3.8860) grad_norm: 2.4459 (2.4257) time: 0.7675 data: 0.0002 max mem: 8426 +[2024-12-11 00:43:38 root] (utils.py 283): INFO Epoch: [24] [2230/2502] eta: 0:03:29 lr: 0.000003 loss_cls: 3.5981 (3.8842) grad_norm: 2.4459 (2.4260) time: 0.7628 data: 0.0003 max mem: 8426 +[2024-12-11 00:43:46 root] (utils.py 283): INFO Epoch: [24] [2240/2502] eta: 0:03:21 lr: 0.000003 loss_cls: 3.9630 (3.8848) grad_norm: 2.3671 (2.4258) time: 0.7615 data: 0.0003 max mem: 8426 +[2024-12-11 00:43:53 root] (utils.py 283): INFO Epoch: [24] [2250/2502] eta: 0:03:13 lr: 0.000003 loss_cls: 4.1130 (3.8852) grad_norm: 2.3671 (2.4258) time: 0.7620 data: 0.0003 max mem: 8426 +[2024-12-11 00:44:01 root] (utils.py 283): INFO Epoch: [24] [2260/2502] eta: 0:03:05 lr: 0.000003 loss_cls: 3.8463 (3.8857) grad_norm: 2.4030 (2.4259) time: 0.7662 data: 0.0003 max mem: 8426 +[2024-12-11 00:44:08 root] (utils.py 283): INFO Epoch: [24] [2270/2502] eta: 0:02:58 lr: 0.000003 loss_cls: 3.9762 (3.8859) grad_norm: 2.4030 (2.4257) time: 0.7665 data: 0.0003 max mem: 8426 +[2024-12-11 00:44:16 root] (utils.py 283): INFO Epoch: [24] [2280/2502] eta: 0:02:50 lr: 0.000003 loss_cls: 4.0728 (3.8856) grad_norm: 2.4140 (2.4256) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-11 00:44:24 root] (utils.py 283): INFO Epoch: [24] [2290/2502] eta: 0:02:42 lr: 0.000003 loss_cls: 4.0956 (3.8860) grad_norm: 2.3498 (2.4253) time: 0.7629 data: 0.0003 max mem: 8426 +[2024-12-11 00:44:31 root] (utils.py 283): INFO Epoch: [24] [2300/2502] eta: 0:02:35 lr: 0.000003 loss_cls: 4.1569 (3.8859) grad_norm: 2.3887 (2.4255) time: 0.7648 data: 0.0003 max mem: 8426 +[2024-12-11 00:44:39 root] (utils.py 283): INFO Epoch: [24] [2310/2502] eta: 0:02:27 lr: 0.000003 loss_cls: 4.0531 (3.8861) grad_norm: 2.4165 (2.4255) time: 0.7644 data: 0.0003 max mem: 8426 +[2024-12-11 00:44:47 root] (utils.py 283): INFO Epoch: [24] [2320/2502] eta: 0:02:19 lr: 0.000003 loss_cls: 4.0947 (3.8874) grad_norm: 2.4051 (2.4257) time: 0.7627 data: 0.0003 max mem: 8426 +[2024-12-11 00:44:54 root] (utils.py 283): INFO Epoch: [24] [2330/2502] eta: 0:02:12 lr: 0.000003 loss_cls: 3.8241 (3.8854) grad_norm: 2.4051 (2.4255) time: 0.7650 data: 0.0003 max mem: 8426 +[2024-12-11 00:45:02 root] (utils.py 283): INFO Epoch: [24] [2340/2502] eta: 0:02:04 lr: 0.000003 loss_cls: 3.5878 (3.8857) grad_norm: 2.3806 (2.4255) time: 0.7647 data: 0.0003 max mem: 8426 +[2024-12-11 00:45:10 root] (utils.py 283): INFO Epoch: [24] [2350/2502] eta: 0:01:56 lr: 0.000003 loss_cls: 3.7892 (3.8856) grad_norm: 2.4064 (2.4255) time: 0.7678 data: 0.0002 max mem: 8426 +[2024-12-11 00:45:17 root] (utils.py 283): INFO Epoch: [24] [2360/2502] eta: 0:01:49 lr: 0.000003 loss_cls: 3.8005 (3.8853) grad_norm: 2.3903 (2.4254) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-11 00:45:25 root] (utils.py 283): INFO Epoch: [24] [2370/2502] eta: 0:01:41 lr: 0.000003 loss_cls: 3.8005 (3.8842) grad_norm: 2.3903 (2.4257) time: 0.7718 data: 0.0002 max mem: 8426 +[2024-12-11 00:45:33 root] (utils.py 283): INFO Epoch: [24] [2380/2502] eta: 0:01:33 lr: 0.000003 loss_cls: 4.0022 (3.8841) grad_norm: 2.3851 (2.4254) time: 0.7739 data: 0.0002 max mem: 8426 +[2024-12-11 00:45:40 root] (utils.py 283): INFO Epoch: [24] [2390/2502] eta: 0:01:26 lr: 0.000003 loss_cls: 3.9180 (3.8837) grad_norm: 2.3972 (2.4254) time: 0.7657 data: 0.0003 max mem: 8426 +[2024-12-11 00:45:48 root] (utils.py 283): INFO Epoch: [24] [2400/2502] eta: 0:01:18 lr: 0.000003 loss_cls: 4.0914 (3.8844) grad_norm: 2.4218 (2.4251) time: 0.7667 data: 0.0003 max mem: 8426 +[2024-12-11 00:45:56 root] (utils.py 283): INFO Epoch: [24] [2410/2502] eta: 0:01:10 lr: 0.000003 loss_cls: 4.1412 (3.8860) grad_norm: 2.4243 (2.4251) time: 0.7685 data: 0.0002 max mem: 8426 +[2024-12-11 00:46:03 root] (utils.py 283): INFO Epoch: [24] [2420/2502] eta: 0:01:03 lr: 0.000003 loss_cls: 4.1412 (3.8851) grad_norm: 2.4593 (2.4254) time: 0.7671 data: 0.0002 max mem: 8426 +[2024-12-11 00:46:11 root] (utils.py 283): INFO Epoch: [24] [2430/2502] eta: 0:00:55 lr: 0.000003 loss_cls: 3.8883 (3.8847) grad_norm: 2.3943 (2.4252) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-11 00:46:19 root] (utils.py 283): INFO Epoch: [24] [2440/2502] eta: 0:00:47 lr: 0.000003 loss_cls: 3.7657 (3.8842) grad_norm: 2.3737 (2.4251) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-11 00:46:26 root] (utils.py 283): INFO Epoch: [24] [2450/2502] eta: 0:00:39 lr: 0.000003 loss_cls: 3.7657 (3.8842) grad_norm: 2.3928 (2.4251) time: 0.7631 data: 0.0003 max mem: 8426 +[2024-12-11 00:46:34 root] (utils.py 283): INFO Epoch: [24] [2460/2502] eta: 0:00:32 lr: 0.000003 loss_cls: 4.0582 (3.8847) grad_norm: 2.3777 (2.4252) time: 0.7615 data: 0.0003 max mem: 8426 +[2024-12-11 00:46:42 root] (utils.py 283): INFO Epoch: [24] [2470/2502] eta: 0:00:24 lr: 0.000003 loss_cls: 3.8009 (3.8824) grad_norm: 2.3611 (2.4250) time: 0.7618 data: 0.0002 max mem: 8426 +[2024-12-11 00:46:49 root] (utils.py 283): INFO Epoch: [24] [2480/2502] eta: 0:00:16 lr: 0.000003 loss_cls: 3.6553 (3.8825) grad_norm: 2.3611 (2.4249) time: 0.7642 data: 0.0002 max mem: 8426 +[2024-12-11 00:46:57 root] (utils.py 283): INFO Epoch: [24] [2490/2502] eta: 0:00:09 lr: 0.000003 loss_cls: 4.2254 (3.8838) grad_norm: 2.4070 (2.4247) time: 0.7935 data: 0.0232 max mem: 8426 +[2024-12-11 00:47:05 root] (utils.py 283): INFO Epoch: [24] [2500/2502] eta: 0:00:01 lr: 0.000003 loss_cls: 4.2474 (3.8847) grad_norm: 2.3483 (2.4248) time: 0.7931 data: 0.0232 max mem: 8426 +[2024-12-11 00:47:06 root] (utils.py 283): INFO Epoch: [24] [2501/2502] eta: 0:00:00 lr: 0.000003 loss_cls: 4.2623 (3.8848) grad_norm: 2.3424 (2.4248) time: 0.7939 data: 0.0232 max mem: 8426 +[2024-12-11 00:47:06 root] (utils.py 297): INFO Epoch: [24] Total time: 0:32:02 (0.7686 s / it) +[2024-12-11 00:47:06 root] (engine.py 179): INFO Averaged stats:lr: 0.000003 loss_cls: 4.2623 (3.8876) grad_norm: 2.3424 (2.4248) +[2024-12-11 00:47:06 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.6292 (0.6292) acc1: 85.1562 (85.1562) acc3: 97.6562 (97.6562) acc5: 99.2188 (99.2188) time: 0.1275 data: 0.0003 max mem: 8426 +[2024-12-11 00:47:08 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7309 (0.7991) acc1: 85.1562 (82.5284) acc3: 95.3125 (93.7500) acc5: 96.8750 (96.8750) time: 0.1277 data: 0.0004 max mem: 8426 +[2024-12-11 00:47:09 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8513 (0.8573) acc1: 79.6875 (81.3244) acc3: 91.4062 (93.0804) acc5: 95.3125 (95.7589) time: 0.1279 data: 0.0004 max mem: 8426 +[2024-12-11 00:47:10 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9284 (0.8694) acc1: 78.9062 (80.4688) acc3: 92.9688 (93.3216) acc5: 95.3125 (95.8921) time: 0.1281 data: 0.0004 max mem: 8426 +[2024-12-11 00:47:12 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8079 (0.8600) acc1: 79.6875 (80.8117) acc3: 94.5312 (93.3880) acc5: 96.8750 (95.9985) time: 0.1282 data: 0.0005 max mem: 8426 +[2024-12-11 00:47:13 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0427 (0.9459) acc1: 75.7812 (78.7224) acc3: 87.5000 (91.9118) acc5: 92.1875 (94.8376) time: 0.1285 data: 0.0005 max mem: 8426 +[2024-12-11 00:47:14 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:04 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2129 (0.9909) acc1: 73.4375 (78.0738) acc3: 85.9375 (90.9964) acc5: 89.8438 (94.0190) time: 0.1286 data: 0.0005 max mem: 8426 +[2024-12-11 00:47:16 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1923 (1.0314) acc1: 75.0000 (77.1237) acc3: 86.7188 (90.4599) acc5: 89.8438 (93.5739) time: 0.1459 data: 0.0179 max mem: 8426 +[2024-12-11 00:47:17 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1962 (1.0649) acc1: 73.4375 (76.3792) acc3: 87.5000 (89.9402) acc5: 89.8438 (93.0459) time: 0.1491 data: 0.0214 max mem: 8426 +[2024-12-11 00:47:19 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2172 (1.0926) acc1: 68.7500 (75.6868) acc3: 85.9375 (89.5519) acc5: 89.8438 (92.7541) time: 0.1429 data: 0.0152 max mem: 8426 +[2024-12-11 00:47:19 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1264 (1.0804) acc1: 74.2188 (75.9120) acc3: 88.2812 (89.7200) acc5: 90.6250 (92.9200) time: 0.1411 data: 0.0152 max mem: 8426 +[2024-12-11 00:47:19 root] (utils.py 297): INFO Test: Total time: 0:00:13 (0.1346 s / it) +[2024-12-11 00:47:20 root] (engine.py 264): INFO * Acc@1 75.768 Acc@3 89.710 Acc@5 92.928 loss 1.081 flops 1.285 layer_flops 1.251 +[2024-12-11 00:47:20 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 75.8% +[2024-12-11 00:47:20 root] (main.py 576): INFO Max accuracy: 75.88% +[2024-12-11 00:47:21 root] (utils.py 283): INFO Epoch: [25] [ 0/2502] eta: 0:36:39 lr: 0.000003 loss_cls: 3.4229 (3.4229) grad_norm: 2.3648 (2.3648) time: 0.8790 data: 0.0005 max mem: 8426 +[2024-12-11 00:47:28 root] (utils.py 283): INFO Epoch: [25] [ 10/2502] eta: 0:32:21 lr: 0.000003 loss_cls: 3.4229 (3.7064) grad_norm: 2.3971 (2.4847) time: 0.7791 data: 0.0003 max mem: 8426 +[2024-12-11 00:47:36 root] (utils.py 283): INFO Epoch: [25] [ 20/2502] eta: 0:31:53 lr: 0.000003 loss_cls: 3.6820 (3.7860) grad_norm: 2.4218 (2.4884) time: 0.7655 data: 0.0002 max mem: 8426 +[2024-12-11 00:47:44 root] (utils.py 283): INFO Epoch: [25] [ 30/2502] eta: 0:31:47 lr: 0.000003 loss_cls: 3.8441 (3.8173) grad_norm: 2.4218 (2.4735) time: 0.7673 data: 0.0002 max mem: 8426 +[2024-12-11 00:47:51 root] (utils.py 283): INFO Epoch: [25] [ 40/2502] eta: 0:31:34 lr: 0.000003 loss_cls: 3.5077 (3.7755) grad_norm: 2.3787 (2.4527) time: 0.7683 data: 0.0002 max mem: 8426 +[2024-12-11 00:47:59 root] (utils.py 283): INFO Epoch: [25] [ 50/2502] eta: 0:31:24 lr: 0.000003 loss_cls: 3.5157 (3.7699) grad_norm: 2.4131 (2.4494) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-11 00:48:07 root] (utils.py 283): INFO Epoch: [25] [ 60/2502] eta: 0:31:16 lr: 0.000003 loss_cls: 3.8719 (3.8326) grad_norm: 2.3819 (2.4310) time: 0.7654 data: 0.0002 max mem: 8426 +[2024-12-11 00:48:14 root] (utils.py 283): INFO Epoch: [25] [ 70/2502] eta: 0:31:06 lr: 0.000003 loss_cls: 4.0673 (3.8155) grad_norm: 2.4199 (2.4501) time: 0.7645 data: 0.0003 max mem: 8426 +[2024-12-11 00:48:22 root] (utils.py 283): INFO Epoch: [25] [ 80/2502] eta: 0:30:57 lr: 0.000003 loss_cls: 3.8354 (3.8059) grad_norm: 2.4733 (2.4499) time: 0.7636 data: 0.0002 max mem: 8426 +[2024-12-11 00:48:30 root] (utils.py 283): INFO Epoch: [25] [ 90/2502] eta: 0:30:49 lr: 0.000003 loss_cls: 4.0007 (3.8373) grad_norm: 2.4016 (2.4412) time: 0.7640 data: 0.0002 max mem: 8426 +[2024-12-11 00:48:37 root] (utils.py 283): INFO Epoch: [25] [ 100/2502] eta: 0:30:40 lr: 0.000003 loss_cls: 4.0007 (3.8117) grad_norm: 2.3759 (2.4441) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-11 00:48:45 root] (utils.py 283): INFO Epoch: [25] [ 110/2502] eta: 0:30:33 lr: 0.000003 loss_cls: 4.0934 (3.8363) grad_norm: 2.4452 (2.4462) time: 0.7653 data: 0.0002 max mem: 8426 +[2024-12-11 00:48:53 root] (utils.py 283): INFO Epoch: [25] [ 120/2502] eta: 0:30:24 lr: 0.000003 loss_cls: 4.3062 (3.8668) grad_norm: 2.4239 (2.4439) time: 0.7637 data: 0.0002 max mem: 8426 +[2024-12-11 00:49:00 root] (utils.py 283): INFO Epoch: [25] [ 130/2502] eta: 0:30:17 lr: 0.000003 loss_cls: 4.0944 (3.8505) grad_norm: 2.4013 (2.4381) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-11 00:49:08 root] (utils.py 283): INFO Epoch: [25] [ 140/2502] eta: 0:30:09 lr: 0.000003 loss_cls: 3.8971 (3.8751) grad_norm: 2.4172 (2.4399) time: 0.7684 data: 0.0002 max mem: 8426 +[2024-12-11 00:49:16 root] (utils.py 283): INFO Epoch: [25] [ 150/2502] eta: 0:30:02 lr: 0.000003 loss_cls: 4.0451 (3.8790) grad_norm: 2.4300 (2.4399) time: 0.7673 data: 0.0002 max mem: 8426 +[2024-12-11 00:49:23 root] (utils.py 283): INFO Epoch: [25] [ 160/2502] eta: 0:29:54 lr: 0.000003 loss_cls: 3.8626 (3.8781) grad_norm: 2.3914 (2.4366) time: 0.7654 data: 0.0002 max mem: 8426 +[2024-12-11 00:49:31 root] (utils.py 283): INFO Epoch: [25] [ 170/2502] eta: 0:29:46 lr: 0.000003 loss_cls: 3.9787 (3.8902) grad_norm: 2.3631 (2.4326) time: 0.7618 data: 0.0002 max mem: 8426 +[2024-12-11 00:49:39 root] (utils.py 283): INFO Epoch: [25] [ 180/2502] eta: 0:29:39 lr: 0.000003 loss_cls: 4.0040 (3.8846) grad_norm: 2.3262 (2.4310) time: 0.7667 data: 0.0003 max mem: 8426 +[2024-12-11 00:49:46 root] (utils.py 283): INFO Epoch: [25] [ 190/2502] eta: 0:29:31 lr: 0.000003 loss_cls: 4.0040 (3.8948) grad_norm: 2.4789 (2.4346) time: 0.7684 data: 0.0003 max mem: 8426 +[2024-12-11 00:49:54 root] (utils.py 283): INFO Epoch: [25] [ 200/2502] eta: 0:29:23 lr: 0.000003 loss_cls: 4.0170 (3.8792) grad_norm: 2.4317 (2.4326) time: 0.7661 data: 0.0003 max mem: 8426 +[2024-12-11 00:50:02 root] (utils.py 283): INFO Epoch: [25] [ 210/2502] eta: 0:29:16 lr: 0.000003 loss_cls: 3.7836 (3.8738) grad_norm: 2.4340 (2.4340) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-11 00:50:09 root] (utils.py 283): INFO Epoch: [25] [ 220/2502] eta: 0:29:09 lr: 0.000003 loss_cls: 3.8786 (3.8728) grad_norm: 2.4497 (2.4333) time: 0.7728 data: 0.0002 max mem: 8426 +[2024-12-11 00:50:17 root] (utils.py 283): INFO Epoch: [25] [ 230/2502] eta: 0:29:03 lr: 0.000003 loss_cls: 3.9493 (3.8742) grad_norm: 2.4322 (2.4333) time: 0.7799 data: 0.0002 max mem: 8426 +[2024-12-11 00:50:25 root] (utils.py 283): INFO Epoch: [25] [ 240/2502] eta: 0:28:57 lr: 0.000003 loss_cls: 4.1119 (3.8863) grad_norm: 2.4356 (2.4359) time: 0.7811 data: 0.0003 max mem: 8426 +[2024-12-11 00:50:33 root] (utils.py 283): INFO Epoch: [25] [ 250/2502] eta: 0:28:50 lr: 0.000003 loss_cls: 4.1598 (3.8842) grad_norm: 2.4902 (2.4379) time: 0.7830 data: 0.0002 max mem: 8426 +[2024-12-11 00:50:41 root] (utils.py 283): INFO Epoch: [25] [ 260/2502] eta: 0:28:44 lr: 0.000003 loss_cls: 4.1598 (3.8939) grad_norm: 2.5240 (2.4399) time: 0.7828 data: 0.0003 max mem: 8426 +[2024-12-11 00:50:48 root] (utils.py 283): INFO Epoch: [25] [ 270/2502] eta: 0:28:37 lr: 0.000003 loss_cls: 4.3141 (3.8957) grad_norm: 2.4388 (2.4388) time: 0.7808 data: 0.0003 max mem: 8426 +[2024-12-11 00:50:56 root] (utils.py 283): INFO Epoch: [25] [ 280/2502] eta: 0:28:30 lr: 0.000003 loss_cls: 4.0042 (3.8948) grad_norm: 2.4347 (2.4388) time: 0.7809 data: 0.0003 max mem: 8426 +[2024-12-11 00:51:04 root] (utils.py 283): INFO Epoch: [25] [ 290/2502] eta: 0:28:24 lr: 0.000003 loss_cls: 3.8821 (3.8908) grad_norm: 2.4347 (2.4383) time: 0.7855 data: 0.0002 max mem: 8426 +[2024-12-11 00:51:12 root] (utils.py 283): INFO Epoch: [25] [ 300/2502] eta: 0:28:17 lr: 0.000003 loss_cls: 3.5768 (3.8777) grad_norm: 2.3671 (2.4391) time: 0.7860 data: 0.0002 max mem: 8426 +[2024-12-11 00:51:20 root] (utils.py 283): INFO Epoch: [25] [ 310/2502] eta: 0:28:11 lr: 0.000003 loss_cls: 4.0564 (3.8907) grad_norm: 2.3221 (2.4360) time: 0.7838 data: 0.0003 max mem: 8426 +[2024-12-11 00:51:28 root] (utils.py 283): INFO Epoch: [25] [ 320/2502] eta: 0:28:04 lr: 0.000003 loss_cls: 4.1982 (3.8956) grad_norm: 2.3773 (2.4372) time: 0.7850 data: 0.0003 max mem: 8426 +[2024-12-11 00:51:35 root] (utils.py 283): INFO Epoch: [25] [ 330/2502] eta: 0:27:56 lr: 0.000003 loss_cls: 4.1233 (3.9024) grad_norm: 2.4523 (2.4362) time: 0.7748 data: 0.0003 max mem: 8426 +[2024-12-11 00:51:43 root] (utils.py 283): INFO Epoch: [25] [ 340/2502] eta: 0:27:47 lr: 0.000003 loss_cls: 4.0784 (3.8989) grad_norm: 2.3715 (2.4345) time: 0.7633 data: 0.0003 max mem: 8426 +[2024-12-11 00:51:51 root] (utils.py 283): INFO Epoch: [25] [ 350/2502] eta: 0:27:39 lr: 0.000003 loss_cls: 4.1548 (3.9073) grad_norm: 2.4033 (2.4355) time: 0.7631 data: 0.0003 max mem: 8426 +[2024-12-11 00:51:58 root] (utils.py 283): INFO Epoch: [25] [ 360/2502] eta: 0:27:31 lr: 0.000003 loss_cls: 4.2992 (3.9123) grad_norm: 2.4715 (2.4365) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-11 00:52:06 root] (utils.py 283): INFO Epoch: [25] [ 370/2502] eta: 0:27:23 lr: 0.000003 loss_cls: 4.0292 (3.9061) grad_norm: 2.4390 (2.4365) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-11 00:52:14 root] (utils.py 283): INFO Epoch: [25] [ 380/2502] eta: 0:27:15 lr: 0.000003 loss_cls: 3.9045 (3.9079) grad_norm: 2.3790 (2.4357) time: 0.7631 data: 0.0002 max mem: 8426 +[2024-12-11 00:52:21 root] (utils.py 283): INFO Epoch: [25] [ 390/2502] eta: 0:27:07 lr: 0.000003 loss_cls: 3.9115 (3.9068) grad_norm: 2.4549 (2.4374) time: 0.7643 data: 0.0002 max mem: 8426 +[2024-12-11 00:52:29 root] (utils.py 283): INFO Epoch: [25] [ 400/2502] eta: 0:26:59 lr: 0.000003 loss_cls: 3.8799 (3.9076) grad_norm: 2.4696 (2.4374) time: 0.7640 data: 0.0003 max mem: 8426 +[2024-12-11 00:52:36 root] (utils.py 283): INFO Epoch: [25] [ 410/2502] eta: 0:26:51 lr: 0.000003 loss_cls: 3.8799 (3.9046) grad_norm: 2.4556 (2.4383) time: 0.7670 data: 0.0003 max mem: 8426 +[2024-12-11 00:52:44 root] (utils.py 283): INFO Epoch: [25] [ 420/2502] eta: 0:26:43 lr: 0.000003 loss_cls: 3.9791 (3.9116) grad_norm: 2.4733 (2.4392) time: 0.7715 data: 0.0002 max mem: 8426 +[2024-12-11 00:52:52 root] (utils.py 283): INFO Epoch: [25] [ 430/2502] eta: 0:26:35 lr: 0.000003 loss_cls: 4.2802 (3.9188) grad_norm: 2.4503 (2.4394) time: 0.7695 data: 0.0002 max mem: 8426 +[2024-12-11 00:52:59 root] (utils.py 283): INFO Epoch: [25] [ 440/2502] eta: 0:26:27 lr: 0.000003 loss_cls: 4.1109 (3.9160) grad_norm: 2.4310 (2.4388) time: 0.7624 data: 0.0002 max mem: 8426 +[2024-12-11 00:53:07 root] (utils.py 283): INFO Epoch: [25] [ 450/2502] eta: 0:26:19 lr: 0.000003 loss_cls: 4.0385 (3.9130) grad_norm: 2.3985 (2.4382) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-11 00:53:15 root] (utils.py 283): INFO Epoch: [25] [ 460/2502] eta: 0:26:11 lr: 0.000003 loss_cls: 3.9610 (3.9134) grad_norm: 2.3985 (2.4386) time: 0.7661 data: 0.0003 max mem: 8426 +[2024-12-11 00:53:22 root] (utils.py 283): INFO Epoch: [25] [ 470/2502] eta: 0:26:03 lr: 0.000003 loss_cls: 4.1329 (3.9144) grad_norm: 2.3707 (2.4368) time: 0.7638 data: 0.0003 max mem: 8426 +[2024-12-11 00:53:30 root] (utils.py 283): INFO Epoch: [25] [ 480/2502] eta: 0:25:56 lr: 0.000003 loss_cls: 4.1329 (3.9117) grad_norm: 2.3707 (2.4366) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-11 00:53:38 root] (utils.py 283): INFO Epoch: [25] [ 490/2502] eta: 0:25:48 lr: 0.000003 loss_cls: 3.5850 (3.9077) grad_norm: 2.4490 (2.4374) time: 0.7646 data: 0.0003 max mem: 8426 +[2024-12-11 00:53:46 root] (utils.py 283): INFO Epoch: [25] [ 500/2502] eta: 0:25:40 lr: 0.000003 loss_cls: 3.6940 (3.9090) grad_norm: 2.4461 (2.4370) time: 0.7707 data: 0.0003 max mem: 8426 +[2024-12-11 00:53:53 root] (utils.py 283): INFO Epoch: [25] [ 510/2502] eta: 0:25:32 lr: 0.000003 loss_cls: 4.0192 (3.9065) grad_norm: 2.4285 (2.4371) time: 0.7681 data: 0.0002 max mem: 8426 +[2024-12-11 00:54:01 root] (utils.py 283): INFO Epoch: [25] [ 520/2502] eta: 0:25:24 lr: 0.000003 loss_cls: 3.9203 (3.9034) grad_norm: 2.4015 (2.4363) time: 0.7592 data: 0.0003 max mem: 8426 +[2024-12-11 00:54:08 root] (utils.py 283): INFO Epoch: [25] [ 530/2502] eta: 0:25:16 lr: 0.000003 loss_cls: 3.9909 (3.9063) grad_norm: 2.3196 (2.4347) time: 0.7608 data: 0.0003 max mem: 8426 +[2024-12-11 00:54:16 root] (utils.py 283): INFO Epoch: [25] [ 540/2502] eta: 0:25:08 lr: 0.000003 loss_cls: 4.0353 (3.9092) grad_norm: 2.3403 (2.4338) time: 0.7610 data: 0.0002 max mem: 8426 +[2024-12-11 00:54:24 root] (utils.py 283): INFO Epoch: [25] [ 550/2502] eta: 0:25:00 lr: 0.000003 loss_cls: 4.1979 (3.9159) grad_norm: 2.4411 (2.4334) time: 0.7605 data: 0.0003 max mem: 8426 +[2024-12-11 00:54:31 root] (utils.py 283): INFO Epoch: [25] [ 560/2502] eta: 0:24:52 lr: 0.000003 loss_cls: 4.1621 (3.9132) grad_norm: 2.4513 (2.4339) time: 0.7635 data: 0.0003 max mem: 8426 +[2024-12-11 00:54:39 root] (utils.py 283): INFO Epoch: [25] [ 570/2502] eta: 0:24:45 lr: 0.000003 loss_cls: 4.0817 (3.9135) grad_norm: 2.4579 (2.4338) time: 0.7669 data: 0.0002 max mem: 8426 +[2024-12-11 00:54:46 root] (utils.py 283): INFO Epoch: [25] [ 580/2502] eta: 0:24:37 lr: 0.000003 loss_cls: 3.9743 (3.9137) grad_norm: 2.4137 (2.4340) time: 0.7648 data: 0.0003 max mem: 8426 +[2024-12-11 00:54:54 root] (utils.py 283): INFO Epoch: [25] [ 590/2502] eta: 0:24:29 lr: 0.000003 loss_cls: 3.6393 (3.9098) grad_norm: 2.4178 (2.4353) time: 0.7618 data: 0.0003 max mem: 8426 +[2024-12-11 00:55:02 root] (utils.py 283): INFO Epoch: [25] [ 600/2502] eta: 0:24:21 lr: 0.000003 loss_cls: 3.5736 (3.9078) grad_norm: 2.4691 (2.4358) time: 0.7678 data: 0.0003 max mem: 8426 +[2024-12-11 00:55:09 root] (utils.py 283): INFO Epoch: [25] [ 610/2502] eta: 0:24:14 lr: 0.000003 loss_cls: 3.7924 (3.9038) grad_norm: 2.4508 (2.4355) time: 0.7691 data: 0.0002 max mem: 8426 +[2024-12-11 00:55:17 root] (utils.py 283): INFO Epoch: [25] [ 620/2502] eta: 0:24:06 lr: 0.000003 loss_cls: 4.0302 (3.9022) grad_norm: 2.4661 (2.4361) time: 0.7680 data: 0.0003 max mem: 8426 +[2024-12-11 00:55:25 root] (utils.py 283): INFO Epoch: [25] [ 630/2502] eta: 0:23:58 lr: 0.000003 loss_cls: 4.0368 (3.9018) grad_norm: 2.4330 (2.4364) time: 0.7666 data: 0.0003 max mem: 8426 +[2024-12-11 00:55:32 root] (utils.py 283): INFO Epoch: [25] [ 640/2502] eta: 0:23:50 lr: 0.000003 loss_cls: 3.9044 (3.9000) grad_norm: 2.4015 (2.4359) time: 0.7628 data: 0.0003 max mem: 8426 +[2024-12-11 00:55:40 root] (utils.py 283): INFO Epoch: [25] [ 650/2502] eta: 0:23:42 lr: 0.000003 loss_cls: 3.8005 (3.8973) grad_norm: 2.4144 (2.4363) time: 0.7652 data: 0.0003 max mem: 8426 +[2024-12-11 00:55:48 root] (utils.py 283): INFO Epoch: [25] [ 660/2502] eta: 0:23:35 lr: 0.000003 loss_cls: 3.9831 (3.8992) grad_norm: 2.4160 (2.4362) time: 0.7644 data: 0.0002 max mem: 8426 +[2024-12-11 00:55:55 root] (utils.py 283): INFO Epoch: [25] [ 670/2502] eta: 0:23:27 lr: 0.000003 loss_cls: 3.9962 (3.8974) grad_norm: 2.4455 (2.4370) time: 0.7635 data: 0.0003 max mem: 8426 +[2024-12-11 00:56:03 root] (utils.py 283): INFO Epoch: [25] [ 680/2502] eta: 0:23:19 lr: 0.000003 loss_cls: 3.8988 (3.8968) grad_norm: 2.4602 (2.4376) time: 0.7662 data: 0.0003 max mem: 8426 +[2024-12-11 00:56:11 root] (utils.py 283): INFO Epoch: [25] [ 690/2502] eta: 0:23:11 lr: 0.000003 loss_cls: 3.9608 (3.8978) grad_norm: 2.4200 (2.4376) time: 0.7650 data: 0.0003 max mem: 8426 +[2024-12-11 00:56:18 root] (utils.py 283): INFO Epoch: [25] [ 700/2502] eta: 0:23:04 lr: 0.000003 loss_cls: 3.9608 (3.8954) grad_norm: 2.4092 (2.4375) time: 0.7686 data: 0.0003 max mem: 8426 +[2024-12-11 00:56:26 root] (utils.py 283): INFO Epoch: [25] [ 710/2502] eta: 0:22:56 lr: 0.000003 loss_cls: 4.0127 (3.8972) grad_norm: 2.4105 (2.4371) time: 0.7751 data: 0.0002 max mem: 8426 +[2024-12-11 00:56:34 root] (utils.py 283): INFO Epoch: [25] [ 720/2502] eta: 0:22:49 lr: 0.000003 loss_cls: 4.1676 (3.9003) grad_norm: 2.3534 (2.4364) time: 0.7703 data: 0.0002 max mem: 8426 +[2024-12-11 00:56:42 root] (utils.py 283): INFO Epoch: [25] [ 730/2502] eta: 0:22:41 lr: 0.000003 loss_cls: 4.0190 (3.8969) grad_norm: 2.4450 (2.4379) time: 0.7666 data: 0.0002 max mem: 8426 +[2024-12-11 00:56:49 root] (utils.py 283): INFO Epoch: [25] [ 740/2502] eta: 0:22:33 lr: 0.000003 loss_cls: 3.6685 (3.8968) grad_norm: 2.4403 (2.4375) time: 0.7723 data: 0.0003 max mem: 8426 +[2024-12-11 00:56:57 root] (utils.py 283): INFO Epoch: [25] [ 750/2502] eta: 0:22:26 lr: 0.000003 loss_cls: 3.7471 (3.8944) grad_norm: 2.3586 (2.4362) time: 0.7720 data: 0.0002 max mem: 8426 +[2024-12-11 00:57:05 root] (utils.py 283): INFO Epoch: [25] [ 760/2502] eta: 0:22:18 lr: 0.000003 loss_cls: 4.1235 (3.8982) grad_norm: 2.4122 (2.4363) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-11 00:57:12 root] (utils.py 283): INFO Epoch: [25] [ 770/2502] eta: 0:22:10 lr: 0.000003 loss_cls: 4.3038 (3.9001) grad_norm: 2.4593 (2.4371) time: 0.7638 data: 0.0002 max mem: 8426 +[2024-12-11 00:57:20 root] (utils.py 283): INFO Epoch: [25] [ 780/2502] eta: 0:22:02 lr: 0.000003 loss_cls: 4.2038 (3.9017) grad_norm: 2.4395 (2.4363) time: 0.7630 data: 0.0002 max mem: 8426 +[2024-12-11 00:57:28 root] (utils.py 283): INFO Epoch: [25] [ 790/2502] eta: 0:21:54 lr: 0.000003 loss_cls: 4.2332 (3.9060) grad_norm: 2.3431 (2.4363) time: 0.7621 data: 0.0002 max mem: 8426 +[2024-12-11 00:57:35 root] (utils.py 283): INFO Epoch: [25] [ 800/2502] eta: 0:21:47 lr: 0.000003 loss_cls: 4.2056 (3.9057) grad_norm: 2.4775 (2.4372) time: 0.7609 data: 0.0002 max mem: 8426 +[2024-12-11 00:57:43 root] (utils.py 283): INFO Epoch: [25] [ 810/2502] eta: 0:21:39 lr: 0.000003 loss_cls: 4.0397 (3.9042) grad_norm: 2.4402 (2.4366) time: 0.7626 data: 0.0003 max mem: 8426 +[2024-12-11 00:57:50 root] (utils.py 283): INFO Epoch: [25] [ 820/2502] eta: 0:21:31 lr: 0.000003 loss_cls: 4.0513 (3.9063) grad_norm: 2.3991 (2.4375) time: 0.7632 data: 0.0002 max mem: 8426 +[2024-12-11 00:57:58 root] (utils.py 283): INFO Epoch: [25] [ 830/2502] eta: 0:21:23 lr: 0.000003 loss_cls: 4.0614 (3.9063) grad_norm: 2.4082 (2.4378) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-11 00:58:06 root] (utils.py 283): INFO Epoch: [25] [ 840/2502] eta: 0:21:15 lr: 0.000003 loss_cls: 3.9833 (3.9068) grad_norm: 2.3781 (2.4371) time: 0.7616 data: 0.0003 max mem: 8426 +[2024-12-11 00:58:13 root] (utils.py 283): INFO Epoch: [25] [ 850/2502] eta: 0:21:08 lr: 0.000003 loss_cls: 3.9930 (3.9077) grad_norm: 2.4257 (2.4382) time: 0.7605 data: 0.0003 max mem: 8426 +[2024-12-11 00:58:21 root] (utils.py 283): INFO Epoch: [25] [ 860/2502] eta: 0:21:00 lr: 0.000003 loss_cls: 3.9930 (3.9064) grad_norm: 2.4592 (2.4376) time: 0.7697 data: 0.0003 max mem: 8426 +[2024-12-11 00:58:29 root] (utils.py 283): INFO Epoch: [25] [ 870/2502] eta: 0:20:52 lr: 0.000003 loss_cls: 3.9412 (3.9066) grad_norm: 2.4472 (2.4381) time: 0.7716 data: 0.0003 max mem: 8426 +[2024-12-11 00:58:36 root] (utils.py 283): INFO Epoch: [25] [ 880/2502] eta: 0:20:45 lr: 0.000003 loss_cls: 4.1765 (3.9087) grad_norm: 2.4472 (2.4385) time: 0.7656 data: 0.0002 max mem: 8426 +[2024-12-11 00:58:44 root] (utils.py 283): INFO Epoch: [25] [ 890/2502] eta: 0:20:37 lr: 0.000003 loss_cls: 4.1928 (3.9093) grad_norm: 2.4410 (2.4381) time: 0.7642 data: 0.0003 max mem: 8426 +[2024-12-11 00:58:52 root] (utils.py 283): INFO Epoch: [25] [ 900/2502] eta: 0:20:29 lr: 0.000003 loss_cls: 4.1129 (3.9103) grad_norm: 2.4270 (2.4380) time: 0.7610 data: 0.0002 max mem: 8426 +[2024-12-11 00:58:59 root] (utils.py 283): INFO Epoch: [25] [ 910/2502] eta: 0:20:22 lr: 0.000003 loss_cls: 3.8661 (3.9089) grad_norm: 2.4032 (2.4381) time: 0.7652 data: 0.0002 max mem: 8426 +[2024-12-11 00:59:07 root] (utils.py 283): INFO Epoch: [25] [ 920/2502] eta: 0:20:14 lr: 0.000003 loss_cls: 3.7769 (3.9075) grad_norm: 2.3863 (2.4376) time: 0.7657 data: 0.0002 max mem: 8426 +[2024-12-11 00:59:14 root] (utils.py 283): INFO Epoch: [25] [ 930/2502] eta: 0:20:06 lr: 0.000003 loss_cls: 3.7971 (3.9074) grad_norm: 2.4438 (2.4378) time: 0.7618 data: 0.0002 max mem: 8426 +[2024-12-11 00:59:22 root] (utils.py 283): INFO Epoch: [25] [ 940/2502] eta: 0:19:58 lr: 0.000003 loss_cls: 4.1099 (3.9068) grad_norm: 2.4716 (2.4380) time: 0.7619 data: 0.0002 max mem: 8426 +[2024-12-11 00:59:30 root] (utils.py 283): INFO Epoch: [25] [ 950/2502] eta: 0:19:50 lr: 0.000003 loss_cls: 4.0704 (3.9069) grad_norm: 2.3845 (2.4374) time: 0.7596 data: 0.0002 max mem: 8426 +[2024-12-11 00:59:37 root] (utils.py 283): INFO Epoch: [25] [ 960/2502] eta: 0:19:43 lr: 0.000003 loss_cls: 4.1483 (3.9095) grad_norm: 2.3217 (2.4371) time: 0.7594 data: 0.0002 max mem: 8426 +[2024-12-11 00:59:45 root] (utils.py 283): INFO Epoch: [25] [ 970/2502] eta: 0:19:35 lr: 0.000003 loss_cls: 4.0494 (3.9067) grad_norm: 2.3640 (2.4368) time: 0.7623 data: 0.0002 max mem: 8426 +[2024-12-11 00:59:53 root] (utils.py 283): INFO Epoch: [25] [ 980/2502] eta: 0:19:27 lr: 0.000003 loss_cls: 3.8504 (3.9060) grad_norm: 2.3967 (2.4366) time: 0.7728 data: 0.0002 max mem: 8426 +[2024-12-11 01:00:00 root] (utils.py 283): INFO Epoch: [25] [ 990/2502] eta: 0:19:20 lr: 0.000003 loss_cls: 4.0088 (3.9085) grad_norm: 2.4493 (2.4371) time: 0.7757 data: 0.0002 max mem: 8426 +[2024-12-11 01:00:08 root] (utils.py 283): INFO Epoch: [25] [1000/2502] eta: 0:19:12 lr: 0.000003 loss_cls: 4.0337 (3.9061) grad_norm: 2.4099 (2.4363) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-11 01:00:16 root] (utils.py 283): INFO Epoch: [25] [1010/2502] eta: 0:19:04 lr: 0.000003 loss_cls: 3.6951 (3.9057) grad_norm: 2.3880 (2.4366) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-11 01:00:23 root] (utils.py 283): INFO Epoch: [25] [1020/2502] eta: 0:18:56 lr: 0.000003 loss_cls: 4.0407 (3.9060) grad_norm: 2.4394 (2.4367) time: 0.7588 data: 0.0002 max mem: 8426 +[2024-12-11 01:00:31 root] (utils.py 283): INFO Epoch: [25] [1030/2502] eta: 0:18:49 lr: 0.000003 loss_cls: 4.0434 (3.9051) grad_norm: 2.4096 (2.4369) time: 0.7664 data: 0.0002 max mem: 8426 +[2024-12-11 01:00:39 root] (utils.py 283): INFO Epoch: [25] [1040/2502] eta: 0:18:41 lr: 0.000003 loss_cls: 3.8630 (3.9045) grad_norm: 2.4247 (2.4369) time: 0.7714 data: 0.0002 max mem: 8426 +[2024-12-11 01:00:46 root] (utils.py 283): INFO Epoch: [25] [1050/2502] eta: 0:18:33 lr: 0.000003 loss_cls: 3.8934 (3.9026) grad_norm: 2.3758 (2.4359) time: 0.7635 data: 0.0002 max mem: 8426 +[2024-12-11 01:00:54 root] (utils.py 283): INFO Epoch: [25] [1060/2502] eta: 0:18:26 lr: 0.000003 loss_cls: 3.9779 (3.9014) grad_norm: 2.3200 (2.4353) time: 0.7649 data: 0.0002 max mem: 8426 +[2024-12-11 01:01:02 root] (utils.py 283): INFO Epoch: [25] [1070/2502] eta: 0:18:18 lr: 0.000003 loss_cls: 3.9496 (3.9012) grad_norm: 2.3466 (2.4348) time: 0.7739 data: 0.0003 max mem: 8426 +[2024-12-11 01:01:09 root] (utils.py 283): INFO Epoch: [25] [1080/2502] eta: 0:18:11 lr: 0.000003 loss_cls: 3.9263 (3.9006) grad_norm: 2.4090 (2.4353) time: 0.7701 data: 0.0002 max mem: 8426 +[2024-12-11 01:01:17 root] (utils.py 283): INFO Epoch: [25] [1090/2502] eta: 0:18:03 lr: 0.000003 loss_cls: 4.0021 (3.9015) grad_norm: 2.5261 (2.4358) time: 0.7639 data: 0.0002 max mem: 8426 +[2024-12-11 01:01:25 root] (utils.py 283): INFO Epoch: [25] [1100/2502] eta: 0:17:55 lr: 0.000003 loss_cls: 3.9882 (3.9006) grad_norm: 2.5277 (2.4366) time: 0.7668 data: 0.0002 max mem: 8426 +[2024-12-11 01:01:32 root] (utils.py 283): INFO Epoch: [25] [1110/2502] eta: 0:17:48 lr: 0.000003 loss_cls: 3.8572 (3.9006) grad_norm: 2.4289 (2.4367) time: 0.7660 data: 0.0002 max mem: 8426 +[2024-12-11 01:01:40 root] (utils.py 283): INFO Epoch: [25] [1120/2502] eta: 0:17:40 lr: 0.000003 loss_cls: 4.2039 (3.9042) grad_norm: 2.4289 (2.4365) time: 0.7641 data: 0.0002 max mem: 8426 +[2024-12-11 01:01:48 root] (utils.py 283): INFO Epoch: [25] [1130/2502] eta: 0:17:32 lr: 0.000003 loss_cls: 4.0224 (3.9039) grad_norm: 2.4027 (2.4361) time: 0.7626 data: 0.0002 max mem: 8426 +[2024-12-11 01:01:55 root] (utils.py 283): INFO Epoch: [25] [1140/2502] eta: 0:17:24 lr: 0.000003 loss_cls: 3.8824 (3.9031) grad_norm: 2.3808 (2.4362) time: 0.7676 data: 0.0002 max mem: 8426 +[2024-12-11 01:02:03 root] (utils.py 283): INFO Epoch: [25] [1150/2502] eta: 0:17:17 lr: 0.000003 loss_cls: 3.9674 (3.9031) grad_norm: 2.4018 (2.4359) time: 0.7699 data: 0.0002 max mem: 8426 +[2024-12-11 01:02:11 root] (utils.py 283): INFO Epoch: [25] [1160/2502] eta: 0:17:09 lr: 0.000003 loss_cls: 3.8911 (3.9011) grad_norm: 2.3544 (2.4351) time: 0.7687 data: 0.0002 max mem: 8426 +[2024-12-11 01:02:18 root] (utils.py 283): INFO Epoch: [25] [1170/2502] eta: 0:17:01 lr: 0.000003 loss_cls: 3.8911 (3.9016) grad_norm: 2.3519 (2.4350) time: 0.7681 data: 0.0003 max mem: 8426 +[2024-12-11 01:02:26 root] (utils.py 283): INFO Epoch: [25] [1180/2502] eta: 0:16:54 lr: 0.000003 loss_cls: 4.0750 (3.9016) grad_norm: 2.3824 (2.4353) time: 0.7632 data: 0.0002 max mem: 8426 +[2024-12-11 01:02:34 root] (utils.py 283): INFO Epoch: [25] [1190/2502] eta: 0:16:46 lr: 0.000003 loss_cls: 4.0395 (3.9028) grad_norm: 2.4809 (2.4356) time: 0.7627 data: 0.0002 max mem: 8426 +[2024-12-11 01:02:41 root] (utils.py 283): INFO Epoch: [25] [1200/2502] eta: 0:16:38 lr: 0.000003 loss_cls: 4.0395 (3.9028) grad_norm: 2.4846 (2.4362) time: 0.7615 data: 0.0003 max mem: 8426 +[2024-12-11 01:02:49 root] (utils.py 283): INFO Epoch: [25] [1210/2502] eta: 0:16:30 lr: 0.000003 loss_cls: 4.0713 (3.9033) grad_norm: 2.4502 (2.4360) time: 0.7598 data: 0.0003 max mem: 8426 +[2024-12-11 01:02:57 root] (utils.py 283): INFO Epoch: [25] [1220/2502] eta: 0:16:23 lr: 0.000003 loss_cls: 4.1172 (3.9042) grad_norm: 2.3834 (2.4358) time: 0.7670 data: 0.0003 max mem: 8426 +[2024-12-11 01:03:04 root] (utils.py 283): INFO Epoch: [25] [1230/2502] eta: 0:16:15 lr: 0.000003 loss_cls: 4.1568 (3.9040) grad_norm: 2.3911 (2.4355) time: 0.7666 data: 0.0003 max mem: 8426 +[2024-12-11 01:03:12 root] (utils.py 283): INFO Epoch: [25] [1240/2502] eta: 0:16:08 lr: 0.000003 loss_cls: 4.1004 (3.9060) grad_norm: 2.4128 (2.4360) time: 0.7659 data: 0.0003 max mem: 8426 +[2024-12-11 01:03:20 root] (utils.py 283): INFO Epoch: [25] [1250/2502] eta: 0:16:00 lr: 0.000003 loss_cls: 4.0879 (3.9052) grad_norm: 2.3505 (2.4352) time: 0.7696 data: 0.0003 max mem: 8426 +[2024-12-11 01:03:27 root] (utils.py 283): INFO Epoch: [25] [1260/2502] eta: 0:15:52 lr: 0.000003 loss_cls: 4.1822 (3.9054) grad_norm: 2.3579 (2.4350) time: 0.7671 data: 0.0003 max mem: 8426 +[2024-12-11 01:03:35 root] (utils.py 283): INFO Epoch: [25] [1270/2502] eta: 0:15:44 lr: 0.000003 loss_cls: 4.1147 (3.9043) grad_norm: 2.4392 (2.4347) time: 0.7651 data: 0.0002 max mem: 8426 +[2024-12-11 01:03:43 root] (utils.py 283): INFO Epoch: [25] [1280/2502] eta: 0:15:37 lr: 0.000003 loss_cls: 3.8911 (3.9040) grad_norm: 2.4688 (2.4353) time: 0.7648 data: 0.0002 max mem: 8426 +[2024-12-11 01:03:50 root] (utils.py 283): INFO Epoch: [25] [1290/2502] eta: 0:15:29 lr: 0.000003 loss_cls: 3.7637 (3.9034) grad_norm: 2.4826 (2.4359) time: 0.7663 data: 0.0002 max mem: 8426 +[2024-12-11 01:03:58 root] (utils.py 283): INFO Epoch: [25] [1300/2502] eta: 0:15:21 lr: 0.000003 loss_cls: 3.9473 (3.9044) grad_norm: 2.4649 (2.4364) time: 0.7643 data: 0.0003 max mem: 8426 +[2024-12-11 01:04:06 root] (utils.py 283): INFO Epoch: [25] [1310/2502] eta: 0:15:14 lr: 0.000003 loss_cls: 3.9917 (3.9023) grad_norm: 2.4432 (2.4365) time: 0.7643 data: 0.0003 max mem: 8426 +[2024-12-11 01:04:13 root] (utils.py 283): INFO Epoch: [25] [1320/2502] eta: 0:15:06 lr: 0.000003 loss_cls: 4.0737 (3.9040) grad_norm: 2.3966 (2.4360) time: 0.7664 data: 0.0003 max mem: 8426 +[2024-12-11 01:04:22 root] (utils.py 283): INFO Epoch: [25] [1330/2502] eta: 0:14:59 lr: 0.000003 loss_cls: 4.1954 (3.9057) grad_norm: 2.3478 (2.4357) time: 0.8176 data: 0.0010 max mem: 8426 +[2024-12-11 01:04:33 root] (utils.py 283): INFO Epoch: [25] [1340/2502] eta: 0:14:55 lr: 0.000003 loss_cls: 3.9879 (3.9057) grad_norm: 2.4033 (2.4359) time: 1.0043 data: 0.0018 max mem: 8426 +[2024-12-11 01:05:07 root] (utils.py 283): INFO Epoch: [25] [1350/2502] eta: 0:15:09 lr: 0.000003 loss_cls: 3.9879 (3.9076) grad_norm: 2.4033 (2.4358) time: 2.2323 data: 0.0011 max mem: 8426 +[2024-12-11 01:05:14 root] (utils.py 283): INFO Epoch: [25] [1360/2502] eta: 0:15:01 lr: 0.000003 loss_cls: 4.0370 (3.9070) grad_norm: 2.3655 (2.4353) time: 2.0579 data: 0.0004 max mem: 8426 +[2024-12-11 01:05:22 root] (utils.py 283): INFO Epoch: [25] [1370/2502] eta: 0:14:53 lr: 0.000003 loss_cls: 3.9242 (3.9071) grad_norm: 2.4069 (2.4356) time: 0.7932 data: 0.0003 max mem: 8426 +[2024-12-11 01:05:32 root] (utils.py 283): INFO Epoch: [25] [1380/2502] eta: 0:14:47 lr: 0.000003 loss_cls: 3.9944 (3.9068) grad_norm: 2.4380 (2.4355) time: 0.8694 data: 0.0003 max mem: 8426 +[2024-12-11 01:05:44 root] (utils.py 283): INFO Epoch: [25] [1390/2502] eta: 0:14:42 lr: 0.000003 loss_cls: 3.8343 (3.9055) grad_norm: 2.3625 (2.4352) time: 1.0874 data: 0.0004 max mem: 8426 +[2024-12-11 01:06:08 root] (utils.py 283): INFO Epoch: [25] [1400/2502] eta: 0:14:46 lr: 0.000003 loss_cls: 3.5465 (3.9028) grad_norm: 2.3878 (2.4351) time: 1.7882 data: 0.0003 max mem: 8426 +[2024-12-11 01:06:33 root] (utils.py 283): INFO Epoch: [25] [1410/2502] eta: 0:14:51 lr: 0.000003 loss_cls: 3.9438 (3.9049) grad_norm: 2.4094 (2.4352) time: 2.4209 data: 0.0003 max mem: 8426 +[2024-12-11 01:06:56 root] (utils.py 283): INFO Epoch: [25] [1420/2502] eta: 0:14:55 lr: 0.000003 loss_cls: 4.1343 (3.9041) grad_norm: 2.3567 (2.4352) time: 2.4215 data: 0.0003 max mem: 8426 +[2024-12-11 01:07:20 root] (utils.py 283): INFO Epoch: [25] [1430/2502] eta: 0:14:58 lr: 0.000003 loss_cls: 4.1547 (3.9046) grad_norm: 2.4470 (2.4353) time: 2.3586 data: 0.0003 max mem: 8426 +[2024-12-11 01:07:43 root] (utils.py 283): INFO Epoch: [25] [1440/2502] eta: 0:15:01 lr: 0.000003 loss_cls: 4.1056 (3.9050) grad_norm: 2.4470 (2.4353) time: 2.3330 data: 0.0003 max mem: 8426 +[2024-12-11 01:08:06 root] (utils.py 283): INFO Epoch: [25] [1450/2502] eta: 0:15:03 lr: 0.000003 loss_cls: 3.8704 (3.9047) grad_norm: 2.4452 (2.4355) time: 2.3276 data: 0.0003 max mem: 8426 +[2024-12-11 01:08:30 root] (utils.py 283): INFO Epoch: [25] [1460/2502] eta: 0:15:05 lr: 0.000003 loss_cls: 4.3213 (3.9063) grad_norm: 2.4782 (2.4359) time: 2.3620 data: 0.0003 max mem: 8426 +[2024-12-11 01:08:54 root] (utils.py 283): INFO Epoch: [25] [1470/2502] eta: 0:15:07 lr: 0.000003 loss_cls: 4.3228 (3.9071) grad_norm: 2.3928 (2.4358) time: 2.3745 data: 0.0003 max mem: 8426 +[2024-12-11 01:09:17 root] (utils.py 283): INFO Epoch: [25] [1480/2502] eta: 0:15:08 lr: 0.000003 loss_cls: 3.9964 (3.9069) grad_norm: 2.3374 (2.4352) time: 2.3518 data: 0.0003 max mem: 8426 +[2024-12-11 01:09:40 root] (utils.py 283): INFO Epoch: [25] [1490/2502] eta: 0:15:09 lr: 0.000003 loss_cls: 3.8945 (3.9048) grad_norm: 2.3632 (2.4349) time: 2.3233 data: 0.0003 max mem: 8426 +[2024-12-11 01:10:03 root] (utils.py 283): INFO Epoch: [25] [1500/2502] eta: 0:15:10 lr: 0.000003 loss_cls: 3.7401 (3.9049) grad_norm: 2.4083 (2.4345) time: 2.3182 data: 0.0003 max mem: 8426 +[2024-12-11 01:10:26 root] (utils.py 283): INFO Epoch: [25] [1510/2502] eta: 0:15:10 lr: 0.000003 loss_cls: 3.9570 (3.9047) grad_norm: 2.3681 (2.4341) time: 2.3079 data: 0.0003 max mem: 8426 +[2024-12-11 01:10:49 root] (utils.py 283): INFO Epoch: [25] [1520/2502] eta: 0:15:09 lr: 0.000003 loss_cls: 4.1277 (3.9055) grad_norm: 2.3681 (2.4339) time: 2.2855 data: 0.0003 max mem: 8426 +[2024-12-11 01:11:12 root] (utils.py 283): INFO Epoch: [25] [1530/2502] eta: 0:15:09 lr: 0.000003 loss_cls: 3.8492 (3.9050) grad_norm: 2.4039 (2.4337) time: 2.2828 data: 0.0003 max mem: 8426 +[2024-12-11 01:11:36 root] (utils.py 283): INFO Epoch: [25] [1540/2502] eta: 0:15:08 lr: 0.000003 loss_cls: 3.8492 (3.9043) grad_norm: 2.4855 (2.4344) time: 2.3405 data: 0.0003 max mem: 8426 +[2024-12-11 01:11:59 root] (utils.py 283): INFO Epoch: [25] [1550/2502] eta: 0:15:08 lr: 0.000003 loss_cls: 3.9909 (3.9048) grad_norm: 2.4587 (2.4343) time: 2.3643 data: 0.0003 max mem: 8426 +[2024-12-11 01:12:22 root] (utils.py 283): INFO Epoch: [25] [1560/2502] eta: 0:15:06 lr: 0.000003 loss_cls: 4.0836 (3.9043) grad_norm: 2.4155 (2.4345) time: 2.3286 data: 0.0003 max mem: 8426 +[2024-12-11 01:12:46 root] (utils.py 283): INFO Epoch: [25] [1570/2502] eta: 0:15:05 lr: 0.000003 loss_cls: 3.7201 (3.9018) grad_norm: 2.3838 (2.4340) time: 2.3192 data: 0.0003 max mem: 8426 +[2024-12-11 01:13:09 root] (utils.py 283): INFO Epoch: [25] [1580/2502] eta: 0:15:03 lr: 0.000003 loss_cls: 3.4963 (3.9004) grad_norm: 2.3781 (2.4340) time: 2.3264 data: 0.0003 max mem: 8426 +[2024-12-11 01:13:32 root] (utils.py 283): INFO Epoch: [25] [1590/2502] eta: 0:15:01 lr: 0.000003 loss_cls: 3.6513 (3.8995) grad_norm: 2.3726 (2.4335) time: 2.3171 data: 0.0003 max mem: 8426 +[2024-12-11 01:13:55 root] (utils.py 283): INFO Epoch: [25] [1600/2502] eta: 0:14:58 lr: 0.000003 loss_cls: 4.0303 (3.9007) grad_norm: 2.3732 (2.4334) time: 2.2834 data: 0.0003 max mem: 8426 +[2024-12-11 01:14:18 root] (utils.py 283): INFO Epoch: [25] [1610/2502] eta: 0:14:55 lr: 0.000003 loss_cls: 3.7110 (3.8985) grad_norm: 2.4467 (2.4336) time: 2.2789 data: 0.0003 max mem: 8426 +[2024-12-11 01:14:42 root] (utils.py 283): INFO Epoch: [25] [1620/2502] eta: 0:14:53 lr: 0.000003 loss_cls: 3.5521 (3.8989) grad_norm: 2.4445 (2.4337) time: 2.3574 data: 0.0002 max mem: 8426 +[2024-12-11 01:15:06 root] (utils.py 283): INFO Epoch: [25] [1630/2502] eta: 0:14:50 lr: 0.000003 loss_cls: 4.1458 (3.9004) grad_norm: 2.3903 (2.4335) time: 2.4231 data: 0.0003 max mem: 8426 +[2024-12-11 01:15:30 root] (utils.py 283): INFO Epoch: [25] [1640/2502] eta: 0:14:47 lr: 0.000003 loss_cls: 4.1424 (3.8994) grad_norm: 2.3658 (2.4335) time: 2.4188 data: 0.0003 max mem: 8426 +[2024-12-11 01:15:55 root] (utils.py 283): INFO Epoch: [25] [1650/2502] eta: 0:14:44 lr: 0.000003 loss_cls: 3.7981 (3.8987) grad_norm: 2.4604 (2.4337) time: 2.4267 data: 0.0003 max mem: 8426 +[2024-12-11 01:16:18 root] (utils.py 283): INFO Epoch: [25] [1660/2502] eta: 0:14:41 lr: 0.000003 loss_cls: 3.9691 (3.8980) grad_norm: 2.4604 (2.4335) time: 2.4077 data: 0.0003 max mem: 8426 +[2024-12-11 01:16:42 root] (utils.py 283): INFO Epoch: [25] [1670/2502] eta: 0:14:37 lr: 0.000003 loss_cls: 3.7930 (3.8981) grad_norm: 2.4176 (2.4336) time: 2.3845 data: 0.0003 max mem: 8426 +[2024-12-11 01:17:06 root] (utils.py 283): INFO Epoch: [25] [1680/2502] eta: 0:14:33 lr: 0.000003 loss_cls: 3.8460 (3.8986) grad_norm: 2.4354 (2.4338) time: 2.3892 data: 0.0003 max mem: 8426 +[2024-12-11 01:17:29 root] (utils.py 283): INFO Epoch: [25] [1690/2502] eta: 0:14:28 lr: 0.000003 loss_cls: 4.0050 (3.8985) grad_norm: 2.4288 (2.4335) time: 2.3524 data: 0.0003 max mem: 8426 +[2024-12-11 01:17:53 root] (utils.py 283): INFO Epoch: [25] [1700/2502] eta: 0:14:24 lr: 0.000003 loss_cls: 4.1021 (3.8990) grad_norm: 2.4272 (2.4338) time: 2.3344 data: 0.0003 max mem: 8426 +[2024-12-11 01:18:16 root] (utils.py 283): INFO Epoch: [25] [1710/2502] eta: 0:14:19 lr: 0.000003 loss_cls: 4.0893 (3.8977) grad_norm: 2.4998 (2.4341) time: 2.3544 data: 0.0003 max mem: 8426 +[2024-12-11 01:18:40 root] (utils.py 283): INFO Epoch: [25] [1720/2502] eta: 0:14:14 lr: 0.000003 loss_cls: 3.7281 (3.8968) grad_norm: 2.4725 (2.4343) time: 2.3446 data: 0.0003 max mem: 8426 +[2024-12-11 01:19:03 root] (utils.py 283): INFO Epoch: [25] [1730/2502] eta: 0:14:08 lr: 0.000003 loss_cls: 3.6164 (3.8968) grad_norm: 2.4460 (2.4344) time: 2.3097 data: 0.0003 max mem: 8426 +[2024-12-11 01:19:26 root] (utils.py 283): INFO Epoch: [25] [1740/2502] eta: 0:14:02 lr: 0.000003 loss_cls: 3.6198 (3.8965) grad_norm: 2.4460 (2.4345) time: 2.3132 data: 0.0003 max mem: 8426 +[2024-12-11 01:19:49 root] (utils.py 283): INFO Epoch: [25] [1750/2502] eta: 0:13:56 lr: 0.000003 loss_cls: 3.9865 (3.8975) grad_norm: 2.3980 (2.4344) time: 2.3100 data: 0.0003 max mem: 8426 +[2024-12-11 01:20:13 root] (utils.py 283): INFO Epoch: [25] [1760/2502] eta: 0:13:51 lr: 0.000003 loss_cls: 4.0739 (3.8976) grad_norm: 2.4196 (2.4342) time: 2.3320 data: 0.0003 max mem: 8426 +[2024-12-11 01:20:36 root] (utils.py 283): INFO Epoch: [25] [1770/2502] eta: 0:13:45 lr: 0.000003 loss_cls: 3.7669 (3.8969) grad_norm: 2.4411 (2.4344) time: 2.3701 data: 0.0002 max mem: 8426 +[2024-12-11 01:21:00 root] (utils.py 283): INFO Epoch: [25] [1780/2502] eta: 0:13:38 lr: 0.000003 loss_cls: 3.7669 (3.8972) grad_norm: 2.4396 (2.4342) time: 2.3721 data: 0.0003 max mem: 8426 +[2024-12-11 01:21:24 root] (utils.py 283): INFO Epoch: [25] [1790/2502] eta: 0:13:32 lr: 0.000003 loss_cls: 4.1330 (3.8978) grad_norm: 2.4193 (2.4341) time: 2.3800 data: 0.0003 max mem: 8426 +[2024-12-11 01:21:48 root] (utils.py 283): INFO Epoch: [25] [1800/2502] eta: 0:13:25 lr: 0.000003 loss_cls: 4.1247 (3.8984) grad_norm: 2.3874 (2.4342) time: 2.3740 data: 0.0003 max mem: 8426 +[2024-12-11 01:22:11 root] (utils.py 283): INFO Epoch: [25] [1810/2502] eta: 0:13:18 lr: 0.000003 loss_cls: 4.1247 (3.8982) grad_norm: 2.3874 (2.4342) time: 2.3483 data: 0.0003 max mem: 8426 +[2024-12-11 01:22:34 root] (utils.py 283): INFO Epoch: [25] [1820/2502] eta: 0:13:11 lr: 0.000003 loss_cls: 3.8249 (3.8976) grad_norm: 2.3980 (2.4343) time: 2.3254 data: 0.0003 max mem: 8426 +[2024-12-11 01:22:58 root] (utils.py 283): INFO Epoch: [25] [1830/2502] eta: 0:13:04 lr: 0.000003 loss_cls: 3.8249 (3.8979) grad_norm: 2.3976 (2.4339) time: 2.3452 data: 0.0003 max mem: 8426 +[2024-12-11 01:23:21 root] (utils.py 283): INFO Epoch: [25] [1840/2502] eta: 0:12:56 lr: 0.000003 loss_cls: 3.9882 (3.8983) grad_norm: 2.3780 (2.4338) time: 2.3346 data: 0.0003 max mem: 8426 +[2024-12-11 01:23:44 root] (utils.py 283): INFO Epoch: [25] [1850/2502] eta: 0:12:49 lr: 0.000003 loss_cls: 3.9440 (3.8971) grad_norm: 2.4935 (2.4341) time: 2.2978 data: 0.0003 max mem: 8426 +[2024-12-11 01:24:07 root] (utils.py 283): INFO Epoch: [25] [1860/2502] eta: 0:12:41 lr: 0.000003 loss_cls: 3.9440 (3.8980) grad_norm: 2.4965 (2.4342) time: 2.3245 data: 0.0003 max mem: 8426 +[2024-12-11 01:24:31 root] (utils.py 283): INFO Epoch: [25] [1870/2502] eta: 0:12:33 lr: 0.000003 loss_cls: 4.0591 (3.8983) grad_norm: 2.4164 (2.4343) time: 2.3638 data: 0.0003 max mem: 8426 +[2024-12-11 01:24:55 root] (utils.py 283): INFO Epoch: [25] [1880/2502] eta: 0:12:25 lr: 0.000003 loss_cls: 4.0591 (3.8968) grad_norm: 2.4387 (2.4345) time: 2.3994 data: 0.0003 max mem: 8426 +[2024-12-11 01:25:19 root] (utils.py 283): INFO Epoch: [25] [1890/2502] eta: 0:12:17 lr: 0.000003 loss_cls: 3.8744 (3.8960) grad_norm: 2.4820 (2.4347) time: 2.3838 data: 0.0003 max mem: 8426 +[2024-12-11 01:25:42 root] (utils.py 283): INFO Epoch: [25] [1900/2502] eta: 0:12:09 lr: 0.000003 loss_cls: 4.1223 (3.8968) grad_norm: 2.4873 (2.4349) time: 2.3456 data: 0.0003 max mem: 8426 +[2024-12-11 01:26:06 root] (utils.py 283): INFO Epoch: [25] [1910/2502] eta: 0:12:00 lr: 0.000003 loss_cls: 4.0922 (3.8969) grad_norm: 2.5097 (2.4352) time: 2.3606 data: 0.0003 max mem: 8426 +[2024-12-11 01:26:30 root] (utils.py 283): INFO Epoch: [25] [1920/2502] eta: 0:11:51 lr: 0.000003 loss_cls: 4.0922 (3.8973) grad_norm: 2.4408 (2.4350) time: 2.3847 data: 0.0003 max mem: 8426 +[2024-12-11 01:26:54 root] (utils.py 283): INFO Epoch: [25] [1930/2502] eta: 0:11:43 lr: 0.000003 loss_cls: 3.9119 (3.8972) grad_norm: 2.3873 (2.4350) time: 2.3903 data: 0.0003 max mem: 8426 +[2024-12-11 01:27:17 root] (utils.py 283): INFO Epoch: [25] [1940/2502] eta: 0:11:34 lr: 0.000003 loss_cls: 4.1054 (3.8977) grad_norm: 2.4857 (2.4353) time: 2.3765 data: 0.0003 max mem: 8426 +[2024-12-11 01:27:41 root] (utils.py 283): INFO Epoch: [25] [1950/2502] eta: 0:11:24 lr: 0.000003 loss_cls: 3.9751 (3.8977) grad_norm: 2.4748 (2.4353) time: 2.3691 data: 0.0003 max mem: 8426 +[2024-12-11 01:28:05 root] (utils.py 283): INFO Epoch: [25] [1960/2502] eta: 0:11:15 lr: 0.000003 loss_cls: 3.7265 (3.8962) grad_norm: 2.3708 (2.4350) time: 2.3658 data: 0.0002 max mem: 8426 +[2024-12-11 01:28:28 root] (utils.py 283): INFO Epoch: [25] [1970/2502] eta: 0:11:06 lr: 0.000003 loss_cls: 3.7265 (3.8960) grad_norm: 2.3225 (2.4346) time: 2.3504 data: 0.0002 max mem: 8426 +[2024-12-11 01:28:52 root] (utils.py 283): INFO Epoch: [25] [1980/2502] eta: 0:10:56 lr: 0.000003 loss_cls: 3.7731 (3.8947) grad_norm: 2.3747 (2.4346) time: 2.3558 data: 0.0003 max mem: 8426 +[2024-12-11 01:29:15 root] (utils.py 283): INFO Epoch: [25] [1990/2502] eta: 0:10:46 lr: 0.000003 loss_cls: 3.8963 (3.8947) grad_norm: 2.3936 (2.4345) time: 2.3620 data: 0.0003 max mem: 8426 +[2024-12-11 01:29:39 root] (utils.py 283): INFO Epoch: [25] [2000/2502] eta: 0:10:36 lr: 0.000003 loss_cls: 4.0716 (3.8943) grad_norm: 2.4030 (2.4351) time: 2.3614 data: 0.0003 max mem: 8426 +[2024-12-11 01:30:02 root] (utils.py 283): INFO Epoch: [25] [2010/2502] eta: 0:10:26 lr: 0.000003 loss_cls: 3.9081 (3.8949) grad_norm: 2.5017 (2.4350) time: 2.3556 data: 0.0003 max mem: 8426 +[2024-12-11 01:30:26 root] (utils.py 283): INFO Epoch: [25] [2020/2502] eta: 0:10:16 lr: 0.000003 loss_cls: 3.9836 (3.8940) grad_norm: 2.4082 (2.4349) time: 2.3556 data: 0.0003 max mem: 8426 +[2024-12-11 01:30:50 root] (utils.py 283): INFO Epoch: [25] [2030/2502] eta: 0:10:06 lr: 0.000003 loss_cls: 4.1743 (3.8957) grad_norm: 2.4009 (2.4347) time: 2.3824 data: 0.0003 max mem: 8426 +[2024-12-11 01:31:14 root] (utils.py 283): INFO Epoch: [25] [2040/2502] eta: 0:09:56 lr: 0.000003 loss_cls: 3.9594 (3.8938) grad_norm: 2.3462 (2.4345) time: 2.3717 data: 0.0003 max mem: 8426 +[2024-12-11 01:31:37 root] (utils.py 283): INFO Epoch: [25] [2050/2502] eta: 0:09:45 lr: 0.000003 loss_cls: 3.6459 (3.8952) grad_norm: 2.3791 (2.4343) time: 2.3601 data: 0.0003 max mem: 8426 +[2024-12-11 01:32:01 root] (utils.py 283): INFO Epoch: [25] [2060/2502] eta: 0:09:34 lr: 0.000003 loss_cls: 4.2201 (3.8970) grad_norm: 2.4131 (2.4344) time: 2.3539 data: 0.0003 max mem: 8426 +[2024-12-11 01:32:24 root] (utils.py 283): INFO Epoch: [25] [2070/2502] eta: 0:09:24 lr: 0.000003 loss_cls: 4.2885 (3.8979) grad_norm: 2.4184 (2.4347) time: 2.3348 data: 0.0003 max mem: 8426 +[2024-12-11 01:32:47 root] (utils.py 283): INFO Epoch: [25] [2080/2502] eta: 0:09:12 lr: 0.000003 loss_cls: 4.0914 (3.8978) grad_norm: 2.4032 (2.4343) time: 2.3115 data: 0.0003 max mem: 8426 +[2024-12-11 01:33:11 root] (utils.py 283): INFO Epoch: [25] [2090/2502] eta: 0:09:01 lr: 0.000003 loss_cls: 4.0250 (3.8974) grad_norm: 2.4559 (2.4347) time: 2.3300 data: 0.0003 max mem: 8426 +[2024-12-11 01:33:34 root] (utils.py 283): INFO Epoch: [25] [2100/2502] eta: 0:08:50 lr: 0.000003 loss_cls: 3.6920 (3.8966) grad_norm: 2.5032 (2.4351) time: 2.3463 data: 0.0003 max mem: 8426 +[2024-12-11 01:33:58 root] (utils.py 283): INFO Epoch: [25] [2110/2502] eta: 0:08:39 lr: 0.000003 loss_cls: 3.6946 (3.8966) grad_norm: 2.4104 (2.4348) time: 2.3600 data: 0.0003 max mem: 8426 +[2024-12-11 01:34:21 root] (utils.py 283): INFO Epoch: [25] [2120/2502] eta: 0:08:28 lr: 0.000003 loss_cls: 3.6640 (3.8945) grad_norm: 2.4037 (2.4348) time: 2.3694 data: 0.0003 max mem: 8426 +[2024-12-11 01:34:45 root] (utils.py 283): INFO Epoch: [25] [2130/2502] eta: 0:08:16 lr: 0.000003 loss_cls: 3.8829 (3.8947) grad_norm: 2.4274 (2.4348) time: 2.3553 data: 0.0003 max mem: 8426 +[2024-12-11 01:35:08 root] (utils.py 283): INFO Epoch: [25] [2140/2502] eta: 0:08:04 lr: 0.000003 loss_cls: 4.0849 (3.8949) grad_norm: 2.4080 (2.4344) time: 2.3475 data: 0.0002 max mem: 8426 +[2024-12-11 01:35:32 root] (utils.py 283): INFO Epoch: [25] [2150/2502] eta: 0:07:53 lr: 0.000003 loss_cls: 4.0860 (3.8950) grad_norm: 2.4663 (2.4348) time: 2.3330 data: 0.0003 max mem: 8426 +[2024-12-11 01:35:55 root] (utils.py 283): INFO Epoch: [25] [2160/2502] eta: 0:07:41 lr: 0.000003 loss_cls: 3.9267 (3.8948) grad_norm: 2.4862 (2.4345) time: 2.3282 data: 0.0003 max mem: 8426 +[2024-12-11 01:36:18 root] (utils.py 283): INFO Epoch: [25] [2170/2502] eta: 0:07:29 lr: 0.000003 loss_cls: 3.8018 (3.8937) grad_norm: 2.4000 (2.4346) time: 2.3336 data: 0.0003 max mem: 8426 +[2024-12-11 01:36:41 root] (utils.py 283): INFO Epoch: [25] [2180/2502] eta: 0:07:17 lr: 0.000003 loss_cls: 3.6851 (3.8933) grad_norm: 2.3589 (2.4342) time: 2.3343 data: 0.0003 max mem: 8426 +[2024-12-11 01:37:05 root] (utils.py 283): INFO Epoch: [25] [2190/2502] eta: 0:07:05 lr: 0.000003 loss_cls: 3.9328 (3.8941) grad_norm: 2.3411 (2.4341) time: 2.3405 data: 0.0002 max mem: 8426 +[2024-12-11 01:37:28 root] (utils.py 283): INFO Epoch: [25] [2200/2502] eta: 0:06:52 lr: 0.000003 loss_cls: 4.2195 (3.8954) grad_norm: 2.4572 (2.4341) time: 2.3467 data: 0.0003 max mem: 8426 +[2024-12-11 01:37:52 root] (utils.py 283): INFO Epoch: [25] [2210/2502] eta: 0:06:40 lr: 0.000003 loss_cls: 3.9401 (3.8942) grad_norm: 2.4814 (2.4341) time: 2.3544 data: 0.0003 max mem: 8426 +[2024-12-11 01:38:15 root] (utils.py 283): INFO Epoch: [25] [2220/2502] eta: 0:06:27 lr: 0.000003 loss_cls: 3.5332 (3.8934) grad_norm: 2.4542 (2.4342) time: 2.3421 data: 0.0003 max mem: 8426 +[2024-12-11 01:38:38 root] (utils.py 283): INFO Epoch: [25] [2230/2502] eta: 0:06:15 lr: 0.000003 loss_cls: 3.5657 (3.8931) grad_norm: 2.4542 (2.4342) time: 2.2762 data: 0.0003 max mem: 8426 +[2024-12-11 01:39:01 root] (utils.py 283): INFO Epoch: [25] [2240/2502] eta: 0:06:02 lr: 0.000003 loss_cls: 4.0099 (3.8934) grad_norm: 2.4821 (2.4343) time: 2.2898 data: 0.0003 max mem: 8426 +[2024-12-11 01:39:24 root] (utils.py 283): INFO Epoch: [25] [2250/2502] eta: 0:05:49 lr: 0.000003 loss_cls: 3.8444 (3.8919) grad_norm: 2.4464 (2.4343) time: 2.3023 data: 0.0003 max mem: 8426 +[2024-12-11 01:39:47 root] (utils.py 283): INFO Epoch: [25] [2260/2502] eta: 0:05:36 lr: 0.000003 loss_cls: 3.8184 (3.8918) grad_norm: 2.4464 (2.4344) time: 2.2871 data: 0.0003 max mem: 8426 +[2024-12-11 01:40:09 root] (utils.py 283): INFO Epoch: [25] [2270/2502] eta: 0:05:23 lr: 0.000003 loss_cls: 3.9229 (3.8910) grad_norm: 2.4253 (2.4345) time: 2.2869 data: 0.0003 max mem: 8426 +[2024-12-11 01:40:33 root] (utils.py 283): INFO Epoch: [25] [2280/2502] eta: 0:05:10 lr: 0.000003 loss_cls: 3.8792 (3.8902) grad_norm: 2.3693 (2.4341) time: 2.2887 data: 0.0003 max mem: 8426 +[2024-12-11 01:40:55 root] (utils.py 283): INFO Epoch: [25] [2290/2502] eta: 0:04:57 lr: 0.000003 loss_cls: 3.7068 (3.8898) grad_norm: 2.3978 (2.4340) time: 2.2901 data: 0.0003 max mem: 8426 +[2024-12-11 01:41:19 root] (utils.py 283): INFO Epoch: [25] [2300/2502] eta: 0:04:44 lr: 0.000003 loss_cls: 3.7210 (3.8891) grad_norm: 2.3978 (2.4338) time: 2.3139 data: 0.0003 max mem: 8426 +[2024-12-11 01:41:42 root] (utils.py 283): INFO Epoch: [25] [2310/2502] eta: 0:04:31 lr: 0.000003 loss_cls: 3.9244 (3.8903) grad_norm: 2.3859 (2.4340) time: 2.3508 data: 0.0003 max mem: 8426 +[2024-12-11 01:42:06 root] (utils.py 283): INFO Epoch: [25] [2320/2502] eta: 0:04:17 lr: 0.000003 loss_cls: 4.0546 (3.8905) grad_norm: 2.3849 (2.4338) time: 2.3844 data: 0.0003 max mem: 8426 +[2024-12-11 01:42:31 root] (utils.py 283): INFO Epoch: [25] [2330/2502] eta: 0:04:04 lr: 0.000003 loss_cls: 4.0546 (3.8905) grad_norm: 2.3643 (2.4334) time: 2.4287 data: 0.0003 max mem: 8426 +[2024-12-11 01:42:55 root] (utils.py 283): INFO Epoch: [25] [2340/2502] eta: 0:03:50 lr: 0.000003 loss_cls: 4.1473 (3.8912) grad_norm: 2.3928 (2.4336) time: 2.4247 data: 0.0003 max mem: 8426 +[2024-12-11 01:43:18 root] (utils.py 283): INFO Epoch: [25] [2350/2502] eta: 0:03:37 lr: 0.000003 loss_cls: 4.0351 (3.8916) grad_norm: 2.4846 (2.4337) time: 2.3772 data: 0.0003 max mem: 8426 +[2024-12-11 01:43:42 root] (utils.py 283): INFO Epoch: [25] [2360/2502] eta: 0:03:23 lr: 0.000003 loss_cls: 3.8705 (3.8911) grad_norm: 2.4538 (2.4339) time: 2.3583 data: 0.0002 max mem: 8426 +[2024-12-11 01:44:06 root] (utils.py 283): INFO Epoch: [25] [2370/2502] eta: 0:03:09 lr: 0.000003 loss_cls: 3.9566 (3.8913) grad_norm: 2.3992 (2.4338) time: 2.3649 data: 0.0002 max mem: 8426 +[2024-12-11 01:44:30 root] (utils.py 283): INFO Epoch: [25] [2380/2502] eta: 0:02:55 lr: 0.000003 loss_cls: 4.1272 (3.8919) grad_norm: 2.3992 (2.4338) time: 2.3705 data: 0.0003 max mem: 8426 +[2024-12-11 01:44:53 root] (utils.py 283): INFO Epoch: [25] [2390/2502] eta: 0:02:41 lr: 0.000003 loss_cls: 3.9619 (3.8912) grad_norm: 2.4481 (2.4338) time: 2.3501 data: 0.0003 max mem: 8426 +[2024-12-11 01:45:16 root] (utils.py 283): INFO Epoch: [25] [2400/2502] eta: 0:02:27 lr: 0.000003 loss_cls: 3.8609 (3.8914) grad_norm: 2.3673 (2.4335) time: 2.3275 data: 0.0003 max mem: 8426 +[2024-12-11 01:45:39 root] (utils.py 283): INFO Epoch: [25] [2410/2502] eta: 0:02:13 lr: 0.000003 loss_cls: 3.8609 (3.8909) grad_norm: 2.3495 (2.4332) time: 2.3340 data: 0.0003 max mem: 8426 +[2024-12-11 01:46:03 root] (utils.py 283): INFO Epoch: [25] [2420/2502] eta: 0:01:59 lr: 0.000003 loss_cls: 3.6579 (3.8902) grad_norm: 2.3861 (2.4331) time: 2.3316 data: 0.0003 max mem: 8426 +[2024-12-11 01:46:26 root] (utils.py 283): INFO Epoch: [25] [2430/2502] eta: 0:01:45 lr: 0.000003 loss_cls: 4.0505 (3.8907) grad_norm: 2.4341 (2.4336) time: 2.3278 data: 0.0003 max mem: 8426 +[2024-12-11 01:46:50 root] (utils.py 283): INFO Epoch: [25] [2440/2502] eta: 0:01:30 lr: 0.000003 loss_cls: 4.1076 (3.8907) grad_norm: 2.4429 (2.4335) time: 2.3493 data: 0.0003 max mem: 8426 +[2024-12-11 01:47:12 root] (utils.py 283): INFO Epoch: [25] [2450/2502] eta: 0:01:16 lr: 0.000003 loss_cls: 4.0291 (3.8910) grad_norm: 2.4295 (2.4336) time: 2.3202 data: 0.0003 max mem: 8426 +[2024-12-11 01:47:36 root] (utils.py 283): INFO Epoch: [25] [2460/2502] eta: 0:01:01 lr: 0.000003 loss_cls: 3.9924 (3.8908) grad_norm: 2.4506 (2.4341) time: 2.3166 data: 0.0003 max mem: 8426 +[2024-12-11 01:48:00 root] (utils.py 283): INFO Epoch: [25] [2470/2502] eta: 0:00:47 lr: 0.000003 loss_cls: 3.9828 (3.8907) grad_norm: 2.4261 (2.4340) time: 2.3738 data: 0.0003 max mem: 8426 +[2024-12-11 01:48:24 root] (utils.py 283): INFO Epoch: [25] [2480/2502] eta: 0:00:32 lr: 0.000003 loss_cls: 3.9828 (3.8914) grad_norm: 2.4611 (2.4344) time: 2.3838 data: 0.0003 max mem: 8426 +[2024-12-11 01:48:48 root] (utils.py 283): INFO Epoch: [25] [2490/2502] eta: 0:00:17 lr: 0.000003 loss_cls: 4.0684 (3.8903) grad_norm: 2.4961 (2.4346) time: 2.3990 data: 0.0239 max mem: 8426 +[2024-12-11 01:49:11 root] (utils.py 283): INFO Epoch: [25] [2500/2502] eta: 0:00:02 lr: 0.000003 loss_cls: 4.0132 (3.8907) grad_norm: 2.3922 (2.4343) time: 2.3737 data: 0.0239 max mem: 8426 +[2024-12-11 01:49:13 root] (utils.py 283): INFO Epoch: [25] [2501/2502] eta: 0:00:01 lr: 0.000003 loss_cls: 4.0684 (3.8908) grad_norm: 2.3919 (2.4342) time: 2.3614 data: 0.0239 max mem: 8426 +[2024-12-11 01:49:13 root] (utils.py 297): INFO Epoch: [25] Total time: 1:01:53 (1.4842 s / it) +[2024-12-11 01:49:13 root] (engine.py 179): INFO Averaged stats:lr: 0.000003 loss_cls: 4.0684 (3.8834) grad_norm: 2.3919 (2.4342) +[2024-12-11 01:49:14 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:29 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.6265 (0.6265) acc1: 84.3750 (84.3750) acc3: 97.6562 (97.6562) acc5: 98.4375 (98.4375) time: 0.2970 data: 0.0004 max mem: 8426 +[2024-12-11 01:49:16 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:18 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7211 (0.7981) acc1: 84.3750 (82.3153) acc3: 95.3125 (93.7500) acc5: 97.6562 (96.5909) time: 0.2095 data: 0.0004 max mem: 8426 +[2024-12-11 01:49:18 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:15 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8383 (0.8516) acc1: 79.6875 (81.5476) acc3: 91.4062 (93.0804) acc5: 94.5312 (95.4613) time: 0.1952 data: 0.0004 max mem: 8426 +[2024-12-11 01:49:20 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:14 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9366 (0.8670) acc1: 79.6875 (80.6956) acc3: 92.1875 (93.1956) acc5: 95.3125 (95.7157) time: 0.2090 data: 0.0004 max mem: 8426 +[2024-12-11 01:49:22 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8095 (0.8569) acc1: 79.6875 (81.0213) acc3: 93.7500 (93.3117) acc5: 96.8750 (95.8460) time: 0.2037 data: 0.0004 max mem: 8426 +[2024-12-11 01:49:24 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0243 (0.9420) acc1: 75.7812 (78.9522) acc3: 89.0625 (91.8045) acc5: 92.1875 (94.7763) time: 0.1845 data: 0.0004 max mem: 8426 +[2024-12-11 01:49:26 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2249 (0.9882) acc1: 71.8750 (78.1762) acc3: 85.9375 (90.8940) acc5: 89.8438 (93.9421) time: 0.2052 data: 0.0004 max mem: 8426 +[2024-12-11 01:49:28 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1762 (1.0278) acc1: 75.0000 (77.2777) acc3: 85.9375 (90.3829) acc5: 89.8438 (93.5299) time: 0.2058 data: 0.0004 max mem: 8426 +[2024-12-11 01:49:30 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1918 (1.0619) acc1: 73.4375 (76.4950) acc3: 87.5000 (89.8341) acc5: 89.8438 (93.0170) time: 0.2120 data: 0.0006 max mem: 8426 +[2024-12-11 01:49:32 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2243 (1.0905) acc1: 70.3125 (75.7383) acc3: 85.1562 (89.4660) acc5: 89.8438 (92.7541) time: 0.2069 data: 0.0006 max mem: 8426 +[2024-12-11 01:49:33 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1613 (1.0785) acc1: 73.4375 (75.9760) acc3: 89.0625 (89.6480) acc5: 91.4062 (92.9280) time: 0.1903 data: 0.0006 max mem: 8426 +[2024-12-11 01:49:33 root] (utils.py 297): INFO Test: Total time: 0:00:19 (0.2009 s / it) +[2024-12-11 01:49:33 root] (engine.py 264): INFO * Acc@1 75.800 Acc@3 89.680 Acc@5 92.938 loss 1.079 flops 1.285 layer_flops 1.251 +[2024-12-11 01:49:33 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 75.8% +[2024-12-11 01:49:33 root] (main.py 576): INFO Max accuracy: 75.88% +[2024-12-11 01:49:36 root] (utils.py 283): INFO Epoch: [26] [ 0/2502] eta: 1:28:54 lr: 0.000002 loss_cls: 3.5228 (3.5228) grad_norm: 2.5530 (2.5530) time: 2.1319 data: 0.0003 max mem: 8426 +[2024-12-11 01:49:58 root] (utils.py 283): INFO Epoch: [26] [ 10/2502] eta: 1:34:38 lr: 0.000002 loss_cls: 3.8609 (3.9660) grad_norm: 2.4708 (2.4685) time: 2.2788 data: 0.0003 max mem: 8426 +[2024-12-11 01:50:22 root] (utils.py 283): INFO Epoch: [26] [ 20/2502] eta: 1:35:32 lr: 0.000002 loss_cls: 3.8609 (3.9089) grad_norm: 2.3960 (2.4653) time: 2.3187 data: 0.0002 max mem: 8426 +[2024-12-11 01:50:45 root] (utils.py 283): INFO Epoch: [26] [ 30/2502] eta: 1:35:46 lr: 0.000002 loss_cls: 3.7603 (3.8700) grad_norm: 2.4354 (2.4713) time: 2.3496 data: 0.0003 max mem: 8426 +[2024-12-11 01:51:08 root] (utils.py 283): INFO Epoch: [26] [ 40/2502] eta: 1:35:08 lr: 0.000002 loss_cls: 3.9530 (3.9275) grad_norm: 2.4296 (2.4639) time: 2.3281 data: 0.0003 max mem: 8426 +[2024-12-11 01:51:31 root] (utils.py 283): INFO Epoch: [26] [ 50/2502] eta: 1:34:22 lr: 0.000002 loss_cls: 3.9231 (3.8861) grad_norm: 2.4935 (2.4807) time: 2.2863 data: 0.0003 max mem: 8426 +[2024-12-11 01:51:53 root] (utils.py 283): INFO Epoch: [26] [ 60/2502] eta: 1:33:27 lr: 0.000002 loss_cls: 4.2105 (3.9446) grad_norm: 2.5262 (2.4880) time: 2.2499 data: 0.0003 max mem: 8426 +[2024-12-11 01:52:16 root] (utils.py 283): INFO Epoch: [26] [ 70/2502] eta: 1:32:52 lr: 0.000002 loss_cls: 4.2395 (3.9483) grad_norm: 2.5077 (2.4798) time: 2.2449 data: 0.0003 max mem: 8426 +[2024-12-11 01:52:39 root] (utils.py 283): INFO Epoch: [26] [ 80/2502] eta: 1:32:39 lr: 0.000002 loss_cls: 3.5439 (3.9180) grad_norm: 2.4154 (2.4731) time: 2.2925 data: 0.0003 max mem: 8426 +[2024-12-11 01:53:03 root] (utils.py 283): INFO Epoch: [26] [ 90/2502] eta: 1:32:26 lr: 0.000002 loss_cls: 3.6180 (3.8985) grad_norm: 2.4093 (2.4668) time: 2.3298 data: 0.0002 max mem: 8426 +[2024-12-11 01:53:26 root] (utils.py 283): INFO Epoch: [26] [ 100/2502] eta: 1:32:23 lr: 0.000002 loss_cls: 3.6760 (3.8780) grad_norm: 2.4684 (2.4795) time: 2.3591 data: 0.0002 max mem: 8426 +[2024-12-11 01:53:50 root] (utils.py 283): INFO Epoch: [26] [ 110/2502] eta: 1:32:10 lr: 0.000002 loss_cls: 4.0243 (3.8951) grad_norm: 2.4632 (2.4762) time: 2.3693 data: 0.0002 max mem: 8426 +[2024-12-11 01:54:14 root] (utils.py 283): INFO Epoch: [26] [ 120/2502] eta: 1:32:02 lr: 0.000002 loss_cls: 4.1798 (3.9054) grad_norm: 2.3933 (2.4658) time: 2.3711 data: 0.0003 max mem: 8426 +[2024-12-11 01:54:36 root] (utils.py 283): INFO Epoch: [26] [ 130/2502] eta: 1:31:20 lr: 0.000002 loss_cls: 4.1906 (3.9302) grad_norm: 2.3765 (2.4630) time: 2.3015 data: 0.0003 max mem: 8426 +[2024-12-11 01:55:00 root] (utils.py 283): INFO Epoch: [26] [ 140/2502] eta: 1:31:10 lr: 0.000002 loss_cls: 4.2693 (3.9479) grad_norm: 2.3765 (2.4605) time: 2.3026 data: 0.0003 max mem: 8426 +[2024-12-11 01:55:23 root] (utils.py 283): INFO Epoch: [26] [ 150/2502] eta: 1:30:50 lr: 0.000002 loss_cls: 4.2383 (3.9620) grad_norm: 2.4749 (2.4634) time: 2.3618 data: 0.0003 max mem: 8426 +[2024-12-11 01:55:48 root] (utils.py 283): INFO Epoch: [26] [ 160/2502] eta: 1:30:51 lr: 0.000002 loss_cls: 4.1300 (3.9702) grad_norm: 2.4507 (2.4581) time: 2.4083 data: 0.0003 max mem: 8426 +[2024-12-11 01:56:12 root] (utils.py 283): INFO Epoch: [26] [ 170/2502] eta: 1:30:34 lr: 0.000002 loss_cls: 3.9764 (3.9526) grad_norm: 2.4417 (2.4612) time: 2.4279 data: 0.0003 max mem: 8426 +[2024-12-11 01:56:35 root] (utils.py 283): INFO Epoch: [26] [ 180/2502] eta: 1:30:12 lr: 0.000002 loss_cls: 3.9998 (3.9599) grad_norm: 2.4520 (2.4604) time: 2.3588 data: 0.0003 max mem: 8426 +[2024-12-11 01:56:58 root] (utils.py 283): INFO Epoch: [26] [ 190/2502] eta: 1:29:42 lr: 0.000002 loss_cls: 4.0442 (3.9551) grad_norm: 2.4826 (2.4609) time: 2.3073 data: 0.0003 max mem: 8426 +[2024-12-11 01:57:21 root] (utils.py 283): INFO Epoch: [26] [ 200/2502] eta: 1:29:13 lr: 0.000002 loss_cls: 3.9592 (3.9547) grad_norm: 2.4482 (2.4610) time: 2.2785 data: 0.0003 max mem: 8426 +[2024-12-11 01:57:45 root] (utils.py 283): INFO Epoch: [26] [ 210/2502] eta: 1:28:56 lr: 0.000002 loss_cls: 3.9902 (3.9534) grad_norm: 2.4077 (2.4596) time: 2.3329 data: 0.0003 max mem: 8426 +[2024-12-11 01:58:08 root] (utils.py 283): INFO Epoch: [26] [ 220/2502] eta: 1:28:33 lr: 0.000002 loss_cls: 4.0212 (3.9606) grad_norm: 2.4426 (2.4613) time: 2.3558 data: 0.0003 max mem: 8426 +[2024-12-11 01:58:32 root] (utils.py 283): INFO Epoch: [26] [ 230/2502] eta: 1:28:15 lr: 0.000002 loss_cls: 4.1953 (3.9674) grad_norm: 2.4388 (2.4618) time: 2.3560 data: 0.0003 max mem: 8426 +[2024-12-11 01:58:55 root] (utils.py 283): INFO Epoch: [26] [ 240/2502] eta: 1:27:54 lr: 0.000002 loss_cls: 3.9656 (3.9620) grad_norm: 2.4369 (2.4602) time: 2.3676 data: 0.0002 max mem: 8426 +[2024-12-11 01:59:19 root] (utils.py 283): INFO Epoch: [26] [ 250/2502] eta: 1:27:33 lr: 0.000002 loss_cls: 3.8380 (3.9432) grad_norm: 2.3884 (2.4581) time: 2.3539 data: 0.0002 max mem: 8426 +[2024-12-11 01:59:42 root] (utils.py 283): INFO Epoch: [26] [ 260/2502] eta: 1:27:08 lr: 0.000002 loss_cls: 3.6394 (3.9414) grad_norm: 2.3956 (2.4579) time: 2.3372 data: 0.0003 max mem: 8426 +[2024-12-11 02:00:05 root] (utils.py 283): INFO Epoch: [26] [ 270/2502] eta: 1:26:44 lr: 0.000002 loss_cls: 3.9277 (3.9406) grad_norm: 2.4772 (2.4578) time: 2.3204 data: 0.0003 max mem: 8426 +[2024-12-11 02:00:29 root] (utils.py 283): INFO Epoch: [26] [ 280/2502] eta: 1:26:20 lr: 0.000002 loss_cls: 3.9548 (3.9397) grad_norm: 2.3957 (2.4558) time: 2.3226 data: 0.0003 max mem: 8426 +[2024-12-11 02:00:51 root] (utils.py 283): INFO Epoch: [26] [ 290/2502] eta: 1:25:53 lr: 0.000002 loss_cls: 4.2142 (3.9247) grad_norm: 2.3955 (2.4547) time: 2.3049 data: 0.0003 max mem: 8426 +[2024-12-11 02:01:15 root] (utils.py 283): INFO Epoch: [26] [ 300/2502] eta: 1:25:32 lr: 0.000002 loss_cls: 4.1048 (3.9225) grad_norm: 2.3836 (2.4526) time: 2.3241 data: 0.0003 max mem: 8426 +[2024-12-11 02:01:38 root] (utils.py 283): INFO Epoch: [26] [ 310/2502] eta: 1:25:09 lr: 0.000002 loss_cls: 4.0793 (3.9189) grad_norm: 2.3972 (2.4525) time: 2.3443 data: 0.0002 max mem: 8426 +[2024-12-11 02:02:02 root] (utils.py 283): INFO Epoch: [26] [ 320/2502] eta: 1:24:45 lr: 0.000002 loss_cls: 3.8459 (3.9110) grad_norm: 2.4504 (2.4538) time: 2.3246 data: 0.0002 max mem: 8426 +[2024-12-11 02:02:24 root] (utils.py 283): INFO Epoch: [26] [ 330/2502] eta: 1:24:19 lr: 0.000002 loss_cls: 4.0620 (3.9113) grad_norm: 2.4832 (2.4536) time: 2.3085 data: 0.0002 max mem: 8426 +[2024-12-11 02:02:47 root] (utils.py 283): INFO Epoch: [26] [ 340/2502] eta: 1:23:53 lr: 0.000002 loss_cls: 4.0308 (3.9123) grad_norm: 2.4847 (2.4562) time: 2.2916 data: 0.0002 max mem: 8426 +[2024-12-11 02:03:10 root] (utils.py 283): INFO Epoch: [26] [ 350/2502] eta: 1:23:26 lr: 0.000002 loss_cls: 4.0308 (3.9176) grad_norm: 2.4825 (2.4562) time: 2.2787 data: 0.0003 max mem: 8426 +[2024-12-11 02:03:33 root] (utils.py 283): INFO Epoch: [26] [ 360/2502] eta: 1:23:04 lr: 0.000002 loss_cls: 4.0425 (3.9169) grad_norm: 2.3870 (2.4538) time: 2.3061 data: 0.0002 max mem: 8426 +[2024-12-11 02:03:57 root] (utils.py 283): INFO Epoch: [26] [ 370/2502] eta: 1:22:42 lr: 0.000002 loss_cls: 4.1079 (3.9226) grad_norm: 2.3870 (2.4544) time: 2.3503 data: 0.0002 max mem: 8426 +[2024-12-11 02:04:20 root] (utils.py 283): INFO Epoch: [26] [ 380/2502] eta: 1:22:18 lr: 0.000002 loss_cls: 4.2063 (3.9283) grad_norm: 2.4615 (2.4546) time: 2.3287 data: 0.0003 max mem: 8426 +[2024-12-11 02:04:44 root] (utils.py 283): INFO Epoch: [26] [ 390/2502] eta: 1:21:56 lr: 0.000002 loss_cls: 4.2195 (3.9323) grad_norm: 2.4183 (2.4528) time: 2.3291 data: 0.0002 max mem: 8426 +[2024-12-11 02:05:07 root] (utils.py 283): INFO Epoch: [26] [ 400/2502] eta: 1:21:32 lr: 0.000002 loss_cls: 4.0921 (3.9363) grad_norm: 2.4183 (2.4524) time: 2.3392 data: 0.0003 max mem: 8426 +[2024-12-11 02:05:30 root] (utils.py 283): INFO Epoch: [26] [ 410/2502] eta: 1:21:10 lr: 0.000002 loss_cls: 4.1458 (3.9390) grad_norm: 2.4387 (2.4520) time: 2.3343 data: 0.0003 max mem: 8426 +[2024-12-11 02:05:53 root] (utils.py 283): INFO Epoch: [26] [ 420/2502] eta: 1:20:44 lr: 0.000002 loss_cls: 4.0346 (3.9359) grad_norm: 2.3658 (2.4503) time: 2.3093 data: 0.0003 max mem: 8426 +[2024-12-11 02:06:17 root] (utils.py 283): INFO Epoch: [26] [ 430/2502] eta: 1:20:24 lr: 0.000002 loss_cls: 4.0346 (3.9367) grad_norm: 2.3553 (2.4491) time: 2.3278 data: 0.0003 max mem: 8426 +[2024-12-11 02:06:40 root] (utils.py 283): INFO Epoch: [26] [ 440/2502] eta: 1:19:59 lr: 0.000002 loss_cls: 4.0760 (3.9405) grad_norm: 2.3747 (2.4493) time: 2.3474 data: 0.0003 max mem: 8426 +[2024-12-11 02:07:04 root] (utils.py 283): INFO Epoch: [26] [ 450/2502] eta: 1:19:38 lr: 0.000002 loss_cls: 4.2110 (3.9440) grad_norm: 2.4449 (2.4509) time: 2.3440 data: 0.0003 max mem: 8426 +[2024-12-11 02:07:27 root] (utils.py 283): INFO Epoch: [26] [ 460/2502] eta: 1:19:13 lr: 0.000002 loss_cls: 4.0813 (3.9440) grad_norm: 2.4240 (2.4495) time: 2.3325 data: 0.0003 max mem: 8426 +[2024-12-11 02:07:50 root] (utils.py 283): INFO Epoch: [26] [ 470/2502] eta: 1:18:52 lr: 0.000002 loss_cls: 4.0713 (3.9446) grad_norm: 2.4010 (2.4494) time: 2.3277 data: 0.0003 max mem: 8426 +[2024-12-11 02:08:14 root] (utils.py 283): INFO Epoch: [26] [ 480/2502] eta: 1:18:29 lr: 0.000002 loss_cls: 4.1316 (3.9491) grad_norm: 2.4334 (2.4502) time: 2.3605 data: 0.0003 max mem: 8426 +[2024-12-11 02:08:38 root] (utils.py 283): INFO Epoch: [26] [ 490/2502] eta: 1:18:11 lr: 0.000002 loss_cls: 4.1673 (3.9530) grad_norm: 2.4564 (2.4502) time: 2.4010 data: 0.0003 max mem: 8426 +[2024-12-11 02:09:01 root] (utils.py 283): INFO Epoch: [26] [ 500/2502] eta: 1:17:46 lr: 0.000002 loss_cls: 4.1403 (3.9539) grad_norm: 2.4278 (2.4504) time: 2.3715 data: 0.0003 max mem: 8426 +[2024-12-11 02:09:24 root] (utils.py 283): INFO Epoch: [26] [ 510/2502] eta: 1:17:20 lr: 0.000002 loss_cls: 3.8739 (3.9510) grad_norm: 2.4235 (2.4508) time: 2.2716 data: 0.0003 max mem: 8426 +[2024-12-11 02:09:47 root] (utils.py 283): INFO Epoch: [26] [ 520/2502] eta: 1:16:56 lr: 0.000002 loss_cls: 4.1347 (3.9557) grad_norm: 2.4299 (2.4519) time: 2.2836 data: 0.0003 max mem: 8426 +[2024-12-11 02:10:10 root] (utils.py 283): INFO Epoch: [26] [ 530/2502] eta: 1:16:33 lr: 0.000002 loss_cls: 4.1106 (3.9518) grad_norm: 2.4691 (2.4515) time: 2.3253 data: 0.0003 max mem: 8426 +[2024-12-11 02:10:33 root] (utils.py 283): INFO Epoch: [26] [ 540/2502] eta: 1:16:09 lr: 0.000002 loss_cls: 3.6171 (3.9461) grad_norm: 2.4865 (2.4527) time: 2.3195 data: 0.0003 max mem: 8426 +[2024-12-11 02:10:56 root] (utils.py 283): INFO Epoch: [26] [ 550/2502] eta: 1:15:43 lr: 0.000002 loss_cls: 3.8115 (3.9429) grad_norm: 2.4251 (2.4514) time: 2.2868 data: 0.0003 max mem: 8426 +[2024-12-11 02:11:19 root] (utils.py 283): INFO Epoch: [26] [ 560/2502] eta: 1:15:20 lr: 0.000002 loss_cls: 4.1095 (3.9462) grad_norm: 2.3534 (2.4494) time: 2.2956 data: 0.0003 max mem: 8426 +[2024-12-11 02:11:42 root] (utils.py 283): INFO Epoch: [26] [ 570/2502] eta: 1:14:56 lr: 0.000002 loss_cls: 4.1095 (3.9418) grad_norm: 2.3534 (2.4489) time: 2.3126 data: 0.0003 max mem: 8426 +[2024-12-11 02:12:06 root] (utils.py 283): INFO Epoch: [26] [ 580/2502] eta: 1:14:32 lr: 0.000002 loss_cls: 3.7141 (3.9406) grad_norm: 2.4299 (2.4489) time: 2.3128 data: 0.0003 max mem: 8426 +[2024-12-11 02:12:29 root] (utils.py 283): INFO Epoch: [26] [ 590/2502] eta: 1:14:09 lr: 0.000002 loss_cls: 3.8514 (3.9376) grad_norm: 2.4456 (2.4492) time: 2.3323 data: 0.0003 max mem: 8426 +[2024-12-11 02:12:53 root] (utils.py 283): INFO Epoch: [26] [ 600/2502] eta: 1:13:48 lr: 0.000002 loss_cls: 3.7970 (3.9326) grad_norm: 2.4186 (2.4490) time: 2.3661 data: 0.0003 max mem: 8426 +[2024-12-11 02:13:16 root] (utils.py 283): INFO Epoch: [26] [ 610/2502] eta: 1:13:23 lr: 0.000002 loss_cls: 3.7862 (3.9302) grad_norm: 2.3906 (2.4491) time: 2.3326 data: 0.0003 max mem: 8426 +[2024-12-11 02:13:38 root] (utils.py 283): INFO Epoch: [26] [ 620/2502] eta: 1:12:57 lr: 0.000002 loss_cls: 4.0782 (3.9328) grad_norm: 2.3504 (2.4480) time: 2.2458 data: 0.0003 max mem: 8426 +[2024-12-11 02:14:01 root] (utils.py 283): INFO Epoch: [26] [ 630/2502] eta: 1:12:33 lr: 0.000002 loss_cls: 4.0568 (3.9328) grad_norm: 2.3683 (2.4482) time: 2.2670 data: 0.0003 max mem: 8426 +[2024-12-11 02:14:24 root] (utils.py 283): INFO Epoch: [26] [ 640/2502] eta: 1:12:10 lr: 0.000002 loss_cls: 3.7824 (3.9293) grad_norm: 2.4120 (2.4479) time: 2.3296 data: 0.0003 max mem: 8426 +[2024-12-11 02:14:47 root] (utils.py 283): INFO Epoch: [26] [ 650/2502] eta: 1:11:46 lr: 0.000002 loss_cls: 3.8707 (3.9291) grad_norm: 2.4120 (2.4481) time: 2.3209 data: 0.0003 max mem: 8426 +[2024-12-11 02:15:11 root] (utils.py 283): INFO Epoch: [26] [ 660/2502] eta: 1:11:24 lr: 0.000002 loss_cls: 3.9806 (3.9257) grad_norm: 2.3718 (2.4478) time: 2.3280 data: 0.0003 max mem: 8426 +[2024-12-11 02:15:34 root] (utils.py 283): INFO Epoch: [26] [ 670/2502] eta: 1:11:01 lr: 0.000002 loss_cls: 3.9700 (3.9259) grad_norm: 2.3322 (2.4457) time: 2.3415 data: 0.0003 max mem: 8426 +[2024-12-11 02:15:57 root] (utils.py 283): INFO Epoch: [26] [ 680/2502] eta: 1:10:36 lr: 0.000002 loss_cls: 3.8298 (3.9214) grad_norm: 2.3410 (2.4464) time: 2.2956 data: 0.0003 max mem: 8426 +[2024-12-11 02:16:20 root] (utils.py 283): INFO Epoch: [26] [ 690/2502] eta: 1:10:12 lr: 0.000002 loss_cls: 3.7321 (3.9195) grad_norm: 2.4849 (2.4465) time: 2.2840 data: 0.0003 max mem: 8426 +[2024-12-11 02:16:43 root] (utils.py 283): INFO Epoch: [26] [ 700/2502] eta: 1:09:49 lr: 0.000002 loss_cls: 3.9194 (3.9196) grad_norm: 2.4779 (2.4469) time: 2.3244 data: 0.0003 max mem: 8426 +[2024-12-11 02:17:07 root] (utils.py 283): INFO Epoch: [26] [ 710/2502] eta: 1:09:27 lr: 0.000002 loss_cls: 4.1638 (3.9207) grad_norm: 2.4810 (2.4471) time: 2.3522 data: 0.0003 max mem: 8426 +[2024-12-11 02:17:30 root] (utils.py 283): INFO Epoch: [26] [ 720/2502] eta: 1:09:04 lr: 0.000002 loss_cls: 4.2197 (3.9230) grad_norm: 2.4772 (2.4469) time: 2.3531 data: 0.0003 max mem: 8426 +[2024-12-11 02:17:54 root] (utils.py 283): INFO Epoch: [26] [ 730/2502] eta: 1:08:41 lr: 0.000002 loss_cls: 4.0368 (3.9174) grad_norm: 2.4772 (2.4471) time: 2.3463 data: 0.0003 max mem: 8426 +[2024-12-11 02:18:17 root] (utils.py 283): INFO Epoch: [26] [ 740/2502] eta: 1:08:19 lr: 0.000002 loss_cls: 4.0368 (3.9209) grad_norm: 2.4814 (2.4476) time: 2.3543 data: 0.0003 max mem: 8426 +[2024-12-11 02:18:41 root] (utils.py 283): INFO Epoch: [26] [ 750/2502] eta: 1:07:56 lr: 0.000002 loss_cls: 4.1671 (3.9211) grad_norm: 2.4104 (2.4467) time: 2.3522 data: 0.0002 max mem: 8426 +[2024-12-11 02:19:04 root] (utils.py 283): INFO Epoch: [26] [ 760/2502] eta: 1:07:32 lr: 0.000002 loss_cls: 4.0530 (3.9194) grad_norm: 2.3682 (2.4463) time: 2.3122 data: 0.0003 max mem: 8426 +[2024-12-11 02:19:27 root] (utils.py 283): INFO Epoch: [26] [ 770/2502] eta: 1:07:09 lr: 0.000002 loss_cls: 3.6604 (3.9146) grad_norm: 2.4783 (2.4475) time: 2.3213 data: 0.0003 max mem: 8426 +[2024-12-11 02:19:52 root] (utils.py 283): INFO Epoch: [26] [ 780/2502] eta: 1:06:49 lr: 0.000002 loss_cls: 3.5235 (3.9103) grad_norm: 2.4511 (2.4467) time: 2.4055 data: 0.0003 max mem: 8426 +[2024-12-11 02:20:15 root] (utils.py 283): INFO Epoch: [26] [ 790/2502] eta: 1:06:25 lr: 0.000002 loss_cls: 3.7001 (3.9084) grad_norm: 2.4651 (2.4475) time: 2.3817 data: 0.0003 max mem: 8426 +[2024-12-11 02:20:38 root] (utils.py 283): INFO Epoch: [26] [ 800/2502] eta: 1:06:02 lr: 0.000002 loss_cls: 4.0501 (3.9101) grad_norm: 2.4609 (2.4465) time: 2.3142 data: 0.0003 max mem: 8426 +[2024-12-11 02:21:02 root] (utils.py 283): INFO Epoch: [26] [ 810/2502] eta: 1:05:40 lr: 0.000002 loss_cls: 4.1357 (3.9106) grad_norm: 2.4114 (2.4463) time: 2.3526 data: 0.0003 max mem: 8426 +[2024-12-11 02:21:26 root] (utils.py 283): INFO Epoch: [26] [ 820/2502] eta: 1:05:17 lr: 0.000002 loss_cls: 3.9032 (3.9103) grad_norm: 2.4391 (2.4466) time: 2.3719 data: 0.0003 max mem: 8426 +[2024-12-11 02:21:49 root] (utils.py 283): INFO Epoch: [26] [ 830/2502] eta: 1:04:54 lr: 0.000002 loss_cls: 3.9506 (3.9118) grad_norm: 2.4612 (2.4471) time: 2.3457 data: 0.0003 max mem: 8426 +[2024-12-11 02:22:13 root] (utils.py 283): INFO Epoch: [26] [ 840/2502] eta: 1:04:32 lr: 0.000002 loss_cls: 3.9506 (3.9100) grad_norm: 2.5042 (2.4478) time: 2.3816 data: 0.0003 max mem: 8426 +[2024-12-11 02:22:36 root] (utils.py 283): INFO Epoch: [26] [ 850/2502] eta: 1:04:08 lr: 0.000002 loss_cls: 3.8103 (3.9099) grad_norm: 2.4557 (2.4484) time: 2.3500 data: 0.0003 max mem: 8426 +[2024-12-11 02:22:59 root] (utils.py 283): INFO Epoch: [26] [ 860/2502] eta: 1:03:44 lr: 0.000002 loss_cls: 4.0199 (3.9129) grad_norm: 2.3984 (2.4475) time: 2.2755 data: 0.0003 max mem: 8426 +[2024-12-11 02:23:23 root] (utils.py 283): INFO Epoch: [26] [ 870/2502] eta: 1:03:23 lr: 0.000002 loss_cls: 4.0351 (3.9132) grad_norm: 2.3680 (2.4472) time: 2.3759 data: 0.0003 max mem: 8426 +[2024-12-11 02:23:47 root] (utils.py 283): INFO Epoch: [26] [ 880/2502] eta: 1:03:01 lr: 0.000002 loss_cls: 3.9005 (3.9121) grad_norm: 2.3781 (2.4470) time: 2.4260 data: 0.0003 max mem: 8426 +[2024-12-11 02:24:11 root] (utils.py 283): INFO Epoch: [26] [ 890/2502] eta: 1:02:38 lr: 0.000002 loss_cls: 4.0929 (3.9148) grad_norm: 2.3856 (2.4474) time: 2.3860 data: 0.0003 max mem: 8426 +[2024-12-11 02:24:35 root] (utils.py 283): INFO Epoch: [26] [ 900/2502] eta: 1:02:16 lr: 0.000002 loss_cls: 4.0929 (3.9134) grad_norm: 2.4230 (2.4477) time: 2.3898 data: 0.0003 max mem: 8426 +[2024-12-11 02:24:59 root] (utils.py 283): INFO Epoch: [26] [ 910/2502] eta: 1:01:54 lr: 0.000002 loss_cls: 4.0014 (3.9119) grad_norm: 2.4276 (2.4477) time: 2.4068 data: 0.0003 max mem: 8426 +[2024-12-11 02:25:23 root] (utils.py 283): INFO Epoch: [26] [ 920/2502] eta: 1:01:31 lr: 0.000002 loss_cls: 3.9137 (3.9101) grad_norm: 2.4276 (2.4476) time: 2.3739 data: 0.0003 max mem: 8426 +[2024-12-11 02:25:46 root] (utils.py 283): INFO Epoch: [26] [ 930/2502] eta: 1:01:08 lr: 0.000002 loss_cls: 3.6424 (3.9061) grad_norm: 2.4009 (2.4469) time: 2.3414 data: 0.0003 max mem: 8426 +[2024-12-11 02:26:09 root] (utils.py 283): INFO Epoch: [26] [ 940/2502] eta: 1:00:44 lr: 0.000002 loss_cls: 3.9544 (3.9089) grad_norm: 2.3846 (2.4467) time: 2.3329 data: 0.0003 max mem: 8426 +[2024-12-11 02:26:32 root] (utils.py 283): INFO Epoch: [26] [ 950/2502] eta: 1:00:21 lr: 0.000002 loss_cls: 4.2849 (3.9100) grad_norm: 2.4130 (2.4468) time: 2.3147 data: 0.0003 max mem: 8426 +[2024-12-11 02:26:55 root] (utils.py 283): INFO Epoch: [26] [ 960/2502] eta: 0:59:56 lr: 0.000002 loss_cls: 4.2127 (3.9112) grad_norm: 2.4863 (2.4472) time: 2.2919 data: 0.0003 max mem: 8426 +[2024-12-11 02:27:19 root] (utils.py 283): INFO Epoch: [26] [ 970/2502] eta: 0:59:34 lr: 0.000002 loss_cls: 3.9511 (3.9091) grad_norm: 2.4744 (2.4475) time: 2.3199 data: 0.0003 max mem: 8426 +[2024-12-11 02:27:42 root] (utils.py 283): INFO Epoch: [26] [ 980/2502] eta: 0:59:11 lr: 0.000002 loss_cls: 3.9243 (3.9094) grad_norm: 2.4459 (2.4476) time: 2.3677 data: 0.0002 max mem: 8426 +[2024-12-11 02:28:05 root] (utils.py 283): INFO Epoch: [26] [ 990/2502] eta: 0:58:47 lr: 0.000002 loss_cls: 4.1219 (3.9100) grad_norm: 2.3961 (2.4473) time: 2.3250 data: 0.0003 max mem: 8426 +[2024-12-11 02:28:28 root] (utils.py 283): INFO Epoch: [26] [1000/2502] eta: 0:58:22 lr: 0.000002 loss_cls: 4.1385 (3.9120) grad_norm: 2.4030 (2.4470) time: 2.2686 data: 0.0003 max mem: 8426 +[2024-12-11 02:28:51 root] (utils.py 283): INFO Epoch: [26] [1010/2502] eta: 0:57:59 lr: 0.000002 loss_cls: 4.1530 (3.9112) grad_norm: 2.3593 (2.4468) time: 2.2988 data: 0.0003 max mem: 8426 +[2024-12-11 02:29:15 root] (utils.py 283): INFO Epoch: [26] [1020/2502] eta: 0:57:37 lr: 0.000002 loss_cls: 3.9132 (3.9110) grad_norm: 2.4806 (2.4474) time: 2.3814 data: 0.0003 max mem: 8426 +[2024-12-11 02:29:39 root] (utils.py 283): INFO Epoch: [26] [1030/2502] eta: 0:57:14 lr: 0.000002 loss_cls: 4.1411 (3.9141) grad_norm: 2.4212 (2.4467) time: 2.3878 data: 0.0003 max mem: 8426 +[2024-12-11 02:30:03 root] (utils.py 283): INFO Epoch: [26] [1040/2502] eta: 0:56:51 lr: 0.000002 loss_cls: 4.1246 (3.9137) grad_norm: 2.3640 (2.4463) time: 2.3775 data: 0.0003 max mem: 8426 +[2024-12-11 02:30:27 root] (utils.py 283): INFO Epoch: [26] [1050/2502] eta: 0:56:29 lr: 0.000002 loss_cls: 3.9738 (3.9139) grad_norm: 2.3859 (2.4457) time: 2.3824 data: 0.0003 max mem: 8426 +[2024-12-11 02:30:51 root] (utils.py 283): INFO Epoch: [26] [1060/2502] eta: 0:56:07 lr: 0.000002 loss_cls: 4.0565 (3.9173) grad_norm: 2.3859 (2.4453) time: 2.3980 data: 0.0003 max mem: 8426 +[2024-12-11 02:31:14 root] (utils.py 283): INFO Epoch: [26] [1070/2502] eta: 0:55:43 lr: 0.000002 loss_cls: 3.9676 (3.9149) grad_norm: 2.5156 (2.4461) time: 2.3558 data: 0.0003 max mem: 8426 +[2024-12-11 02:31:38 root] (utils.py 283): INFO Epoch: [26] [1080/2502] eta: 0:55:20 lr: 0.000002 loss_cls: 3.9345 (3.9158) grad_norm: 2.4829 (2.4454) time: 2.3542 data: 0.0003 max mem: 8426 +[2024-12-11 02:32:01 root] (utils.py 283): INFO Epoch: [26] [1090/2502] eta: 0:54:57 lr: 0.000002 loss_cls: 4.0268 (3.9156) grad_norm: 2.3665 (2.4451) time: 2.3708 data: 0.0003 max mem: 8426 +[2024-12-11 02:32:25 root] (utils.py 283): INFO Epoch: [26] [1100/2502] eta: 0:54:34 lr: 0.000002 loss_cls: 3.8772 (3.9152) grad_norm: 2.3968 (2.4447) time: 2.3661 data: 0.0003 max mem: 8426 +[2024-12-11 02:32:49 root] (utils.py 283): INFO Epoch: [26] [1110/2502] eta: 0:54:12 lr: 0.000002 loss_cls: 3.9462 (3.9172) grad_norm: 2.4209 (2.4450) time: 2.3935 data: 0.0003 max mem: 8426 +[2024-12-11 02:33:13 root] (utils.py 283): INFO Epoch: [26] [1120/2502] eta: 0:53:48 lr: 0.000002 loss_cls: 4.0501 (3.9166) grad_norm: 2.4674 (2.4454) time: 2.3652 data: 0.0002 max mem: 8426 +[2024-12-11 02:33:36 root] (utils.py 283): INFO Epoch: [26] [1130/2502] eta: 0:53:25 lr: 0.000002 loss_cls: 4.0501 (3.9160) grad_norm: 2.3789 (2.4446) time: 2.3584 data: 0.0003 max mem: 8426 +[2024-12-11 02:34:00 root] (utils.py 283): INFO Epoch: [26] [1140/2502] eta: 0:53:02 lr: 0.000002 loss_cls: 3.9713 (3.9152) grad_norm: 2.3789 (2.4451) time: 2.3590 data: 0.0003 max mem: 8426 +[2024-12-11 02:34:24 root] (utils.py 283): INFO Epoch: [26] [1150/2502] eta: 0:52:40 lr: 0.000002 loss_cls: 3.9980 (3.9173) grad_norm: 2.4498 (2.4444) time: 2.3888 data: 0.0002 max mem: 8426 +[2024-12-11 02:34:48 root] (utils.py 283): INFO Epoch: [26] [1160/2502] eta: 0:52:17 lr: 0.000002 loss_cls: 4.0860 (3.9168) grad_norm: 2.3915 (2.4443) time: 2.4232 data: 0.0003 max mem: 8426 +[2024-12-11 02:35:12 root] (utils.py 283): INFO Epoch: [26] [1170/2502] eta: 0:51:55 lr: 0.000002 loss_cls: 3.4624 (3.9131) grad_norm: 2.3780 (2.4438) time: 2.4206 data: 0.0003 max mem: 8426 +[2024-12-11 02:35:36 root] (utils.py 283): INFO Epoch: [26] [1180/2502] eta: 0:51:32 lr: 0.000002 loss_cls: 4.0422 (3.9164) grad_norm: 2.3629 (2.4437) time: 2.4038 data: 0.0003 max mem: 8426 +[2024-12-11 02:36:00 root] (utils.py 283): INFO Epoch: [26] [1190/2502] eta: 0:51:09 lr: 0.000002 loss_cls: 4.1721 (3.9172) grad_norm: 2.5005 (2.4444) time: 2.3897 data: 0.0003 max mem: 8426 +[2024-12-11 02:36:24 root] (utils.py 283): INFO Epoch: [26] [1200/2502] eta: 0:50:46 lr: 0.000002 loss_cls: 3.9836 (3.9156) grad_norm: 2.4591 (2.4441) time: 2.3681 data: 0.0003 max mem: 8426 +[2024-12-11 02:36:47 root] (utils.py 283): INFO Epoch: [26] [1210/2502] eta: 0:50:23 lr: 0.000002 loss_cls: 3.9809 (3.9152) grad_norm: 2.4093 (2.4442) time: 2.3476 data: 0.0003 max mem: 8426 +[2024-12-11 02:37:12 root] (utils.py 283): INFO Epoch: [26] [1220/2502] eta: 0:50:00 lr: 0.000002 loss_cls: 4.0472 (3.9142) grad_norm: 2.4093 (2.4438) time: 2.3975 data: 0.0003 max mem: 8426 +[2024-12-11 02:37:36 root] (utils.py 283): INFO Epoch: [26] [1230/2502] eta: 0:49:38 lr: 0.000002 loss_cls: 3.6678 (3.9139) grad_norm: 2.4376 (2.4441) time: 2.4296 data: 0.0003 max mem: 8426 +[2024-12-11 02:37:59 root] (utils.py 283): INFO Epoch: [26] [1240/2502] eta: 0:49:14 lr: 0.000002 loss_cls: 3.6678 (3.9138) grad_norm: 2.4430 (2.4440) time: 2.3828 data: 0.0002 max mem: 8426 +[2024-12-11 02:38:23 root] (utils.py 283): INFO Epoch: [26] [1250/2502] eta: 0:48:51 lr: 0.000002 loss_cls: 4.0207 (3.9138) grad_norm: 2.4105 (2.4440) time: 2.3502 data: 0.0002 max mem: 8426 +[2024-12-11 02:38:47 root] (utils.py 283): INFO Epoch: [26] [1260/2502] eta: 0:48:28 lr: 0.000002 loss_cls: 3.9529 (3.9121) grad_norm: 2.4190 (2.4437) time: 2.3640 data: 0.0002 max mem: 8426 +[2024-12-11 02:39:10 root] (utils.py 283): INFO Epoch: [26] [1270/2502] eta: 0:48:05 lr: 0.000002 loss_cls: 3.6971 (3.9097) grad_norm: 2.3888 (2.4434) time: 2.3724 data: 0.0002 max mem: 8426 +[2024-12-11 02:39:34 root] (utils.py 283): INFO Epoch: [26] [1280/2502] eta: 0:47:42 lr: 0.000002 loss_cls: 3.6581 (3.9085) grad_norm: 2.3538 (2.4430) time: 2.3720 data: 0.0003 max mem: 8426 +[2024-12-11 02:39:57 root] (utils.py 283): INFO Epoch: [26] [1290/2502] eta: 0:47:18 lr: 0.000002 loss_cls: 3.7907 (3.9078) grad_norm: 2.3790 (2.4431) time: 2.3364 data: 0.0003 max mem: 8426 +[2024-12-11 02:40:20 root] (utils.py 283): INFO Epoch: [26] [1300/2502] eta: 0:46:54 lr: 0.000002 loss_cls: 3.9313 (3.9089) grad_norm: 2.4368 (2.4427) time: 2.3018 data: 0.0003 max mem: 8426 +[2024-12-11 02:40:43 root] (utils.py 283): INFO Epoch: [26] [1310/2502] eta: 0:46:30 lr: 0.000002 loss_cls: 4.0949 (3.9099) grad_norm: 2.4537 (2.4430) time: 2.3072 data: 0.0003 max mem: 8426 +[2024-12-11 02:41:07 root] (utils.py 283): INFO Epoch: [26] [1320/2502] eta: 0:46:07 lr: 0.000002 loss_cls: 4.1984 (3.9120) grad_norm: 2.4759 (2.4436) time: 2.3350 data: 0.0003 max mem: 8426 +[2024-12-11 02:41:30 root] (utils.py 283): INFO Epoch: [26] [1330/2502] eta: 0:45:44 lr: 0.000002 loss_cls: 4.1190 (3.9103) grad_norm: 2.5496 (2.4447) time: 2.3662 data: 0.0003 max mem: 8426 +[2024-12-11 02:41:54 root] (utils.py 283): INFO Epoch: [26] [1340/2502] eta: 0:45:20 lr: 0.000002 loss_cls: 4.1190 (3.9113) grad_norm: 2.4341 (2.4442) time: 2.3376 data: 0.0003 max mem: 8426 +[2024-12-11 02:42:18 root] (utils.py 283): INFO Epoch: [26] [1350/2502] eta: 0:44:58 lr: 0.000002 loss_cls: 4.1873 (3.9126) grad_norm: 2.4198 (2.4443) time: 2.3658 data: 0.0003 max mem: 8426 +[2024-12-11 02:42:42 root] (utils.py 283): INFO Epoch: [26] [1360/2502] eta: 0:44:35 lr: 0.000002 loss_cls: 4.1116 (3.9127) grad_norm: 2.4517 (2.4441) time: 2.4308 data: 0.0002 max mem: 8426 +[2024-12-11 02:43:06 root] (utils.py 283): INFO Epoch: [26] [1370/2502] eta: 0:44:12 lr: 0.000002 loss_cls: 3.9069 (3.9119) grad_norm: 2.4522 (2.4446) time: 2.3917 data: 0.0003 max mem: 8426 +[2024-12-11 02:43:30 root] (utils.py 283): INFO Epoch: [26] [1380/2502] eta: 0:43:49 lr: 0.000002 loss_cls: 3.9259 (3.9117) grad_norm: 2.4889 (2.4445) time: 2.3821 data: 0.0003 max mem: 8426 +[2024-12-11 02:43:53 root] (utils.py 283): INFO Epoch: [26] [1390/2502] eta: 0:43:25 lr: 0.000002 loss_cls: 3.9298 (3.9123) grad_norm: 2.3410 (2.4438) time: 2.3535 data: 0.0003 max mem: 8426 +[2024-12-11 02:44:16 root] (utils.py 283): INFO Epoch: [26] [1400/2502] eta: 0:43:01 lr: 0.000002 loss_cls: 3.7626 (3.9086) grad_norm: 2.3728 (2.4441) time: 2.3058 data: 0.0003 max mem: 8426 +[2024-12-11 02:44:40 root] (utils.py 283): INFO Epoch: [26] [1410/2502] eta: 0:42:38 lr: 0.000002 loss_cls: 3.6841 (3.9096) grad_norm: 2.4260 (2.4437) time: 2.3559 data: 0.0003 max mem: 8426 +[2024-12-11 02:45:04 root] (utils.py 283): INFO Epoch: [26] [1420/2502] eta: 0:42:15 lr: 0.000002 loss_cls: 4.0973 (3.9083) grad_norm: 2.4260 (2.4440) time: 2.3966 data: 0.0003 max mem: 8426 +[2024-12-11 02:45:28 root] (utils.py 283): INFO Epoch: [26] [1430/2502] eta: 0:41:52 lr: 0.000002 loss_cls: 3.7767 (3.9077) grad_norm: 2.4689 (2.4440) time: 2.3926 data: 0.0003 max mem: 8426 +[2024-12-11 02:45:52 root] (utils.py 283): INFO Epoch: [26] [1440/2502] eta: 0:41:29 lr: 0.000002 loss_cls: 3.8689 (3.9073) grad_norm: 2.3945 (2.4438) time: 2.3995 data: 0.0002 max mem: 8426 +[2024-12-11 02:46:16 root] (utils.py 283): INFO Epoch: [26] [1450/2502] eta: 0:41:06 lr: 0.000002 loss_cls: 3.8779 (3.9065) grad_norm: 2.3943 (2.4436) time: 2.4190 data: 0.0002 max mem: 8426 +[2024-12-11 02:46:40 root] (utils.py 283): INFO Epoch: [26] [1460/2502] eta: 0:40:43 lr: 0.000002 loss_cls: 3.7819 (3.9066) grad_norm: 2.4459 (2.4447) time: 2.3957 data: 0.0003 max mem: 8426 +[2024-12-11 02:47:04 root] (utils.py 283): INFO Epoch: [26] [1470/2502] eta: 0:40:20 lr: 0.000002 loss_cls: 4.1008 (3.9087) grad_norm: 2.5172 (2.4450) time: 2.3814 data: 0.0003 max mem: 8426 +[2024-12-11 02:47:28 root] (utils.py 283): INFO Epoch: [26] [1480/2502] eta: 0:39:57 lr: 0.000002 loss_cls: 4.1008 (3.9080) grad_norm: 2.4664 (2.4453) time: 2.4108 data: 0.0003 max mem: 8426 +[2024-12-11 02:47:52 root] (utils.py 283): INFO Epoch: [26] [1490/2502] eta: 0:39:34 lr: 0.000002 loss_cls: 3.9393 (3.9088) grad_norm: 2.4534 (2.4456) time: 2.3970 data: 0.0003 max mem: 8426 +[2024-12-11 02:48:17 root] (utils.py 283): INFO Epoch: [26] [1500/2502] eta: 0:39:11 lr: 0.000002 loss_cls: 3.7577 (3.9065) grad_norm: 2.4534 (2.4457) time: 2.4306 data: 0.0003 max mem: 8426 +[2024-12-11 02:48:41 root] (utils.py 283): INFO Epoch: [26] [1510/2502] eta: 0:38:48 lr: 0.000002 loss_cls: 3.7780 (3.9075) grad_norm: 2.4121 (2.4454) time: 2.4696 data: 0.0003 max mem: 8426 +[2024-12-11 02:49:05 root] (utils.py 283): INFO Epoch: [26] [1520/2502] eta: 0:38:25 lr: 0.000002 loss_cls: 3.9000 (3.9071) grad_norm: 2.4158 (2.4452) time: 2.4120 data: 0.0002 max mem: 8426 +[2024-12-11 02:49:29 root] (utils.py 283): INFO Epoch: [26] [1530/2502] eta: 0:38:02 lr: 0.000002 loss_cls: 3.9826 (3.9079) grad_norm: 2.4312 (2.4454) time: 2.4049 data: 0.0002 max mem: 8426 +[2024-12-11 02:49:53 root] (utils.py 283): INFO Epoch: [26] [1540/2502] eta: 0:37:39 lr: 0.000002 loss_cls: 3.9438 (3.9066) grad_norm: 2.4342 (2.4453) time: 2.4227 data: 0.0002 max mem: 8426 +[2024-12-11 02:50:18 root] (utils.py 283): INFO Epoch: [26] [1550/2502] eta: 0:37:16 lr: 0.000002 loss_cls: 3.9438 (3.9068) grad_norm: 2.4342 (2.4454) time: 2.4306 data: 0.0002 max mem: 8426 +[2024-12-11 02:50:42 root] (utils.py 283): INFO Epoch: [26] [1560/2502] eta: 0:36:53 lr: 0.000002 loss_cls: 3.6135 (3.9053) grad_norm: 2.3942 (2.4452) time: 2.4239 data: 0.0003 max mem: 8426 +[2024-12-11 02:51:06 root] (utils.py 283): INFO Epoch: [26] [1570/2502] eta: 0:36:30 lr: 0.000002 loss_cls: 3.4172 (3.9024) grad_norm: 2.4078 (2.4451) time: 2.4013 data: 0.0003 max mem: 8426 +[2024-12-11 02:51:31 root] (utils.py 283): INFO Epoch: [26] [1580/2502] eta: 0:36:07 lr: 0.000002 loss_cls: 3.6928 (3.9028) grad_norm: 2.4308 (2.4452) time: 2.4438 data: 0.0003 max mem: 8426 +[2024-12-11 02:51:55 root] (utils.py 283): INFO Epoch: [26] [1590/2502] eta: 0:35:44 lr: 0.000002 loss_cls: 3.7703 (3.9006) grad_norm: 2.3744 (2.4447) time: 2.4509 data: 0.0003 max mem: 8426 +[2024-12-11 02:52:18 root] (utils.py 283): INFO Epoch: [26] [1600/2502] eta: 0:35:21 lr: 0.000002 loss_cls: 3.7832 (3.9009) grad_norm: 2.3744 (2.4448) time: 2.3870 data: 0.0003 max mem: 8426 +[2024-12-11 02:52:42 root] (utils.py 283): INFO Epoch: [26] [1610/2502] eta: 0:34:57 lr: 0.000002 loss_cls: 4.0602 (3.9008) grad_norm: 2.4413 (2.4445) time: 2.3528 data: 0.0003 max mem: 8426 +[2024-12-11 02:53:05 root] (utils.py 283): INFO Epoch: [26] [1620/2502] eta: 0:34:33 lr: 0.000002 loss_cls: 3.7931 (3.9003) grad_norm: 2.4413 (2.4445) time: 2.3448 data: 0.0003 max mem: 8426 +[2024-12-11 02:53:29 root] (utils.py 283): INFO Epoch: [26] [1630/2502] eta: 0:34:10 lr: 0.000002 loss_cls: 3.9685 (3.9000) grad_norm: 2.4044 (2.4441) time: 2.3527 data: 0.0003 max mem: 8426 +[2024-12-11 02:53:51 root] (utils.py 283): INFO Epoch: [26] [1640/2502] eta: 0:33:46 lr: 0.000002 loss_cls: 4.0186 (3.8993) grad_norm: 2.3371 (2.4440) time: 2.2824 data: 0.0003 max mem: 8426 +[2024-12-11 02:54:14 root] (utils.py 283): INFO Epoch: [26] [1650/2502] eta: 0:33:22 lr: 0.000002 loss_cls: 4.1702 (3.9006) grad_norm: 2.3710 (2.4435) time: 2.2772 data: 0.0003 max mem: 8426 +[2024-12-11 02:54:46 root] (utils.py 283): INFO Epoch: [26] [1660/2502] eta: 0:33:03 lr: 0.000002 loss_cls: 4.1753 (3.9009) grad_norm: 2.4124 (2.4437) time: 2.7665 data: 0.0002 max mem: 8426 +[2024-12-11 02:55:15 root] (utils.py 283): INFO Epoch: [26] [1670/2502] eta: 0:32:42 lr: 0.000002 loss_cls: 4.1287 (3.9019) grad_norm: 2.4717 (2.4441) time: 3.0120 data: 0.0003 max mem: 8426 +[2024-12-11 02:55:38 root] (utils.py 283): INFO Epoch: [26] [1680/2502] eta: 0:32:18 lr: 0.000002 loss_cls: 4.1287 (3.9020) grad_norm: 2.4717 (2.4440) time: 2.5871 data: 0.0003 max mem: 8426 +[2024-12-11 02:56:02 root] (utils.py 283): INFO Epoch: [26] [1690/2502] eta: 0:31:55 lr: 0.000002 loss_cls: 3.8189 (3.9011) grad_norm: 2.3889 (2.4438) time: 2.3476 data: 0.0003 max mem: 8426 +[2024-12-11 02:56:26 root] (utils.py 283): INFO Epoch: [26] [1700/2502] eta: 0:31:31 lr: 0.000002 loss_cls: 3.8722 (3.9020) grad_norm: 2.3793 (2.4437) time: 2.3741 data: 0.0002 max mem: 8426 +[2024-12-11 02:56:49 root] (utils.py 283): INFO Epoch: [26] [1710/2502] eta: 0:31:08 lr: 0.000002 loss_cls: 3.9181 (3.9012) grad_norm: 2.4719 (2.4441) time: 2.3792 data: 0.0002 max mem: 8426 +[2024-12-11 02:57:13 root] (utils.py 283): INFO Epoch: [26] [1720/2502] eta: 0:30:44 lr: 0.000002 loss_cls: 4.0278 (3.9026) grad_norm: 2.4213 (2.4439) time: 2.3988 data: 0.0003 max mem: 8426 +[2024-12-11 02:57:37 root] (utils.py 283): INFO Epoch: [26] [1730/2502] eta: 0:30:21 lr: 0.000002 loss_cls: 4.0278 (3.9021) grad_norm: 2.4193 (2.4440) time: 2.3859 data: 0.0003 max mem: 8426 +[2024-12-11 02:58:01 root] (utils.py 283): INFO Epoch: [26] [1740/2502] eta: 0:29:57 lr: 0.000002 loss_cls: 4.1814 (3.9032) grad_norm: 2.4437 (2.4442) time: 2.3607 data: 0.0003 max mem: 8426 +[2024-12-11 02:58:25 root] (utils.py 283): INFO Epoch: [26] [1750/2502] eta: 0:29:34 lr: 0.000002 loss_cls: 3.9967 (3.9027) grad_norm: 2.4114 (2.4441) time: 2.4148 data: 0.0002 max mem: 8426 +[2024-12-11 02:58:50 root] (utils.py 283): INFO Epoch: [26] [1760/2502] eta: 0:29:11 lr: 0.000002 loss_cls: 3.9699 (3.9040) grad_norm: 2.4592 (2.4444) time: 2.4658 data: 0.0002 max mem: 8426 +[2024-12-11 02:59:14 root] (utils.py 283): INFO Epoch: [26] [1770/2502] eta: 0:28:47 lr: 0.000002 loss_cls: 4.1387 (3.9039) grad_norm: 2.4592 (2.4441) time: 2.4233 data: 0.0002 max mem: 8426 +[2024-12-11 02:59:37 root] (utils.py 283): INFO Epoch: [26] [1780/2502] eta: 0:28:24 lr: 0.000002 loss_cls: 4.2256 (3.9055) grad_norm: 2.4018 (2.4437) time: 2.3721 data: 0.0002 max mem: 8426 +[2024-12-11 03:00:01 root] (utils.py 283): INFO Epoch: [26] [1790/2502] eta: 0:28:00 lr: 0.000002 loss_cls: 4.2593 (3.9063) grad_norm: 2.3915 (2.4435) time: 2.3658 data: 0.0003 max mem: 8426 +[2024-12-11 03:00:26 root] (utils.py 283): INFO Epoch: [26] [1800/2502] eta: 0:27:37 lr: 0.000002 loss_cls: 4.3193 (3.9085) grad_norm: 2.3915 (2.4439) time: 2.4362 data: 0.0003 max mem: 8426 +[2024-12-11 03:00:50 root] (utils.py 283): INFO Epoch: [26] [1810/2502] eta: 0:27:14 lr: 0.000002 loss_cls: 4.1594 (3.9089) grad_norm: 2.4952 (2.4443) time: 2.4629 data: 0.0002 max mem: 8426 +[2024-12-11 03:01:14 root] (utils.py 283): INFO Epoch: [26] [1820/2502] eta: 0:26:50 lr: 0.000002 loss_cls: 3.9076 (3.9077) grad_norm: 2.4933 (2.4448) time: 2.3965 data: 0.0002 max mem: 8426 +[2024-12-11 03:01:38 root] (utils.py 283): INFO Epoch: [26] [1830/2502] eta: 0:26:27 lr: 0.000002 loss_cls: 3.9718 (3.9079) grad_norm: 2.4946 (2.4453) time: 2.3803 data: 0.0002 max mem: 8426 +[2024-12-11 03:02:01 root] (utils.py 283): INFO Epoch: [26] [1840/2502] eta: 0:26:03 lr: 0.000002 loss_cls: 4.2432 (3.9087) grad_norm: 2.5040 (2.4453) time: 2.3482 data: 0.0002 max mem: 8426 +[2024-12-11 03:02:25 root] (utils.py 283): INFO Epoch: [26] [1850/2502] eta: 0:25:39 lr: 0.000002 loss_cls: 3.8812 (3.9065) grad_norm: 2.4265 (2.4453) time: 2.3511 data: 0.0002 max mem: 8426 +[2024-12-11 03:02:49 root] (utils.py 283): INFO Epoch: [26] [1860/2502] eta: 0:25:16 lr: 0.000002 loss_cls: 3.7895 (3.9064) grad_norm: 2.3804 (2.4449) time: 2.3788 data: 0.0002 max mem: 8426 +[2024-12-11 03:03:12 root] (utils.py 283): INFO Epoch: [26] [1870/2502] eta: 0:24:52 lr: 0.000002 loss_cls: 3.9571 (3.9066) grad_norm: 2.3824 (2.4449) time: 2.3571 data: 0.0002 max mem: 8426 +[2024-12-11 03:03:36 root] (utils.py 283): INFO Epoch: [26] [1880/2502] eta: 0:24:28 lr: 0.000002 loss_cls: 3.9571 (3.9065) grad_norm: 2.3893 (2.4448) time: 2.3427 data: 0.0002 max mem: 8426 +[2024-12-11 03:04:00 root] (utils.py 283): INFO Epoch: [26] [1890/2502] eta: 0:24:05 lr: 0.000002 loss_cls: 4.0787 (3.9073) grad_norm: 2.4452 (2.4449) time: 2.3770 data: 0.0003 max mem: 8426 +[2024-12-11 03:04:23 root] (utils.py 283): INFO Epoch: [26] [1900/2502] eta: 0:23:41 lr: 0.000002 loss_cls: 4.1373 (3.9074) grad_norm: 2.4278 (2.4446) time: 2.3889 data: 0.0003 max mem: 8426 +[2024-12-11 03:04:46 root] (utils.py 283): INFO Epoch: [26] [1910/2502] eta: 0:23:17 lr: 0.000002 loss_cls: 4.1373 (3.9069) grad_norm: 2.4292 (2.4450) time: 2.3320 data: 0.0002 max mem: 8426 +[2024-12-11 03:05:11 root] (utils.py 283): INFO Epoch: [26] [1920/2502] eta: 0:22:54 lr: 0.000002 loss_cls: 4.1637 (3.9072) grad_norm: 2.4700 (2.4451) time: 2.3618 data: 0.0002 max mem: 8426 +[2024-12-11 03:05:35 root] (utils.py 283): INFO Epoch: [26] [1930/2502] eta: 0:22:31 lr: 0.000002 loss_cls: 3.8970 (3.9070) grad_norm: 2.4017 (2.4448) time: 2.4253 data: 0.0002 max mem: 8426 +[2024-12-11 03:05:59 root] (utils.py 283): INFO Epoch: [26] [1940/2502] eta: 0:22:07 lr: 0.000002 loss_cls: 3.8520 (3.9073) grad_norm: 2.3963 (2.4447) time: 2.4010 data: 0.0003 max mem: 8426 +[2024-12-11 03:06:23 root] (utils.py 283): INFO Epoch: [26] [1950/2502] eta: 0:21:44 lr: 0.000002 loss_cls: 3.8455 (3.9066) grad_norm: 2.3563 (2.4444) time: 2.3859 data: 0.0003 max mem: 8426 +[2024-12-11 03:06:47 root] (utils.py 283): INFO Epoch: [26] [1960/2502] eta: 0:21:20 lr: 0.000002 loss_cls: 3.5666 (3.9050) grad_norm: 2.3563 (2.4446) time: 2.4055 data: 0.0003 max mem: 8426 +[2024-12-11 03:07:11 root] (utils.py 283): INFO Epoch: [26] [1970/2502] eta: 0:20:57 lr: 0.000002 loss_cls: 3.7016 (3.9059) grad_norm: 2.4040 (2.4447) time: 2.4424 data: 0.0002 max mem: 8426 +[2024-12-11 03:07:36 root] (utils.py 283): INFO Epoch: [26] [1980/2502] eta: 0:20:33 lr: 0.000002 loss_cls: 3.9520 (3.9046) grad_norm: 2.4718 (2.4450) time: 2.4707 data: 0.0002 max mem: 8426 +[2024-12-11 03:07:59 root] (utils.py 283): INFO Epoch: [26] [1990/2502] eta: 0:20:09 lr: 0.000002 loss_cls: 3.8545 (3.9049) grad_norm: 2.4557 (2.4448) time: 2.3590 data: 0.0003 max mem: 8426 +[2024-12-11 03:08:21 root] (utils.py 283): INFO Epoch: [26] [2000/2502] eta: 0:19:46 lr: 0.000002 loss_cls: 3.8477 (3.9044) grad_norm: 2.4420 (2.4447) time: 2.2537 data: 0.0003 max mem: 8426 +[2024-12-11 03:08:44 root] (utils.py 283): INFO Epoch: [26] [2010/2502] eta: 0:19:22 lr: 0.000002 loss_cls: 3.8419 (3.9041) grad_norm: 2.3671 (2.4443) time: 2.2891 data: 0.0003 max mem: 8426 +[2024-12-11 03:09:08 root] (utils.py 283): INFO Epoch: [26] [2020/2502] eta: 0:18:58 lr: 0.000002 loss_cls: 4.1831 (3.9050) grad_norm: 2.3671 (2.4441) time: 2.3224 data: 0.0003 max mem: 8426 +[2024-12-11 03:09:31 root] (utils.py 283): INFO Epoch: [26] [2030/2502] eta: 0:18:34 lr: 0.000002 loss_cls: 4.0525 (3.9033) grad_norm: 2.4542 (2.4440) time: 2.3331 data: 0.0003 max mem: 8426 +[2024-12-11 03:09:53 root] (utils.py 283): INFO Epoch: [26] [2040/2502] eta: 0:18:10 lr: 0.000002 loss_cls: 3.7279 (3.9034) grad_norm: 2.4710 (2.4443) time: 2.2802 data: 0.0003 max mem: 8426 +[2024-12-11 03:10:17 root] (utils.py 283): INFO Epoch: [26] [2050/2502] eta: 0:17:47 lr: 0.000002 loss_cls: 3.8432 (3.9025) grad_norm: 2.4165 (2.4443) time: 2.3046 data: 0.0003 max mem: 8426 +[2024-12-11 03:10:41 root] (utils.py 283): INFO Epoch: [26] [2060/2502] eta: 0:17:23 lr: 0.000002 loss_cls: 3.4215 (3.9010) grad_norm: 2.3640 (2.4441) time: 2.3664 data: 0.0003 max mem: 8426 +[2024-12-11 03:11:04 root] (utils.py 283): INFO Epoch: [26] [2070/2502] eta: 0:17:00 lr: 0.000002 loss_cls: 3.4215 (3.8999) grad_norm: 2.4106 (2.4440) time: 2.3565 data: 0.0003 max mem: 8426 +[2024-12-11 03:11:28 root] (utils.py 283): INFO Epoch: [26] [2080/2502] eta: 0:16:36 lr: 0.000002 loss_cls: 4.0575 (3.9006) grad_norm: 2.4153 (2.4439) time: 2.3614 data: 0.0003 max mem: 8426 +[2024-12-11 03:11:51 root] (utils.py 283): INFO Epoch: [26] [2090/2502] eta: 0:16:12 lr: 0.000002 loss_cls: 3.9574 (3.8996) grad_norm: 2.4162 (2.4440) time: 2.3391 data: 0.0002 max mem: 8426 +[2024-12-11 03:12:15 root] (utils.py 283): INFO Epoch: [26] [2100/2502] eta: 0:15:49 lr: 0.000002 loss_cls: 3.6151 (3.8995) grad_norm: 2.4162 (2.4440) time: 2.3560 data: 0.0003 max mem: 8426 +[2024-12-11 03:12:39 root] (utils.py 283): INFO Epoch: [26] [2110/2502] eta: 0:15:25 lr: 0.000002 loss_cls: 3.9228 (3.8999) grad_norm: 2.4432 (2.4443) time: 2.3824 data: 0.0003 max mem: 8426 +[2024-12-11 03:13:02 root] (utils.py 283): INFO Epoch: [26] [2120/2502] eta: 0:15:02 lr: 0.000002 loss_cls: 4.0747 (3.9005) grad_norm: 2.4333 (2.4442) time: 2.3535 data: 0.0003 max mem: 8426 +[2024-12-11 03:13:25 root] (utils.py 283): INFO Epoch: [26] [2130/2502] eta: 0:14:38 lr: 0.000002 loss_cls: 4.0747 (3.9017) grad_norm: 2.3648 (2.4441) time: 2.3125 data: 0.0003 max mem: 8426 +[2024-12-11 03:13:48 root] (utils.py 283): INFO Epoch: [26] [2140/2502] eta: 0:14:14 lr: 0.000002 loss_cls: 4.2389 (3.9018) grad_norm: 2.3699 (2.4441) time: 2.3211 data: 0.0003 max mem: 8426 +[2024-12-11 03:14:12 root] (utils.py 283): INFO Epoch: [26] [2150/2502] eta: 0:13:51 lr: 0.000002 loss_cls: 3.7381 (3.9004) grad_norm: 2.4321 (2.4443) time: 2.3747 data: 0.0003 max mem: 8426 +[2024-12-11 03:14:36 root] (utils.py 283): INFO Epoch: [26] [2160/2502] eta: 0:13:27 lr: 0.000002 loss_cls: 3.6461 (3.9006) grad_norm: 2.4900 (2.4445) time: 2.3847 data: 0.0003 max mem: 8426 +[2024-12-11 03:15:00 root] (utils.py 283): INFO Epoch: [26] [2170/2502] eta: 0:13:03 lr: 0.000002 loss_cls: 3.9898 (3.9010) grad_norm: 2.4307 (2.4444) time: 2.3954 data: 0.0003 max mem: 8426 +[2024-12-11 03:15:24 root] (utils.py 283): INFO Epoch: [26] [2180/2502] eta: 0:12:40 lr: 0.000002 loss_cls: 3.8850 (3.9001) grad_norm: 2.4221 (2.4444) time: 2.3912 data: 0.0002 max mem: 8426 +[2024-12-11 03:15:48 root] (utils.py 283): INFO Epoch: [26] [2190/2502] eta: 0:12:16 lr: 0.000002 loss_cls: 3.7332 (3.8998) grad_norm: 2.4170 (2.4443) time: 2.3834 data: 0.0003 max mem: 8426 +[2024-12-11 03:16:12 root] (utils.py 283): INFO Epoch: [26] [2200/2502] eta: 0:11:53 lr: 0.000002 loss_cls: 3.9025 (3.8986) grad_norm: 2.4183 (2.4445) time: 2.4022 data: 0.0003 max mem: 8426 +[2024-12-11 03:16:36 root] (utils.py 283): INFO Epoch: [26] [2210/2502] eta: 0:11:29 lr: 0.000002 loss_cls: 3.9466 (3.8978) grad_norm: 2.4353 (2.4442) time: 2.4023 data: 0.0003 max mem: 8426 +[2024-12-11 03:17:00 root] (utils.py 283): INFO Epoch: [26] [2220/2502] eta: 0:11:06 lr: 0.000002 loss_cls: 3.9340 (3.8975) grad_norm: 2.3983 (2.4440) time: 2.4096 data: 0.0003 max mem: 8426 +[2024-12-11 03:17:24 root] (utils.py 283): INFO Epoch: [26] [2230/2502] eta: 0:10:42 lr: 0.000002 loss_cls: 3.8552 (3.8974) grad_norm: 2.4147 (2.4441) time: 2.3958 data: 0.0003 max mem: 8426 +[2024-12-11 03:17:47 root] (utils.py 283): INFO Epoch: [26] [2240/2502] eta: 0:10:18 lr: 0.000002 loss_cls: 3.9699 (3.8986) grad_norm: 2.3971 (2.4440) time: 2.3647 data: 0.0003 max mem: 8426 +[2024-12-11 03:18:11 root] (utils.py 283): INFO Epoch: [26] [2250/2502] eta: 0:09:55 lr: 0.000002 loss_cls: 4.1787 (3.8985) grad_norm: 2.3971 (2.4439) time: 2.3564 data: 0.0003 max mem: 8426 +[2024-12-11 03:18:35 root] (utils.py 283): INFO Epoch: [26] [2260/2502] eta: 0:09:31 lr: 0.000002 loss_cls: 3.8185 (3.8975) grad_norm: 2.4308 (2.4441) time: 2.3868 data: 0.0003 max mem: 8426 +[2024-12-11 03:18:59 root] (utils.py 283): INFO Epoch: [26] [2270/2502] eta: 0:09:08 lr: 0.000002 loss_cls: 3.5246 (3.8953) grad_norm: 2.3831 (2.4437) time: 2.4122 data: 0.0003 max mem: 8426 +[2024-12-11 03:19:23 root] (utils.py 283): INFO Epoch: [26] [2280/2502] eta: 0:08:44 lr: 0.000002 loss_cls: 3.5246 (3.8951) grad_norm: 2.3559 (2.4435) time: 2.3805 data: 0.0003 max mem: 8426 +[2024-12-11 03:19:47 root] (utils.py 283): INFO Epoch: [26] [2290/2502] eta: 0:08:20 lr: 0.000002 loss_cls: 3.8740 (3.8957) grad_norm: 2.4089 (2.4438) time: 2.3668 data: 0.0003 max mem: 8426 +[2024-12-11 03:20:11 root] (utils.py 283): INFO Epoch: [26] [2300/2502] eta: 0:07:57 lr: 0.000002 loss_cls: 3.8740 (3.8959) grad_norm: 2.3794 (2.4436) time: 2.4266 data: 0.0003 max mem: 8426 +[2024-12-11 03:20:35 root] (utils.py 283): INFO Epoch: [26] [2310/2502] eta: 0:07:33 lr: 0.000002 loss_cls: 4.2124 (3.8962) grad_norm: 2.3829 (2.4435) time: 2.4277 data: 0.0003 max mem: 8426 +[2024-12-11 03:20:59 root] (utils.py 283): INFO Epoch: [26] [2320/2502] eta: 0:07:10 lr: 0.000002 loss_cls: 3.8239 (3.8959) grad_norm: 2.3924 (2.4431) time: 2.3606 data: 0.0003 max mem: 8426 +[2024-12-11 03:21:23 root] (utils.py 283): INFO Epoch: [26] [2330/2502] eta: 0:06:46 lr: 0.000002 loss_cls: 3.7871 (3.8955) grad_norm: 2.4123 (2.4434) time: 2.3650 data: 0.0003 max mem: 8426 +[2024-12-11 03:21:46 root] (utils.py 283): INFO Epoch: [26] [2340/2502] eta: 0:06:22 lr: 0.000002 loss_cls: 3.7849 (3.8958) grad_norm: 2.4645 (2.4432) time: 2.3898 data: 0.0003 max mem: 8426 +[2024-12-11 03:22:11 root] (utils.py 283): INFO Epoch: [26] [2350/2502] eta: 0:05:59 lr: 0.000002 loss_cls: 3.7849 (3.8949) grad_norm: 2.3879 (2.4432) time: 2.4073 data: 0.0003 max mem: 8426 +[2024-12-11 03:22:34 root] (utils.py 283): INFO Epoch: [26] [2360/2502] eta: 0:05:35 lr: 0.000002 loss_cls: 3.8371 (3.8955) grad_norm: 2.4171 (2.4432) time: 2.3933 data: 0.0003 max mem: 8426 +[2024-12-11 03:22:58 root] (utils.py 283): INFO Epoch: [26] [2370/2502] eta: 0:05:12 lr: 0.000002 loss_cls: 3.9617 (3.8948) grad_norm: 2.4001 (2.4429) time: 2.3834 data: 0.0003 max mem: 8426 +[2024-12-11 03:23:23 root] (utils.py 283): INFO Epoch: [26] [2380/2502] eta: 0:04:48 lr: 0.000002 loss_cls: 3.9420 (3.8945) grad_norm: 2.3303 (2.4428) time: 2.4310 data: 0.0003 max mem: 8426 +[2024-12-11 03:23:47 root] (utils.py 283): INFO Epoch: [26] [2390/2502] eta: 0:04:24 lr: 0.000002 loss_cls: 4.1066 (3.8950) grad_norm: 2.4117 (2.4427) time: 2.4384 data: 0.0003 max mem: 8426 +[2024-12-11 03:24:12 root] (utils.py 283): INFO Epoch: [26] [2400/2502] eta: 0:04:01 lr: 0.000002 loss_cls: 3.8945 (3.8941) grad_norm: 2.4525 (2.4431) time: 2.4613 data: 0.0003 max mem: 8426 +[2024-12-11 03:24:36 root] (utils.py 283): INFO Epoch: [26] [2410/2502] eta: 0:03:37 lr: 0.000002 loss_cls: 3.6659 (3.8937) grad_norm: 2.4440 (2.4430) time: 2.4447 data: 0.0003 max mem: 8426 +[2024-12-11 03:25:00 root] (utils.py 283): INFO Epoch: [26] [2420/2502] eta: 0:03:13 lr: 0.000002 loss_cls: 4.1085 (3.8939) grad_norm: 2.4276 (2.4429) time: 2.3786 data: 0.0003 max mem: 8426 +[2024-12-11 03:25:24 root] (utils.py 283): INFO Epoch: [26] [2430/2502] eta: 0:02:50 lr: 0.000002 loss_cls: 3.6875 (3.8930) grad_norm: 2.4126 (2.4429) time: 2.4086 data: 0.0002 max mem: 8426 +[2024-12-11 03:25:48 root] (utils.py 283): INFO Epoch: [26] [2440/2502] eta: 0:02:26 lr: 0.000002 loss_cls: 3.6917 (3.8922) grad_norm: 2.4405 (2.4430) time: 2.4251 data: 0.0002 max mem: 8426 +[2024-12-11 03:26:11 root] (utils.py 283): INFO Epoch: [26] [2450/2502] eta: 0:02:03 lr: 0.000002 loss_cls: 4.0414 (3.8934) grad_norm: 2.4405 (2.4429) time: 2.3580 data: 0.0003 max mem: 8426 +[2024-12-11 03:26:35 root] (utils.py 283): INFO Epoch: [26] [2460/2502] eta: 0:01:39 lr: 0.000002 loss_cls: 3.9462 (3.8928) grad_norm: 2.4127 (2.4430) time: 2.3490 data: 0.0003 max mem: 8426 +[2024-12-11 03:27:00 root] (utils.py 283): INFO Epoch: [26] [2470/2502] eta: 0:01:15 lr: 0.000002 loss_cls: 3.8698 (3.8927) grad_norm: 2.4030 (2.4427) time: 2.4174 data: 0.0002 max mem: 8426 +[2024-12-11 03:27:24 root] (utils.py 283): INFO Epoch: [26] [2480/2502] eta: 0:00:52 lr: 0.000002 loss_cls: 3.8533 (3.8910) grad_norm: 2.3399 (2.4426) time: 2.4400 data: 0.0002 max mem: 8426 +[2024-12-11 03:27:48 root] (utils.py 283): INFO Epoch: [26] [2490/2502] eta: 0:00:28 lr: 0.000002 loss_cls: 3.8562 (3.8912) grad_norm: 2.4039 (2.4425) time: 2.3948 data: 0.0256 max mem: 8426 +[2024-12-11 03:28:12 root] (utils.py 283): INFO Epoch: [26] [2500/2502] eta: 0:00:04 lr: 0.000002 loss_cls: 3.4394 (3.8896) grad_norm: 2.4540 (2.4428) time: 2.3789 data: 0.0256 max mem: 8426 +[2024-12-11 03:28:14 root] (utils.py 283): INFO Epoch: [26] [2501/2502] eta: 0:00:02 lr: 0.000002 loss_cls: 3.4394 (3.8895) grad_norm: 2.4689 (2.4428) time: 2.4092 data: 0.0256 max mem: 8426 +[2024-12-11 03:28:14 root] (utils.py 297): INFO Epoch: [26] Total time: 1:38:40 (2.3664 s / it) +[2024-12-11 03:28:14 root] (engine.py 179): INFO Averaged stats:lr: 0.000002 loss_cls: 3.4394 (3.8791) grad_norm: 2.4689 (2.4428) +[2024-12-11 03:28:15 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:28 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.6276 (0.6276) acc1: 87.5000 (87.5000) acc3: 96.8750 (96.8750) acc5: 98.4375 (98.4375) time: 0.2868 data: 0.0006 max mem: 8426 +[2024-12-11 03:28:17 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:23 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7258 (0.7988) acc1: 85.9375 (82.6705) acc3: 96.0938 (94.1761) acc5: 97.6562 (96.5199) time: 0.2627 data: 0.0004 max mem: 8426 +[2024-12-11 03:28:20 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:18 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8340 (0.8564) acc1: 80.4688 (81.3244) acc3: 92.1875 (93.1920) acc5: 95.3125 (95.6101) time: 0.2367 data: 0.0004 max mem: 8426 +[2024-12-11 03:28:21 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:14 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9458 (0.8689) acc1: 79.6875 (80.5696) acc3: 92.9688 (93.2208) acc5: 95.3125 (95.7661) time: 0.1966 data: 0.0005 max mem: 8426 +[2024-12-11 03:28:24 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8139 (0.8587) acc1: 79.6875 (80.9261) acc3: 94.5312 (93.2927) acc5: 96.8750 (95.8841) time: 0.2041 data: 0.0004 max mem: 8426 +[2024-12-11 03:28:25 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0343 (0.9426) acc1: 75.7812 (78.9216) acc3: 88.2812 (91.9730) acc5: 92.1875 (94.8683) time: 0.2030 data: 0.0004 max mem: 8426 +[2024-12-11 03:28:27 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1940 (0.9881) acc1: 73.4375 (78.2018) acc3: 85.9375 (91.0733) acc5: 89.8438 (94.0830) time: 0.1802 data: 0.0004 max mem: 8426 +[2024-12-11 03:28:30 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1779 (1.0274) acc1: 75.0000 (77.2997) acc3: 85.9375 (90.5260) acc5: 90.6250 (93.6510) time: 0.2036 data: 0.0004 max mem: 8426 +[2024-12-11 03:28:31 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1911 (1.0615) acc1: 73.4375 (76.5143) acc3: 87.5000 (89.9595) acc5: 90.6250 (93.1327) time: 0.2013 data: 0.0007 max mem: 8426 +[2024-12-11 03:28:33 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2119 (1.0900) acc1: 69.5312 (75.7641) acc3: 87.5000 (89.6120) acc5: 90.6250 (92.8657) time: 0.1949 data: 0.0007 max mem: 8426 +[2024-12-11 03:28:35 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1298 (1.0783) acc1: 74.2188 (76.0080) acc3: 88.2812 (89.8000) acc5: 91.4062 (93.0400) time: 0.2041 data: 0.0006 max mem: 8426 +[2024-12-11 03:28:35 root] (utils.py 297): INFO Test: Total time: 0:00:20 (0.2072 s / it) +[2024-12-11 03:28:35 root] (engine.py 264): INFO * Acc@1 75.894 Acc@3 89.744 Acc@5 93.062 loss 1.079 flops 1.285 layer_flops 1.251 +[2024-12-11 03:28:35 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 75.9% +[2024-12-11 03:28:35 root] (main.py 576): INFO Max accuracy: 75.89% +[2024-12-11 03:28:38 root] (utils.py 283): INFO Epoch: [27] [ 0/2502] eta: 1:59:26 lr: 0.000002 loss_cls: 2.8962 (2.8962) grad_norm: 2.8845 (2.8845) time: 2.8642 data: 0.0003 max mem: 8426 +[2024-12-11 03:29:01 root] (utils.py 283): INFO Epoch: [27] [ 10/2502] eta: 1:37:58 lr: 0.000002 loss_cls: 4.2206 (4.0582) grad_norm: 2.5397 (2.5113) time: 2.3588 data: 0.0003 max mem: 8426 +[2024-12-11 03:29:24 root] (utils.py 283): INFO Epoch: [27] [ 20/2502] eta: 1:36:41 lr: 0.000002 loss_cls: 4.1235 (4.0984) grad_norm: 2.4098 (2.4625) time: 2.3112 data: 0.0003 max mem: 8426 +[2024-12-11 03:29:48 root] (utils.py 283): INFO Epoch: [27] [ 30/2502] eta: 1:37:20 lr: 0.000002 loss_cls: 4.0293 (4.0199) grad_norm: 2.3835 (2.4664) time: 2.3646 data: 0.0003 max mem: 8426 +[2024-12-11 03:30:12 root] (utils.py 283): INFO Epoch: [27] [ 40/2502] eta: 1:36:34 lr: 0.000002 loss_cls: 3.9866 (3.9893) grad_norm: 2.4136 (2.4614) time: 2.3708 data: 0.0003 max mem: 8426 +[2024-12-11 03:30:35 root] (utils.py 283): INFO Epoch: [27] [ 50/2502] eta: 1:36:10 lr: 0.000002 loss_cls: 4.1657 (3.9650) grad_norm: 2.4657 (2.4633) time: 2.3390 data: 0.0003 max mem: 8426 +[2024-12-11 03:30:58 root] (utils.py 283): INFO Epoch: [27] [ 60/2502] eta: 1:35:36 lr: 0.000002 loss_cls: 4.0468 (3.9671) grad_norm: 2.4657 (2.4626) time: 2.3397 data: 0.0003 max mem: 8426 +[2024-12-11 03:31:22 root] (utils.py 283): INFO Epoch: [27] [ 70/2502] eta: 1:35:09 lr: 0.000002 loss_cls: 4.0065 (3.9717) grad_norm: 2.3906 (2.4526) time: 2.3336 data: 0.0003 max mem: 8426 +[2024-12-11 03:31:45 root] (utils.py 283): INFO Epoch: [27] [ 80/2502] eta: 1:34:48 lr: 0.000002 loss_cls: 4.0808 (3.9479) grad_norm: 2.3906 (2.4479) time: 2.3473 data: 0.0003 max mem: 8426 +[2024-12-11 03:32:10 root] (utils.py 283): INFO Epoch: [27] [ 90/2502] eta: 1:34:57 lr: 0.000002 loss_cls: 4.1482 (3.9638) grad_norm: 2.3667 (2.4425) time: 2.4132 data: 0.0003 max mem: 8426 +[2024-12-11 03:32:33 root] (utils.py 283): INFO Epoch: [27] [ 100/2502] eta: 1:34:14 lr: 0.000002 loss_cls: 4.0426 (3.9471) grad_norm: 2.4065 (2.4518) time: 2.3755 data: 0.0003 max mem: 8426 +[2024-12-11 03:32:56 root] (utils.py 283): INFO Epoch: [27] [ 110/2502] eta: 1:33:49 lr: 0.000002 loss_cls: 4.0673 (3.9621) grad_norm: 2.4316 (2.4477) time: 2.3138 data: 0.0003 max mem: 8426 +[2024-12-11 03:33:20 root] (utils.py 283): INFO Epoch: [27] [ 120/2502] eta: 1:33:24 lr: 0.000002 loss_cls: 4.1253 (3.9595) grad_norm: 2.3442 (2.4444) time: 2.3475 data: 0.0002 max mem: 8426 +[2024-12-11 03:33:45 root] (utils.py 283): INFO Epoch: [27] [ 130/2502] eta: 1:33:25 lr: 0.000002 loss_cls: 3.8405 (3.9570) grad_norm: 2.3994 (2.4462) time: 2.4184 data: 0.0002 max mem: 8426 +[2024-12-11 03:34:09 root] (utils.py 283): INFO Epoch: [27] [ 140/2502] eta: 1:33:06 lr: 0.000002 loss_cls: 4.2268 (3.9625) grad_norm: 2.4144 (2.4469) time: 2.4401 data: 0.0002 max mem: 8426 +[2024-12-11 03:34:32 root] (utils.py 283): INFO Epoch: [27] [ 150/2502] eta: 1:32:32 lr: 0.000002 loss_cls: 4.2554 (3.9696) grad_norm: 2.4043 (2.4438) time: 2.3436 data: 0.0003 max mem: 8426 +[2024-12-11 03:34:56 root] (utils.py 283): INFO Epoch: [27] [ 160/2502] eta: 1:32:23 lr: 0.000002 loss_cls: 4.1717 (3.9662) grad_norm: 2.3903 (2.4404) time: 2.3777 data: 0.0003 max mem: 8426 +[2024-12-11 03:35:20 root] (utils.py 283): INFO Epoch: [27] [ 170/2502] eta: 1:31:57 lr: 0.000002 loss_cls: 3.9420 (3.9415) grad_norm: 2.3969 (2.4427) time: 2.4047 data: 0.0003 max mem: 8426 +[2024-12-11 03:35:43 root] (utils.py 283): INFO Epoch: [27] [ 180/2502] eta: 1:31:33 lr: 0.000002 loss_cls: 3.9580 (3.9517) grad_norm: 2.4042 (2.4409) time: 2.3585 data: 0.0003 max mem: 8426 +[2024-12-11 03:36:07 root] (utils.py 283): INFO Epoch: [27] [ 190/2502] eta: 1:31:12 lr: 0.000002 loss_cls: 4.0608 (3.9408) grad_norm: 2.4028 (2.4406) time: 2.3773 data: 0.0003 max mem: 8426 +[2024-12-11 03:36:30 root] (utils.py 283): INFO Epoch: [27] [ 200/2502] eta: 1:30:41 lr: 0.000002 loss_cls: 4.0479 (3.9486) grad_norm: 2.4577 (2.4419) time: 2.3438 data: 0.0003 max mem: 8426 +[2024-12-11 03:36:54 root] (utils.py 283): INFO Epoch: [27] [ 210/2502] eta: 1:30:19 lr: 0.000002 loss_cls: 4.0635 (3.9454) grad_norm: 2.4307 (2.4400) time: 2.3402 data: 0.0003 max mem: 8426 +[2024-12-11 03:37:18 root] (utils.py 283): INFO Epoch: [27] [ 220/2502] eta: 1:29:56 lr: 0.000002 loss_cls: 3.7768 (3.9327) grad_norm: 2.4138 (2.4421) time: 2.3745 data: 0.0003 max mem: 8426 +[2024-12-11 03:37:41 root] (utils.py 283): INFO Epoch: [27] [ 230/2502] eta: 1:29:32 lr: 0.000002 loss_cls: 3.3801 (3.9179) grad_norm: 2.4138 (2.4392) time: 2.3684 data: 0.0003 max mem: 8426 +[2024-12-11 03:38:05 root] (utils.py 283): INFO Epoch: [27] [ 240/2502] eta: 1:29:08 lr: 0.000002 loss_cls: 3.7427 (3.9194) grad_norm: 2.4659 (2.4426) time: 2.3602 data: 0.0003 max mem: 8426 +[2024-12-11 03:38:29 root] (utils.py 283): INFO Epoch: [27] [ 250/2502] eta: 1:28:50 lr: 0.000002 loss_cls: 4.2612 (3.9377) grad_norm: 2.5065 (2.4443) time: 2.3907 data: 0.0002 max mem: 8426 +[2024-12-11 03:38:53 root] (utils.py 283): INFO Epoch: [27] [ 260/2502] eta: 1:28:26 lr: 0.000002 loss_cls: 4.3062 (3.9414) grad_norm: 2.4371 (2.4453) time: 2.3955 data: 0.0002 max mem: 8426 +[2024-12-11 03:39:16 root] (utils.py 283): INFO Epoch: [27] [ 270/2502] eta: 1:27:58 lr: 0.000002 loss_cls: 3.8415 (3.9356) grad_norm: 2.4338 (2.4443) time: 2.3393 data: 0.0002 max mem: 8426 +[2024-12-11 03:39:39 root] (utils.py 283): INFO Epoch: [27] [ 280/2502] eta: 1:27:29 lr: 0.000002 loss_cls: 3.8151 (3.9345) grad_norm: 2.4722 (2.4480) time: 2.3105 data: 0.0002 max mem: 8426 +[2024-12-11 03:40:03 root] (utils.py 283): INFO Epoch: [27] [ 290/2502] eta: 1:27:09 lr: 0.000002 loss_cls: 3.6591 (3.9291) grad_norm: 2.4630 (2.4470) time: 2.3543 data: 0.0002 max mem: 8426 +[2024-12-11 03:40:27 root] (utils.py 283): INFO Epoch: [27] [ 300/2502] eta: 1:26:46 lr: 0.000002 loss_cls: 3.5644 (3.9139) grad_norm: 2.4177 (2.4474) time: 2.3873 data: 0.0002 max mem: 8426 +[2024-12-11 03:40:51 root] (utils.py 283): INFO Epoch: [27] [ 310/2502] eta: 1:26:25 lr: 0.000002 loss_cls: 3.4871 (3.9083) grad_norm: 2.3897 (2.4457) time: 2.3855 data: 0.0002 max mem: 8426 +[2024-12-11 03:41:14 root] (utils.py 283): INFO Epoch: [27] [ 320/2502] eta: 1:26:01 lr: 0.000002 loss_cls: 3.9793 (3.9083) grad_norm: 2.3385 (2.4443) time: 2.3824 data: 0.0003 max mem: 8426 +[2024-12-11 03:41:38 root] (utils.py 283): INFO Epoch: [27] [ 330/2502] eta: 1:25:38 lr: 0.000002 loss_cls: 4.1750 (3.9143) grad_norm: 2.4081 (2.4431) time: 2.3672 data: 0.0003 max mem: 8426 +[2024-12-11 03:42:02 root] (utils.py 283): INFO Epoch: [27] [ 340/2502] eta: 1:25:17 lr: 0.000002 loss_cls: 4.1719 (3.9186) grad_norm: 2.4081 (2.4432) time: 2.3880 data: 0.0003 max mem: 8426 +[2024-12-11 03:42:26 root] (utils.py 283): INFO Epoch: [27] [ 350/2502] eta: 1:24:53 lr: 0.000002 loss_cls: 4.1038 (3.9205) grad_norm: 2.4133 (2.4409) time: 2.3880 data: 0.0002 max mem: 8426 +[2024-12-11 03:42:50 root] (utils.py 283): INFO Epoch: [27] [ 360/2502] eta: 1:24:32 lr: 0.000002 loss_cls: 3.9692 (3.9147) grad_norm: 2.3898 (2.4420) time: 2.3897 data: 0.0003 max mem: 8426 +[2024-12-11 03:43:14 root] (utils.py 283): INFO Epoch: [27] [ 370/2502] eta: 1:24:09 lr: 0.000002 loss_cls: 3.8836 (3.9158) grad_norm: 2.4431 (2.4439) time: 2.3978 data: 0.0003 max mem: 8426 +[2024-12-11 03:43:38 root] (utils.py 283): INFO Epoch: [27] [ 380/2502] eta: 1:23:47 lr: 0.000002 loss_cls: 3.5482 (3.9056) grad_norm: 2.4229 (2.4420) time: 2.3927 data: 0.0002 max mem: 8426 +[2024-12-11 03:44:02 root] (utils.py 283): INFO Epoch: [27] [ 390/2502] eta: 1:23:24 lr: 0.000002 loss_cls: 3.7135 (3.9029) grad_norm: 2.3930 (2.4424) time: 2.3858 data: 0.0003 max mem: 8426 +[2024-12-11 03:44:25 root] (utils.py 283): INFO Epoch: [27] [ 400/2502] eta: 1:22:59 lr: 0.000002 loss_cls: 4.0835 (3.9019) grad_norm: 2.4298 (2.4404) time: 2.3597 data: 0.0003 max mem: 8426 +[2024-12-11 03:44:48 root] (utils.py 283): INFO Epoch: [27] [ 410/2502] eta: 1:22:33 lr: 0.000002 loss_cls: 3.8540 (3.8994) grad_norm: 2.4072 (2.4401) time: 2.3340 data: 0.0003 max mem: 8426 +[2024-12-11 03:45:11 root] (utils.py 283): INFO Epoch: [27] [ 420/2502] eta: 1:22:04 lr: 0.000002 loss_cls: 3.8540 (3.8955) grad_norm: 2.4468 (2.4396) time: 2.2906 data: 0.0003 max mem: 8426 +[2024-12-11 03:45:34 root] (utils.py 283): INFO Epoch: [27] [ 430/2502] eta: 1:21:39 lr: 0.000002 loss_cls: 4.1841 (3.9023) grad_norm: 2.4617 (2.4405) time: 2.2977 data: 0.0003 max mem: 8426 +[2024-12-11 03:45:58 root] (utils.py 283): INFO Epoch: [27] [ 440/2502] eta: 1:21:15 lr: 0.000002 loss_cls: 4.0861 (3.8945) grad_norm: 2.4443 (2.4392) time: 2.3549 data: 0.0003 max mem: 8426 +[2024-12-11 03:46:21 root] (utils.py 283): INFO Epoch: [27] [ 450/2502] eta: 1:20:51 lr: 0.000002 loss_cls: 3.7638 (3.8955) grad_norm: 2.4596 (2.4423) time: 2.3574 data: 0.0002 max mem: 8426 +[2024-12-11 03:46:45 root] (utils.py 283): INFO Epoch: [27] [ 460/2502] eta: 1:20:29 lr: 0.000002 loss_cls: 4.0003 (3.8921) grad_norm: 2.4928 (2.4416) time: 2.3775 data: 0.0003 max mem: 8426 +[2024-12-11 03:47:09 root] (utils.py 283): INFO Epoch: [27] [ 470/2502] eta: 1:20:06 lr: 0.000002 loss_cls: 3.9167 (3.8868) grad_norm: 2.3632 (2.4403) time: 2.3964 data: 0.0003 max mem: 8426 +[2024-12-11 03:47:33 root] (utils.py 283): INFO Epoch: [27] [ 480/2502] eta: 1:19:41 lr: 0.000002 loss_cls: 3.9167 (3.8913) grad_norm: 2.3857 (2.4409) time: 2.3511 data: 0.0003 max mem: 8426 +[2024-12-11 03:47:56 root] (utils.py 283): INFO Epoch: [27] [ 490/2502] eta: 1:19:17 lr: 0.000002 loss_cls: 3.8953 (3.8872) grad_norm: 2.4027 (2.4398) time: 2.3400 data: 0.0003 max mem: 8426 +[2024-12-11 03:48:19 root] (utils.py 283): INFO Epoch: [27] [ 500/2502] eta: 1:18:52 lr: 0.000002 loss_cls: 3.7418 (3.8813) grad_norm: 2.3362 (2.4393) time: 2.3438 data: 0.0003 max mem: 8426 +[2024-12-11 03:48:44 root] (utils.py 283): INFO Epoch: [27] [ 510/2502] eta: 1:18:32 lr: 0.000002 loss_cls: 3.8400 (3.8811) grad_norm: 2.4446 (2.4392) time: 2.4006 data: 0.0003 max mem: 8426 +[2024-12-11 03:49:07 root] (utils.py 283): INFO Epoch: [27] [ 520/2502] eta: 1:18:07 lr: 0.000002 loss_cls: 4.1286 (3.8810) grad_norm: 2.4446 (2.4398) time: 2.3954 data: 0.0003 max mem: 8426 +[2024-12-11 03:49:31 root] (utils.py 283): INFO Epoch: [27] [ 530/2502] eta: 1:17:45 lr: 0.000002 loss_cls: 3.5798 (3.8759) grad_norm: 2.4127 (2.4398) time: 2.3673 data: 0.0003 max mem: 8426 +[2024-12-11 03:49:55 root] (utils.py 283): INFO Epoch: [27] [ 540/2502] eta: 1:17:23 lr: 0.000002 loss_cls: 3.5960 (3.8776) grad_norm: 2.4370 (2.4405) time: 2.4071 data: 0.0003 max mem: 8426 +[2024-12-11 03:50:19 root] (utils.py 283): INFO Epoch: [27] [ 550/2502] eta: 1:17:00 lr: 0.000002 loss_cls: 4.1577 (3.8840) grad_norm: 2.4650 (2.4419) time: 2.3950 data: 0.0002 max mem: 8426 +[2024-12-11 03:50:43 root] (utils.py 283): INFO Epoch: [27] [ 560/2502] eta: 1:16:34 lr: 0.000002 loss_cls: 4.1369 (3.8859) grad_norm: 2.4361 (2.4416) time: 2.3526 data: 0.0003 max mem: 8426 +[2024-12-11 03:51:06 root] (utils.py 283): INFO Epoch: [27] [ 570/2502] eta: 1:16:12 lr: 0.000002 loss_cls: 3.6431 (3.8785) grad_norm: 2.4362 (2.4422) time: 2.3530 data: 0.0003 max mem: 8426 +[2024-12-11 03:51:29 root] (utils.py 283): INFO Epoch: [27] [ 580/2502] eta: 1:15:46 lr: 0.000002 loss_cls: 3.9253 (3.8858) grad_norm: 2.4494 (2.4423) time: 2.3438 data: 0.0003 max mem: 8426 +[2024-12-11 03:51:54 root] (utils.py 283): INFO Epoch: [27] [ 590/2502] eta: 1:15:24 lr: 0.000002 loss_cls: 4.1684 (3.8847) grad_norm: 2.3340 (2.4414) time: 2.3638 data: 0.0003 max mem: 8426 +[2024-12-11 03:52:17 root] (utils.py 283): INFO Epoch: [27] [ 600/2502] eta: 1:15:00 lr: 0.000002 loss_cls: 3.8951 (3.8838) grad_norm: 2.4577 (2.4419) time: 2.3941 data: 0.0003 max mem: 8426 +[2024-12-11 03:52:40 root] (utils.py 283): INFO Epoch: [27] [ 610/2502] eta: 1:14:35 lr: 0.000002 loss_cls: 3.7543 (3.8796) grad_norm: 2.4624 (2.4415) time: 2.3312 data: 0.0002 max mem: 8426 +[2024-12-11 03:53:03 root] (utils.py 283): INFO Epoch: [27] [ 620/2502] eta: 1:14:09 lr: 0.000002 loss_cls: 4.0040 (3.8860) grad_norm: 2.4707 (2.4425) time: 2.3093 data: 0.0003 max mem: 8426 +[2024-12-11 03:53:27 root] (utils.py 283): INFO Epoch: [27] [ 630/2502] eta: 1:13:45 lr: 0.000002 loss_cls: 4.3161 (3.8925) grad_norm: 2.4707 (2.4415) time: 2.3348 data: 0.0003 max mem: 8426 +[2024-12-11 03:53:50 root] (utils.py 283): INFO Epoch: [27] [ 640/2502] eta: 1:13:20 lr: 0.000002 loss_cls: 4.0636 (3.8923) grad_norm: 2.4489 (2.4426) time: 2.3330 data: 0.0003 max mem: 8426 +[2024-12-11 03:54:14 root] (utils.py 283): INFO Epoch: [27] [ 650/2502] eta: 1:12:56 lr: 0.000002 loss_cls: 4.0636 (3.8943) grad_norm: 2.4524 (2.4432) time: 2.3340 data: 0.0002 max mem: 8426 +[2024-12-11 03:54:37 root] (utils.py 283): INFO Epoch: [27] [ 660/2502] eta: 1:12:31 lr: 0.000002 loss_cls: 4.1852 (3.8963) grad_norm: 2.4009 (2.4428) time: 2.3238 data: 0.0002 max mem: 8426 +[2024-12-11 03:55:00 root] (utils.py 283): INFO Epoch: [27] [ 670/2502] eta: 1:12:07 lr: 0.000002 loss_cls: 3.9264 (3.8964) grad_norm: 2.3735 (2.4423) time: 2.3304 data: 0.0002 max mem: 8426 +[2024-12-11 03:55:25 root] (utils.py 283): INFO Epoch: [27] [ 680/2502] eta: 1:11:46 lr: 0.000002 loss_cls: 4.0126 (3.8989) grad_norm: 2.3735 (2.4422) time: 2.4170 data: 0.0002 max mem: 8426 +[2024-12-11 03:55:48 root] (utils.py 283): INFO Epoch: [27] [ 690/2502] eta: 1:11:22 lr: 0.000002 loss_cls: 4.1500 (3.9004) grad_norm: 2.4263 (2.4426) time: 2.4012 data: 0.0002 max mem: 8426 +[2024-12-11 03:56:12 root] (utils.py 283): INFO Epoch: [27] [ 700/2502] eta: 1:10:58 lr: 0.000002 loss_cls: 4.1159 (3.9006) grad_norm: 2.4040 (2.4417) time: 2.3365 data: 0.0002 max mem: 8426 +[2024-12-11 03:56:36 root] (utils.py 283): INFO Epoch: [27] [ 710/2502] eta: 1:10:36 lr: 0.000002 loss_cls: 4.0995 (3.9018) grad_norm: 2.4498 (2.4427) time: 2.3805 data: 0.0002 max mem: 8426 +[2024-12-11 03:56:59 root] (utils.py 283): INFO Epoch: [27] [ 720/2502] eta: 1:10:11 lr: 0.000002 loss_cls: 4.0995 (3.9037) grad_norm: 2.4318 (2.4422) time: 2.3735 data: 0.0003 max mem: 8426 +[2024-12-11 03:57:22 root] (utils.py 283): INFO Epoch: [27] [ 730/2502] eta: 1:09:46 lr: 0.000002 loss_cls: 3.8557 (3.9002) grad_norm: 2.4108 (2.4422) time: 2.3147 data: 0.0002 max mem: 8426 +[2024-12-11 03:57:45 root] (utils.py 283): INFO Epoch: [27] [ 740/2502] eta: 1:09:21 lr: 0.000002 loss_cls: 3.6598 (3.8987) grad_norm: 2.4441 (2.4422) time: 2.3082 data: 0.0002 max mem: 8426 +[2024-12-11 03:58:09 root] (utils.py 283): INFO Epoch: [27] [ 750/2502] eta: 1:08:57 lr: 0.000002 loss_cls: 4.1114 (3.9012) grad_norm: 2.4713 (2.4429) time: 2.3287 data: 0.0003 max mem: 8426 +[2024-12-11 03:58:32 root] (utils.py 283): INFO Epoch: [27] [ 760/2502] eta: 1:08:33 lr: 0.000002 loss_cls: 4.1797 (3.9031) grad_norm: 2.4805 (2.4432) time: 2.3457 data: 0.0003 max mem: 8426 +[2024-12-11 03:58:56 root] (utils.py 283): INFO Epoch: [27] [ 770/2502] eta: 1:08:10 lr: 0.000002 loss_cls: 4.3000 (3.9055) grad_norm: 2.4320 (2.4435) time: 2.3709 data: 0.0003 max mem: 8426 +[2024-12-11 03:59:20 root] (utils.py 283): INFO Epoch: [27] [ 780/2502] eta: 1:07:47 lr: 0.000002 loss_cls: 4.1285 (3.9058) grad_norm: 2.3949 (2.4421) time: 2.3934 data: 0.0002 max mem: 8426 +[2024-12-11 03:59:43 root] (utils.py 283): INFO Epoch: [27] [ 790/2502] eta: 1:07:23 lr: 0.000002 loss_cls: 3.9409 (3.9076) grad_norm: 2.4170 (2.4447) time: 2.3556 data: 0.0003 max mem: 8426 +[2024-12-11 04:00:07 root] (utils.py 283): INFO Epoch: [27] [ 800/2502] eta: 1:06:59 lr: 0.000002 loss_cls: 3.9409 (3.9090) grad_norm: 2.4460 (2.4448) time: 2.3314 data: 0.0003 max mem: 8426 +[2024-12-11 04:00:30 root] (utils.py 283): INFO Epoch: [27] [ 810/2502] eta: 1:06:35 lr: 0.000002 loss_cls: 3.9742 (3.9108) grad_norm: 2.4242 (2.4444) time: 2.3383 data: 0.0002 max mem: 8426 +[2024-12-11 04:00:54 root] (utils.py 283): INFO Epoch: [27] [ 820/2502] eta: 1:06:12 lr: 0.000002 loss_cls: 4.1920 (3.9145) grad_norm: 2.4079 (2.4438) time: 2.3610 data: 0.0002 max mem: 8426 +[2024-12-11 04:01:18 root] (utils.py 283): INFO Epoch: [27] [ 830/2502] eta: 1:05:49 lr: 0.000002 loss_cls: 4.1403 (3.9153) grad_norm: 2.3800 (2.4435) time: 2.3915 data: 0.0002 max mem: 8426 +[2024-12-11 04:01:41 root] (utils.py 283): INFO Epoch: [27] [ 840/2502] eta: 1:05:24 lr: 0.000002 loss_cls: 3.8070 (3.9124) grad_norm: 2.3800 (2.4428) time: 2.3553 data: 0.0003 max mem: 8426 +[2024-12-11 04:02:05 root] (utils.py 283): INFO Epoch: [27] [ 850/2502] eta: 1:05:01 lr: 0.000002 loss_cls: 3.9382 (3.9138) grad_norm: 2.3714 (2.4431) time: 2.3551 data: 0.0002 max mem: 8426 +[2024-12-11 04:02:28 root] (utils.py 283): INFO Epoch: [27] [ 860/2502] eta: 1:04:37 lr: 0.000002 loss_cls: 4.1318 (3.9149) grad_norm: 2.4107 (2.4432) time: 2.3689 data: 0.0002 max mem: 8426 +[2024-12-11 04:02:52 root] (utils.py 283): INFO Epoch: [27] [ 870/2502] eta: 1:04:14 lr: 0.000002 loss_cls: 3.7854 (3.9118) grad_norm: 2.4107 (2.4431) time: 2.3555 data: 0.0003 max mem: 8426 +[2024-12-11 04:03:16 root] (utils.py 283): INFO Epoch: [27] [ 880/2502] eta: 1:03:51 lr: 0.000002 loss_cls: 3.9317 (3.9165) grad_norm: 2.3880 (2.4428) time: 2.3846 data: 0.0003 max mem: 8426 +[2024-12-11 04:03:40 root] (utils.py 283): INFO Epoch: [27] [ 890/2502] eta: 1:03:28 lr: 0.000002 loss_cls: 4.2951 (3.9200) grad_norm: 2.4045 (2.4428) time: 2.4030 data: 0.0002 max mem: 8426 +[2024-12-11 04:04:04 root] (utils.py 283): INFO Epoch: [27] [ 900/2502] eta: 1:03:05 lr: 0.000002 loss_cls: 4.2219 (3.9182) grad_norm: 2.4290 (2.4427) time: 2.4041 data: 0.0003 max mem: 8426 +[2024-12-11 04:04:28 root] (utils.py 283): INFO Epoch: [27] [ 910/2502] eta: 1:02:42 lr: 0.000002 loss_cls: 4.1950 (3.9208) grad_norm: 2.3825 (2.4425) time: 2.4089 data: 0.0003 max mem: 8426 +[2024-12-11 04:04:52 root] (utils.py 283): INFO Epoch: [27] [ 920/2502] eta: 1:02:18 lr: 0.000002 loss_cls: 4.2721 (3.9201) grad_norm: 2.4225 (2.4424) time: 2.3790 data: 0.0003 max mem: 8426 +[2024-12-11 04:05:15 root] (utils.py 283): INFO Epoch: [27] [ 930/2502] eta: 1:01:55 lr: 0.000002 loss_cls: 3.7270 (3.9176) grad_norm: 2.4290 (2.4423) time: 2.3511 data: 0.0003 max mem: 8426 +[2024-12-11 04:05:39 root] (utils.py 283): INFO Epoch: [27] [ 940/2502] eta: 1:01:30 lr: 0.000002 loss_cls: 3.8548 (3.9178) grad_norm: 2.3914 (2.4413) time: 2.3434 data: 0.0003 max mem: 8426 +[2024-12-11 04:06:02 root] (utils.py 283): INFO Epoch: [27] [ 950/2502] eta: 1:01:06 lr: 0.000002 loss_cls: 4.0166 (3.9138) grad_norm: 2.4053 (2.4413) time: 2.3305 data: 0.0002 max mem: 8426 +[2024-12-11 04:06:25 root] (utils.py 283): INFO Epoch: [27] [ 960/2502] eta: 1:00:41 lr: 0.000002 loss_cls: 3.6988 (3.9124) grad_norm: 2.4635 (2.4411) time: 2.3079 data: 0.0002 max mem: 8426 +[2024-12-11 04:06:48 root] (utils.py 283): INFO Epoch: [27] [ 970/2502] eta: 1:00:17 lr: 0.000002 loss_cls: 3.9532 (3.9132) grad_norm: 2.4043 (2.4412) time: 2.3066 data: 0.0003 max mem: 8426 +[2024-12-11 04:07:11 root] (utils.py 283): INFO Epoch: [27] [ 980/2502] eta: 0:59:53 lr: 0.000002 loss_cls: 4.0156 (3.9148) grad_norm: 2.4332 (2.4414) time: 2.3136 data: 0.0003 max mem: 8426 +[2024-12-11 04:07:34 root] (utils.py 283): INFO Epoch: [27] [ 990/2502] eta: 0:59:29 lr: 0.000002 loss_cls: 4.0158 (3.9161) grad_norm: 2.4546 (2.4414) time: 2.3125 data: 0.0002 max mem: 8426 +[2024-12-11 04:07:58 root] (utils.py 283): INFO Epoch: [27] [1000/2502] eta: 0:59:05 lr: 0.000002 loss_cls: 4.0158 (3.9163) grad_norm: 2.4142 (2.4414) time: 2.3529 data: 0.0002 max mem: 8426 +[2024-12-11 04:08:21 root] (utils.py 283): INFO Epoch: [27] [1010/2502] eta: 0:58:40 lr: 0.000002 loss_cls: 4.2105 (3.9198) grad_norm: 2.4164 (2.4417) time: 2.3227 data: 0.0003 max mem: 8426 +[2024-12-11 04:08:46 root] (utils.py 283): INFO Epoch: [27] [1020/2502] eta: 0:58:19 lr: 0.000002 loss_cls: 4.2362 (3.9190) grad_norm: 2.4792 (2.4419) time: 2.3918 data: 0.0002 max mem: 8426 +[2024-12-11 04:09:10 root] (utils.py 283): INFO Epoch: [27] [1030/2502] eta: 0:57:55 lr: 0.000002 loss_cls: 3.8221 (3.9166) grad_norm: 2.4291 (2.4416) time: 2.4381 data: 0.0002 max mem: 8426 +[2024-12-11 04:09:33 root] (utils.py 283): INFO Epoch: [27] [1040/2502] eta: 0:57:31 lr: 0.000002 loss_cls: 3.9938 (3.9177) grad_norm: 2.4110 (2.4414) time: 2.3486 data: 0.0003 max mem: 8426 +[2024-12-11 04:09:56 root] (utils.py 283): INFO Epoch: [27] [1050/2502] eta: 0:57:07 lr: 0.000002 loss_cls: 3.9322 (3.9157) grad_norm: 2.3790 (2.4410) time: 2.3398 data: 0.0003 max mem: 8426 +[2024-12-11 04:10:19 root] (utils.py 283): INFO Epoch: [27] [1060/2502] eta: 0:56:42 lr: 0.000002 loss_cls: 3.8290 (3.9142) grad_norm: 2.3831 (2.4407) time: 2.2903 data: 0.0003 max mem: 8426 +[2024-12-11 04:10:42 root] (utils.py 283): INFO Epoch: [27] [1070/2502] eta: 0:56:18 lr: 0.000002 loss_cls: 4.0278 (3.9161) grad_norm: 2.4155 (2.4405) time: 2.2596 data: 0.0003 max mem: 8426 +[2024-12-11 04:11:05 root] (utils.py 283): INFO Epoch: [27] [1080/2502] eta: 0:55:54 lr: 0.000002 loss_cls: 4.1611 (3.9154) grad_norm: 2.4144 (2.4407) time: 2.3338 data: 0.0003 max mem: 8426 +[2024-12-11 04:11:28 root] (utils.py 283): INFO Epoch: [27] [1090/2502] eta: 0:55:30 lr: 0.000002 loss_cls: 4.1542 (3.9164) grad_norm: 2.4619 (2.4414) time: 2.3300 data: 0.0002 max mem: 8426 +[2024-12-11 04:11:51 root] (utils.py 283): INFO Epoch: [27] [1100/2502] eta: 0:55:05 lr: 0.000002 loss_cls: 4.1144 (3.9167) grad_norm: 2.4676 (2.4410) time: 2.2906 data: 0.0002 max mem: 8426 +[2024-12-11 04:12:14 root] (utils.py 283): INFO Epoch: [27] [1110/2502] eta: 0:54:41 lr: 0.000002 loss_cls: 4.0930 (3.9172) grad_norm: 2.3756 (2.4405) time: 2.3050 data: 0.0003 max mem: 8426 +[2024-12-11 04:12:37 root] (utils.py 283): INFO Epoch: [27] [1120/2502] eta: 0:54:17 lr: 0.000002 loss_cls: 4.1044 (3.9175) grad_norm: 2.3776 (2.4402) time: 2.3094 data: 0.0003 max mem: 8426 +[2024-12-11 04:13:01 root] (utils.py 283): INFO Epoch: [27] [1130/2502] eta: 0:53:53 lr: 0.000002 loss_cls: 4.0637 (3.9148) grad_norm: 2.3960 (2.4398) time: 2.3114 data: 0.0003 max mem: 8426 +[2024-12-11 04:13:24 root] (utils.py 283): INFO Epoch: [27] [1140/2502] eta: 0:53:29 lr: 0.000002 loss_cls: 3.8247 (3.9133) grad_norm: 2.3658 (2.4392) time: 2.3246 data: 0.0003 max mem: 8426 +[2024-12-11 04:13:48 root] (utils.py 283): INFO Epoch: [27] [1150/2502] eta: 0:53:06 lr: 0.000002 loss_cls: 3.7123 (3.9110) grad_norm: 2.3340 (2.4388) time: 2.3516 data: 0.0003 max mem: 8426 +[2024-12-11 04:14:11 root] (utils.py 283): INFO Epoch: [27] [1160/2502] eta: 0:52:42 lr: 0.000002 loss_cls: 4.0416 (3.9122) grad_norm: 2.3888 (2.4390) time: 2.3684 data: 0.0003 max mem: 8426 +[2024-12-11 04:14:35 root] (utils.py 283): INFO Epoch: [27] [1170/2502] eta: 0:52:19 lr: 0.000002 loss_cls: 4.2805 (3.9160) grad_norm: 2.4258 (2.4389) time: 2.3888 data: 0.0003 max mem: 8426 +[2024-12-11 04:14:59 root] (utils.py 283): INFO Epoch: [27] [1180/2502] eta: 0:51:55 lr: 0.000002 loss_cls: 4.2742 (3.9139) grad_norm: 2.3518 (2.4381) time: 2.3662 data: 0.0002 max mem: 8426 +[2024-12-11 04:15:22 root] (utils.py 283): INFO Epoch: [27] [1190/2502] eta: 0:51:31 lr: 0.000002 loss_cls: 3.9078 (3.9142) grad_norm: 2.3332 (2.4376) time: 2.3059 data: 0.0003 max mem: 8426 +[2024-12-11 04:15:45 root] (utils.py 283): INFO Epoch: [27] [1200/2502] eta: 0:51:07 lr: 0.000002 loss_cls: 3.9078 (3.9137) grad_norm: 2.4084 (2.4374) time: 2.2942 data: 0.0003 max mem: 8426 +[2024-12-11 04:16:09 root] (utils.py 283): INFO Epoch: [27] [1210/2502] eta: 0:50:44 lr: 0.000002 loss_cls: 3.7670 (3.9111) grad_norm: 2.4505 (2.4378) time: 2.3609 data: 0.0003 max mem: 8426 +[2024-12-11 04:16:32 root] (utils.py 283): INFO Epoch: [27] [1220/2502] eta: 0:50:20 lr: 0.000002 loss_cls: 3.7902 (3.9098) grad_norm: 2.4705 (2.4378) time: 2.3489 data: 0.0003 max mem: 8426 +[2024-12-11 04:16:55 root] (utils.py 283): INFO Epoch: [27] [1230/2502] eta: 0:49:55 lr: 0.000002 loss_cls: 3.7902 (3.9081) grad_norm: 2.4926 (2.4382) time: 2.2898 data: 0.0003 max mem: 8426 +[2024-12-11 04:17:18 root] (utils.py 283): INFO Epoch: [27] [1240/2502] eta: 0:49:32 lr: 0.000002 loss_cls: 4.0231 (3.9071) grad_norm: 2.4556 (2.4381) time: 2.3129 data: 0.0003 max mem: 8426 +[2024-12-11 04:17:41 root] (utils.py 283): INFO Epoch: [27] [1250/2502] eta: 0:49:08 lr: 0.000002 loss_cls: 3.9695 (3.9070) grad_norm: 2.4494 (2.4382) time: 2.3372 data: 0.0003 max mem: 8426 +[2024-12-11 04:18:05 root] (utils.py 283): INFO Epoch: [27] [1260/2502] eta: 0:48:45 lr: 0.000002 loss_cls: 3.8836 (3.9068) grad_norm: 2.4494 (2.4385) time: 2.3625 data: 0.0003 max mem: 8426 +[2024-12-11 04:18:28 root] (utils.py 283): INFO Epoch: [27] [1270/2502] eta: 0:48:20 lr: 0.000002 loss_cls: 4.0433 (3.9076) grad_norm: 2.4681 (2.4387) time: 2.3379 data: 0.0003 max mem: 8426 +[2024-12-11 04:18:52 root] (utils.py 283): INFO Epoch: [27] [1280/2502] eta: 0:47:57 lr: 0.000002 loss_cls: 4.0433 (3.9099) grad_norm: 2.4631 (2.4390) time: 2.3284 data: 0.0003 max mem: 8426 +[2024-12-11 04:19:15 root] (utils.py 283): INFO Epoch: [27] [1290/2502] eta: 0:47:33 lr: 0.000002 loss_cls: 4.0477 (3.9091) grad_norm: 2.4352 (2.4387) time: 2.3478 data: 0.0002 max mem: 8426 +[2024-12-11 04:19:38 root] (utils.py 283): INFO Epoch: [27] [1300/2502] eta: 0:47:09 lr: 0.000002 loss_cls: 4.0477 (3.9081) grad_norm: 2.3948 (2.4391) time: 2.3340 data: 0.0002 max mem: 8426 +[2024-12-11 04:20:02 root] (utils.py 283): INFO Epoch: [27] [1310/2502] eta: 0:46:46 lr: 0.000002 loss_cls: 3.8371 (3.9057) grad_norm: 2.4317 (2.4388) time: 2.3546 data: 0.0002 max mem: 8426 +[2024-12-11 04:20:25 root] (utils.py 283): INFO Epoch: [27] [1320/2502] eta: 0:46:22 lr: 0.000002 loss_cls: 3.8049 (3.9039) grad_norm: 2.4202 (2.4389) time: 2.3502 data: 0.0003 max mem: 8426 +[2024-12-11 04:20:49 root] (utils.py 283): INFO Epoch: [27] [1330/2502] eta: 0:45:59 lr: 0.000002 loss_cls: 4.0594 (3.9044) grad_norm: 2.4666 (2.4393) time: 2.3470 data: 0.0003 max mem: 8426 +[2024-12-11 04:21:12 root] (utils.py 283): INFO Epoch: [27] [1340/2502] eta: 0:45:35 lr: 0.000002 loss_cls: 4.1374 (3.9049) grad_norm: 2.4451 (2.4392) time: 2.3564 data: 0.0003 max mem: 8426 +[2024-12-11 04:21:36 root] (utils.py 283): INFO Epoch: [27] [1350/2502] eta: 0:45:12 lr: 0.000002 loss_cls: 4.0286 (3.9039) grad_norm: 2.4311 (2.4390) time: 2.3543 data: 0.0003 max mem: 8426 +[2024-12-11 04:22:00 root] (utils.py 283): INFO Epoch: [27] [1360/2502] eta: 0:44:49 lr: 0.000002 loss_cls: 4.0553 (3.9040) grad_norm: 2.4026 (2.4391) time: 2.3738 data: 0.0003 max mem: 8426 +[2024-12-11 04:22:24 root] (utils.py 283): INFO Epoch: [27] [1370/2502] eta: 0:44:25 lr: 0.000002 loss_cls: 4.1992 (3.9060) grad_norm: 2.4074 (2.4395) time: 2.3725 data: 0.0003 max mem: 8426 +[2024-12-11 04:22:48 root] (utils.py 283): INFO Epoch: [27] [1380/2502] eta: 0:44:02 lr: 0.000002 loss_cls: 4.1193 (3.9056) grad_norm: 2.4074 (2.4393) time: 2.3852 data: 0.0003 max mem: 8426 +[2024-12-11 04:23:11 root] (utils.py 283): INFO Epoch: [27] [1390/2502] eta: 0:43:38 lr: 0.000002 loss_cls: 4.0090 (3.9061) grad_norm: 2.3864 (2.4392) time: 2.3772 data: 0.0003 max mem: 8426 +[2024-12-11 04:23:36 root] (utils.py 283): INFO Epoch: [27] [1400/2502] eta: 0:43:16 lr: 0.000002 loss_cls: 4.0131 (3.9057) grad_norm: 2.4233 (2.4396) time: 2.4209 data: 0.0003 max mem: 8426 +[2024-12-11 04:23:59 root] (utils.py 283): INFO Epoch: [27] [1410/2502] eta: 0:42:52 lr: 0.000002 loss_cls: 3.9429 (3.9067) grad_norm: 2.4084 (2.4394) time: 2.3842 data: 0.0003 max mem: 8426 +[2024-12-11 04:24:22 root] (utils.py 283): INFO Epoch: [27] [1420/2502] eta: 0:42:28 lr: 0.000002 loss_cls: 3.9429 (3.9070) grad_norm: 2.3286 (2.4387) time: 2.3171 data: 0.0003 max mem: 8426 +[2024-12-11 04:24:46 root] (utils.py 283): INFO Epoch: [27] [1430/2502] eta: 0:42:05 lr: 0.000002 loss_cls: 4.0224 (3.9070) grad_norm: 2.3643 (2.4390) time: 2.3597 data: 0.0003 max mem: 8426 +[2024-12-11 04:25:09 root] (utils.py 283): INFO Epoch: [27] [1440/2502] eta: 0:41:41 lr: 0.000002 loss_cls: 4.0013 (3.9074) grad_norm: 2.4416 (2.4389) time: 2.3546 data: 0.0003 max mem: 8426 +[2024-12-11 04:25:33 root] (utils.py 283): INFO Epoch: [27] [1450/2502] eta: 0:41:18 lr: 0.000002 loss_cls: 3.8591 (3.9073) grad_norm: 2.4181 (2.4388) time: 2.3714 data: 0.0003 max mem: 8426 +[2024-12-11 04:25:57 root] (utils.py 283): INFO Epoch: [27] [1460/2502] eta: 0:40:54 lr: 0.000002 loss_cls: 4.0303 (3.9075) grad_norm: 2.4689 (2.4392) time: 2.3895 data: 0.0002 max mem: 8426 +[2024-12-11 04:26:22 root] (utils.py 283): INFO Epoch: [27] [1470/2502] eta: 0:40:31 lr: 0.000002 loss_cls: 4.0926 (3.9074) grad_norm: 2.5216 (2.4397) time: 2.4094 data: 0.0002 max mem: 8426 +[2024-12-11 04:26:45 root] (utils.py 283): INFO Epoch: [27] [1480/2502] eta: 0:40:08 lr: 0.000002 loss_cls: 3.7516 (3.9065) grad_norm: 2.4462 (2.4398) time: 2.3830 data: 0.0003 max mem: 8426 +[2024-12-11 04:27:09 root] (utils.py 283): INFO Epoch: [27] [1490/2502] eta: 0:39:44 lr: 0.000002 loss_cls: 3.7004 (3.9036) grad_norm: 2.4311 (2.4399) time: 2.3477 data: 0.0003 max mem: 8426 +[2024-12-11 04:27:33 root] (utils.py 283): INFO Epoch: [27] [1500/2502] eta: 0:39:21 lr: 0.000002 loss_cls: 4.2107 (3.9064) grad_norm: 2.4451 (2.4400) time: 2.3858 data: 0.0003 max mem: 8426 +[2024-12-11 04:27:57 root] (utils.py 283): INFO Epoch: [27] [1510/2502] eta: 0:38:58 lr: 0.000002 loss_cls: 4.3282 (3.9084) grad_norm: 2.4451 (2.4400) time: 2.4114 data: 0.0003 max mem: 8426 +[2024-12-11 04:28:20 root] (utils.py 283): INFO Epoch: [27] [1520/2502] eta: 0:38:34 lr: 0.000002 loss_cls: 3.9424 (3.9064) grad_norm: 2.4335 (2.4403) time: 2.3707 data: 0.0003 max mem: 8426 +[2024-12-11 04:28:43 root] (utils.py 283): INFO Epoch: [27] [1530/2502] eta: 0:38:10 lr: 0.000002 loss_cls: 3.5969 (3.9041) grad_norm: 2.4483 (2.4402) time: 2.3176 data: 0.0003 max mem: 8426 +[2024-12-11 04:29:07 root] (utils.py 283): INFO Epoch: [27] [1540/2502] eta: 0:37:47 lr: 0.000002 loss_cls: 3.6096 (3.9027) grad_norm: 2.4483 (2.4404) time: 2.3362 data: 0.0003 max mem: 8426 +[2024-12-11 04:29:30 root] (utils.py 283): INFO Epoch: [27] [1550/2502] eta: 0:37:23 lr: 0.000002 loss_cls: 3.6479 (3.9014) grad_norm: 2.4454 (2.4404) time: 2.3571 data: 0.0003 max mem: 8426 +[2024-12-11 04:29:54 root] (utils.py 283): INFO Epoch: [27] [1560/2502] eta: 0:36:59 lr: 0.000002 loss_cls: 3.9673 (3.9015) grad_norm: 2.3960 (2.4404) time: 2.3480 data: 0.0003 max mem: 8426 +[2024-12-11 04:30:17 root] (utils.py 283): INFO Epoch: [27] [1570/2502] eta: 0:36:35 lr: 0.000002 loss_cls: 3.6177 (3.8997) grad_norm: 2.4371 (2.4406) time: 2.3259 data: 0.0002 max mem: 8426 +[2024-12-11 04:30:40 root] (utils.py 283): INFO Epoch: [27] [1580/2502] eta: 0:36:12 lr: 0.000002 loss_cls: 3.8523 (3.9002) grad_norm: 2.4662 (2.4411) time: 2.3328 data: 0.0003 max mem: 8426 +[2024-12-11 04:31:03 root] (utils.py 283): INFO Epoch: [27] [1590/2502] eta: 0:35:48 lr: 0.000002 loss_cls: 3.9031 (3.8990) grad_norm: 2.4642 (2.4410) time: 2.3283 data: 0.0003 max mem: 8426 +[2024-12-11 04:31:26 root] (utils.py 283): INFO Epoch: [27] [1600/2502] eta: 0:35:24 lr: 0.000002 loss_cls: 3.6270 (3.8992) grad_norm: 2.4065 (2.4413) time: 2.2820 data: 0.0003 max mem: 8426 +[2024-12-11 04:31:50 root] (utils.py 283): INFO Epoch: [27] [1610/2502] eta: 0:35:01 lr: 0.000002 loss_cls: 3.9877 (3.8989) grad_norm: 2.4567 (2.4413) time: 2.3250 data: 0.0003 max mem: 8426 +[2024-12-11 04:32:14 root] (utils.py 283): INFO Epoch: [27] [1620/2502] eta: 0:34:37 lr: 0.000002 loss_cls: 4.0815 (3.8999) grad_norm: 2.3978 (2.4411) time: 2.3824 data: 0.0002 max mem: 8426 +[2024-12-11 04:32:38 root] (utils.py 283): INFO Epoch: [27] [1630/2502] eta: 0:34:14 lr: 0.000002 loss_cls: 4.0815 (3.9008) grad_norm: 2.4172 (2.4412) time: 2.4011 data: 0.0003 max mem: 8426 +[2024-12-11 04:33:01 root] (utils.py 283): INFO Epoch: [27] [1640/2502] eta: 0:33:50 lr: 0.000002 loss_cls: 3.9187 (3.8996) grad_norm: 2.4743 (2.4413) time: 2.3821 data: 0.0003 max mem: 8426 +[2024-12-11 04:33:25 root] (utils.py 283): INFO Epoch: [27] [1650/2502] eta: 0:33:27 lr: 0.000002 loss_cls: 3.9339 (3.8994) grad_norm: 2.4865 (2.4417) time: 2.3527 data: 0.0002 max mem: 8426 +[2024-12-11 04:33:48 root] (utils.py 283): INFO Epoch: [27] [1660/2502] eta: 0:33:03 lr: 0.000002 loss_cls: 3.8883 (3.8997) grad_norm: 2.5039 (2.4416) time: 2.3398 data: 0.0003 max mem: 8426 +[2024-12-11 04:34:11 root] (utils.py 283): INFO Epoch: [27] [1670/2502] eta: 0:32:39 lr: 0.000002 loss_cls: 4.1918 (3.9009) grad_norm: 2.4756 (2.4421) time: 2.3145 data: 0.0003 max mem: 8426 +[2024-12-11 04:34:34 root] (utils.py 283): INFO Epoch: [27] [1680/2502] eta: 0:32:16 lr: 0.000002 loss_cls: 4.2137 (3.9012) grad_norm: 2.4738 (2.4421) time: 2.3160 data: 0.0003 max mem: 8426 +[2024-12-11 04:34:57 root] (utils.py 283): INFO Epoch: [27] [1690/2502] eta: 0:31:52 lr: 0.000002 loss_cls: 3.9416 (3.8997) grad_norm: 2.4669 (2.4425) time: 2.3084 data: 0.0003 max mem: 8426 +[2024-12-11 04:35:21 root] (utils.py 283): INFO Epoch: [27] [1700/2502] eta: 0:31:28 lr: 0.000002 loss_cls: 3.6433 (3.8980) grad_norm: 2.4669 (2.4429) time: 2.3433 data: 0.0003 max mem: 8426 +[2024-12-11 04:35:45 root] (utils.py 283): INFO Epoch: [27] [1710/2502] eta: 0:31:05 lr: 0.000002 loss_cls: 3.8187 (3.8983) grad_norm: 2.4873 (2.4434) time: 2.3533 data: 0.0003 max mem: 8426 +[2024-12-11 04:36:08 root] (utils.py 283): INFO Epoch: [27] [1720/2502] eta: 0:30:41 lr: 0.000002 loss_cls: 3.9107 (3.8981) grad_norm: 2.4803 (2.4432) time: 2.3500 data: 0.0003 max mem: 8426 +[2024-12-11 04:36:32 root] (utils.py 283): INFO Epoch: [27] [1730/2502] eta: 0:30:18 lr: 0.000002 loss_cls: 3.8825 (3.8973) grad_norm: 2.4116 (2.4433) time: 2.3673 data: 0.0002 max mem: 8426 +[2024-12-11 04:36:55 root] (utils.py 283): INFO Epoch: [27] [1740/2502] eta: 0:29:54 lr: 0.000002 loss_cls: 4.0140 (3.8987) grad_norm: 2.4323 (2.4434) time: 2.3335 data: 0.0002 max mem: 8426 +[2024-12-11 04:37:19 root] (utils.py 283): INFO Epoch: [27] [1750/2502] eta: 0:29:30 lr: 0.000002 loss_cls: 4.2165 (3.8987) grad_norm: 2.4788 (2.4437) time: 2.3334 data: 0.0002 max mem: 8426 +[2024-12-11 04:37:42 root] (utils.py 283): INFO Epoch: [27] [1760/2502] eta: 0:29:07 lr: 0.000002 loss_cls: 3.7730 (3.8977) grad_norm: 2.4675 (2.4438) time: 2.3556 data: 0.0003 max mem: 8426 +[2024-12-11 04:38:06 root] (utils.py 283): INFO Epoch: [27] [1770/2502] eta: 0:28:43 lr: 0.000002 loss_cls: 3.8247 (3.8983) grad_norm: 2.4719 (2.4438) time: 2.3651 data: 0.0003 max mem: 8426 +[2024-12-11 04:38:29 root] (utils.py 283): INFO Epoch: [27] [1780/2502] eta: 0:28:20 lr: 0.000002 loss_cls: 3.9041 (3.8981) grad_norm: 2.4419 (2.4434) time: 2.3458 data: 0.0003 max mem: 8426 +[2024-12-11 04:38:53 root] (utils.py 283): INFO Epoch: [27] [1790/2502] eta: 0:27:56 lr: 0.000002 loss_cls: 3.9149 (3.8986) grad_norm: 2.3737 (2.4434) time: 2.3526 data: 0.0003 max mem: 8426 +[2024-12-11 04:39:17 root] (utils.py 283): INFO Epoch: [27] [1800/2502] eta: 0:27:33 lr: 0.000002 loss_cls: 3.9702 (3.8991) grad_norm: 2.3751 (2.4432) time: 2.4216 data: 0.0003 max mem: 8426 +[2024-12-11 04:39:42 root] (utils.py 283): INFO Epoch: [27] [1810/2502] eta: 0:27:10 lr: 0.000002 loss_cls: 4.1161 (3.9000) grad_norm: 2.3691 (2.4429) time: 2.4382 data: 0.0003 max mem: 8426 +[2024-12-11 04:40:06 root] (utils.py 283): INFO Epoch: [27] [1820/2502] eta: 0:26:46 lr: 0.000002 loss_cls: 4.1360 (3.8997) grad_norm: 2.3728 (2.4430) time: 2.4065 data: 0.0003 max mem: 8426 +[2024-12-11 04:40:29 root] (utils.py 283): INFO Epoch: [27] [1830/2502] eta: 0:26:23 lr: 0.000002 loss_cls: 3.9444 (3.9000) grad_norm: 2.4497 (2.4430) time: 2.3844 data: 0.0003 max mem: 8426 +[2024-12-11 04:40:53 root] (utils.py 283): INFO Epoch: [27] [1840/2502] eta: 0:25:59 lr: 0.000002 loss_cls: 3.9444 (3.8998) grad_norm: 2.4070 (2.4429) time: 2.3548 data: 0.0003 max mem: 8426 +[2024-12-11 04:41:16 root] (utils.py 283): INFO Epoch: [27] [1850/2502] eta: 0:25:35 lr: 0.000002 loss_cls: 3.7414 (3.8983) grad_norm: 2.3989 (2.4428) time: 2.3102 data: 0.0003 max mem: 8426 +[2024-12-11 04:41:39 root] (utils.py 283): INFO Epoch: [27] [1860/2502] eta: 0:25:12 lr: 0.000002 loss_cls: 3.7414 (3.8976) grad_norm: 2.3910 (2.4427) time: 2.3186 data: 0.0003 max mem: 8426 +[2024-12-11 04:42:03 root] (utils.py 283): INFO Epoch: [27] [1870/2502] eta: 0:24:48 lr: 0.000002 loss_cls: 4.1213 (3.8994) grad_norm: 2.3910 (2.4426) time: 2.3504 data: 0.0003 max mem: 8426 +[2024-12-11 04:42:27 root] (utils.py 283): INFO Epoch: [27] [1880/2502] eta: 0:24:25 lr: 0.000002 loss_cls: 4.1838 (3.9008) grad_norm: 2.4067 (2.4427) time: 2.3846 data: 0.0003 max mem: 8426 +[2024-12-11 04:42:50 root] (utils.py 283): INFO Epoch: [27] [1890/2502] eta: 0:24:01 lr: 0.000002 loss_cls: 4.1778 (3.9015) grad_norm: 2.4364 (2.4432) time: 2.3678 data: 0.0003 max mem: 8426 +[2024-12-11 04:43:13 root] (utils.py 283): INFO Epoch: [27] [1900/2502] eta: 0:23:37 lr: 0.000002 loss_cls: 4.1619 (3.9015) grad_norm: 2.4780 (2.4433) time: 2.3052 data: 0.0003 max mem: 8426 +[2024-12-11 04:43:35 root] (utils.py 283): INFO Epoch: [27] [1910/2502] eta: 0:23:13 lr: 0.000002 loss_cls: 4.0615 (3.9016) grad_norm: 2.4438 (2.4434) time: 2.2599 data: 0.0003 max mem: 8426 +[2024-12-11 04:43:59 root] (utils.py 283): INFO Epoch: [27] [1920/2502] eta: 0:22:50 lr: 0.000002 loss_cls: 3.9125 (3.9002) grad_norm: 2.3993 (2.4433) time: 2.2945 data: 0.0003 max mem: 8426 +[2024-12-11 04:44:34 root] (utils.py 283): INFO Epoch: [27] [1930/2502] eta: 0:22:30 lr: 0.000002 loss_cls: 3.7110 (3.9001) grad_norm: 2.4399 (2.4437) time: 2.9642 data: 0.0003 max mem: 8426 +[2024-12-11 04:45:04 root] (utils.py 283): INFO Epoch: [27] [1940/2502] eta: 0:22:08 lr: 0.000002 loss_cls: 3.7599 (3.8997) grad_norm: 2.4600 (2.4441) time: 3.2568 data: 0.0003 max mem: 8426 +[2024-12-11 04:45:26 root] (utils.py 283): INFO Epoch: [27] [1950/2502] eta: 0:21:44 lr: 0.000002 loss_cls: 3.9759 (3.8987) grad_norm: 2.4551 (2.4442) time: 2.5852 data: 0.0003 max mem: 8426 +[2024-12-11 04:45:49 root] (utils.py 283): INFO Epoch: [27] [1960/2502] eta: 0:21:20 lr: 0.000002 loss_cls: 3.9084 (3.8984) grad_norm: 2.4482 (2.4443) time: 2.2544 data: 0.0003 max mem: 8426 +[2024-12-11 04:46:12 root] (utils.py 283): INFO Epoch: [27] [1970/2502] eta: 0:20:57 lr: 0.000002 loss_cls: 4.0379 (3.8983) grad_norm: 2.4148 (2.4440) time: 2.3165 data: 0.0003 max mem: 8426 +[2024-12-11 04:46:36 root] (utils.py 283): INFO Epoch: [27] [1980/2502] eta: 0:20:33 lr: 0.000002 loss_cls: 3.8472 (3.8980) grad_norm: 2.3882 (2.4438) time: 2.3349 data: 0.0003 max mem: 8426 +[2024-12-11 04:46:59 root] (utils.py 283): INFO Epoch: [27] [1990/2502] eta: 0:20:09 lr: 0.000002 loss_cls: 3.8554 (3.8980) grad_norm: 2.4114 (2.4436) time: 2.3393 data: 0.0003 max mem: 8426 +[2024-12-11 04:47:23 root] (utils.py 283): INFO Epoch: [27] [2000/2502] eta: 0:19:45 lr: 0.000002 loss_cls: 4.0914 (3.8981) grad_norm: 2.4868 (2.4441) time: 2.3453 data: 0.0003 max mem: 8426 +[2024-12-11 04:47:46 root] (utils.py 283): INFO Epoch: [27] [2010/2502] eta: 0:19:22 lr: 0.000002 loss_cls: 3.7112 (3.8970) grad_norm: 2.4875 (2.4442) time: 2.3308 data: 0.0003 max mem: 8426 +[2024-12-11 04:48:09 root] (utils.py 283): INFO Epoch: [27] [2020/2502] eta: 0:18:58 lr: 0.000002 loss_cls: 3.5618 (3.8952) grad_norm: 2.4451 (2.4442) time: 2.3236 data: 0.0003 max mem: 8426 +[2024-12-11 04:48:33 root] (utils.py 283): INFO Epoch: [27] [2030/2502] eta: 0:18:34 lr: 0.000002 loss_cls: 3.6028 (3.8943) grad_norm: 2.4396 (2.4439) time: 2.3402 data: 0.0003 max mem: 8426 +[2024-12-11 04:48:56 root] (utils.py 283): INFO Epoch: [27] [2040/2502] eta: 0:18:11 lr: 0.000002 loss_cls: 3.6841 (3.8940) grad_norm: 2.4224 (2.4437) time: 2.3606 data: 0.0003 max mem: 8426 +[2024-12-11 04:49:19 root] (utils.py 283): INFO Epoch: [27] [2050/2502] eta: 0:17:47 lr: 0.000002 loss_cls: 3.6930 (3.8934) grad_norm: 2.3754 (2.4434) time: 2.3174 data: 0.0003 max mem: 8426 +[2024-12-11 04:49:42 root] (utils.py 283): INFO Epoch: [27] [2060/2502] eta: 0:17:23 lr: 0.000002 loss_cls: 3.9498 (3.8934) grad_norm: 2.3997 (2.4433) time: 2.2842 data: 0.0003 max mem: 8426 +[2024-12-11 04:50:05 root] (utils.py 283): INFO Epoch: [27] [2070/2502] eta: 0:17:00 lr: 0.000002 loss_cls: 3.8577 (3.8917) grad_norm: 2.4302 (2.4431) time: 2.3096 data: 0.0003 max mem: 8426 +[2024-12-11 04:50:28 root] (utils.py 283): INFO Epoch: [27] [2080/2502] eta: 0:16:36 lr: 0.000002 loss_cls: 3.6936 (3.8912) grad_norm: 2.3797 (2.4428) time: 2.3087 data: 0.0003 max mem: 8426 +[2024-12-11 04:50:53 root] (utils.py 283): INFO Epoch: [27] [2090/2502] eta: 0:16:12 lr: 0.000002 loss_cls: 3.8662 (3.8906) grad_norm: 2.4005 (2.4429) time: 2.3801 data: 0.0003 max mem: 8426 +[2024-12-11 04:51:17 root] (utils.py 283): INFO Epoch: [27] [2100/2502] eta: 0:15:49 lr: 0.000002 loss_cls: 3.7667 (3.8900) grad_norm: 2.4005 (2.4428) time: 2.4182 data: 0.0003 max mem: 8426 +[2024-12-11 04:51:40 root] (utils.py 283): INFO Epoch: [27] [2110/2502] eta: 0:15:25 lr: 0.000002 loss_cls: 3.6817 (3.8898) grad_norm: 2.3862 (2.4428) time: 2.3763 data: 0.0003 max mem: 8426 +[2024-12-11 04:52:04 root] (utils.py 283): INFO Epoch: [27] [2120/2502] eta: 0:15:02 lr: 0.000002 loss_cls: 3.7993 (3.8892) grad_norm: 2.4193 (2.4429) time: 2.3905 data: 0.0002 max mem: 8426 +[2024-12-11 04:52:28 root] (utils.py 283): INFO Epoch: [27] [2130/2502] eta: 0:14:38 lr: 0.000002 loss_cls: 3.9551 (3.8896) grad_norm: 2.4079 (2.4426) time: 2.3908 data: 0.0002 max mem: 8426 +[2024-12-11 04:52:52 root] (utils.py 283): INFO Epoch: [27] [2140/2502] eta: 0:14:15 lr: 0.000002 loss_cls: 4.0711 (3.8899) grad_norm: 2.3371 (2.4425) time: 2.3902 data: 0.0003 max mem: 8426 +[2024-12-11 04:53:16 root] (utils.py 283): INFO Epoch: [27] [2150/2502] eta: 0:13:51 lr: 0.000002 loss_cls: 3.9870 (3.8901) grad_norm: 2.4581 (2.4427) time: 2.3906 data: 0.0003 max mem: 8426 +[2024-12-11 04:53:39 root] (utils.py 283): INFO Epoch: [27] [2160/2502] eta: 0:13:27 lr: 0.000002 loss_cls: 3.9870 (3.8912) grad_norm: 2.5037 (2.4430) time: 2.3626 data: 0.0003 max mem: 8426 +[2024-12-11 04:54:04 root] (utils.py 283): INFO Epoch: [27] [2170/2502] eta: 0:13:04 lr: 0.000002 loss_cls: 3.8639 (3.8901) grad_norm: 2.4881 (2.4432) time: 2.4185 data: 0.0003 max mem: 8426 +[2024-12-11 04:54:29 root] (utils.py 283): INFO Epoch: [27] [2180/2502] eta: 0:12:40 lr: 0.000002 loss_cls: 3.8639 (3.8906) grad_norm: 2.4763 (2.4437) time: 2.4700 data: 0.0002 max mem: 8426 +[2024-12-11 04:54:53 root] (utils.py 283): INFO Epoch: [27] [2190/2502] eta: 0:12:17 lr: 0.000002 loss_cls: 4.2568 (3.8923) grad_norm: 2.4522 (2.4438) time: 2.4124 data: 0.0002 max mem: 8426 +[2024-12-11 04:55:17 root] (utils.py 283): INFO Epoch: [27] [2200/2502] eta: 0:11:53 lr: 0.000002 loss_cls: 4.1016 (3.8925) grad_norm: 2.4478 (2.4440) time: 2.3845 data: 0.0002 max mem: 8426 +[2024-12-11 04:55:40 root] (utils.py 283): INFO Epoch: [27] [2210/2502] eta: 0:11:30 lr: 0.000002 loss_cls: 4.0920 (3.8940) grad_norm: 2.4154 (2.4439) time: 2.3798 data: 0.0002 max mem: 8426 +[2024-12-11 04:56:04 root] (utils.py 283): INFO Epoch: [27] [2220/2502] eta: 0:11:06 lr: 0.000002 loss_cls: 4.2542 (3.8937) grad_norm: 2.4048 (2.4442) time: 2.3865 data: 0.0003 max mem: 8426 +[2024-12-11 04:56:27 root] (utils.py 283): INFO Epoch: [27] [2230/2502] eta: 0:10:42 lr: 0.000002 loss_cls: 3.8128 (3.8922) grad_norm: 2.4506 (2.4443) time: 2.3550 data: 0.0003 max mem: 8426 +[2024-12-11 04:56:52 root] (utils.py 283): INFO Epoch: [27] [2240/2502] eta: 0:10:19 lr: 0.000002 loss_cls: 3.8614 (3.8921) grad_norm: 2.4839 (2.4444) time: 2.3712 data: 0.0003 max mem: 8426 +[2024-12-11 04:57:16 root] (utils.py 283): INFO Epoch: [27] [2250/2502] eta: 0:09:55 lr: 0.000002 loss_cls: 4.0066 (3.8925) grad_norm: 2.5015 (2.4446) time: 2.4278 data: 0.0002 max mem: 8426 +[2024-12-11 04:57:39 root] (utils.py 283): INFO Epoch: [27] [2260/2502] eta: 0:09:31 lr: 0.000002 loss_cls: 3.9113 (3.8920) grad_norm: 2.5015 (2.4445) time: 2.3818 data: 0.0002 max mem: 8426 +[2024-12-11 04:58:03 root] (utils.py 283): INFO Epoch: [27] [2270/2502] eta: 0:09:08 lr: 0.000002 loss_cls: 3.8425 (3.8918) grad_norm: 2.4353 (2.4446) time: 2.3514 data: 0.0003 max mem: 8426 +[2024-12-11 04:58:26 root] (utils.py 283): INFO Epoch: [27] [2280/2502] eta: 0:08:44 lr: 0.000002 loss_cls: 4.0210 (3.8915) grad_norm: 2.4314 (2.4446) time: 2.3590 data: 0.0003 max mem: 8426 +[2024-12-11 04:58:50 root] (utils.py 283): INFO Epoch: [27] [2290/2502] eta: 0:08:21 lr: 0.000002 loss_cls: 3.8821 (3.8918) grad_norm: 2.4085 (2.4443) time: 2.3388 data: 0.0003 max mem: 8426 +[2024-12-11 04:59:14 root] (utils.py 283): INFO Epoch: [27] [2300/2502] eta: 0:07:57 lr: 0.000002 loss_cls: 3.8821 (3.8915) grad_norm: 2.4085 (2.4444) time: 2.3563 data: 0.0003 max mem: 8426 +[2024-12-11 04:59:37 root] (utils.py 283): INFO Epoch: [27] [2310/2502] eta: 0:07:33 lr: 0.000002 loss_cls: 3.8842 (3.8913) grad_norm: 2.4374 (2.4447) time: 2.3644 data: 0.0003 max mem: 8426 +[2024-12-11 05:00:01 root] (utils.py 283): INFO Epoch: [27] [2320/2502] eta: 0:07:10 lr: 0.000002 loss_cls: 3.7877 (3.8891) grad_norm: 2.4374 (2.4449) time: 2.3491 data: 0.0003 max mem: 8426 +[2024-12-11 05:00:24 root] (utils.py 283): INFO Epoch: [27] [2330/2502] eta: 0:06:46 lr: 0.000002 loss_cls: 3.4952 (3.8887) grad_norm: 2.4121 (2.4449) time: 2.3626 data: 0.0002 max mem: 8426 +[2024-12-11 05:00:48 root] (utils.py 283): INFO Epoch: [27] [2340/2502] eta: 0:06:22 lr: 0.000002 loss_cls: 4.0790 (3.8885) grad_norm: 2.3820 (2.4447) time: 2.3656 data: 0.0002 max mem: 8426 +[2024-12-11 05:01:12 root] (utils.py 283): INFO Epoch: [27] [2350/2502] eta: 0:05:59 lr: 0.000002 loss_cls: 3.8094 (3.8884) grad_norm: 2.4100 (2.4445) time: 2.3871 data: 0.0002 max mem: 8426 +[2024-12-11 05:01:36 root] (utils.py 283): INFO Epoch: [27] [2360/2502] eta: 0:05:35 lr: 0.000002 loss_cls: 4.1249 (3.8892) grad_norm: 2.3848 (2.4443) time: 2.3814 data: 0.0002 max mem: 8426 +[2024-12-11 05:01:59 root] (utils.py 283): INFO Epoch: [27] [2370/2502] eta: 0:05:11 lr: 0.000002 loss_cls: 4.1133 (3.8889) grad_norm: 2.3891 (2.4447) time: 2.3526 data: 0.0002 max mem: 8426 +[2024-12-11 05:02:24 root] (utils.py 283): INFO Epoch: [27] [2380/2502] eta: 0:04:48 lr: 0.000002 loss_cls: 4.0561 (3.8899) grad_norm: 2.3891 (2.4444) time: 2.4239 data: 0.0003 max mem: 8426 +[2024-12-11 05:02:47 root] (utils.py 283): INFO Epoch: [27] [2390/2502] eta: 0:04:24 lr: 0.000002 loss_cls: 4.2025 (3.8899) grad_norm: 2.3835 (2.4445) time: 2.4141 data: 0.0003 max mem: 8426 +[2024-12-11 05:03:10 root] (utils.py 283): INFO Epoch: [27] [2400/2502] eta: 0:04:01 lr: 0.000002 loss_cls: 3.9924 (3.8899) grad_norm: 2.4676 (2.4448) time: 2.3015 data: 0.0002 max mem: 8426 +[2024-12-11 05:03:33 root] (utils.py 283): INFO Epoch: [27] [2410/2502] eta: 0:03:37 lr: 0.000002 loss_cls: 3.9831 (3.8897) grad_norm: 2.4269 (2.4448) time: 2.2878 data: 0.0003 max mem: 8426 +[2024-12-11 05:03:56 root] (utils.py 283): INFO Epoch: [27] [2420/2502] eta: 0:03:13 lr: 0.000002 loss_cls: 3.5372 (3.8882) grad_norm: 2.3852 (2.4446) time: 2.2894 data: 0.0003 max mem: 8426 +[2024-12-11 05:04:19 root] (utils.py 283): INFO Epoch: [27] [2430/2502] eta: 0:02:50 lr: 0.000002 loss_cls: 3.4493 (3.8876) grad_norm: 2.3914 (2.4443) time: 2.2856 data: 0.0003 max mem: 8426 +[2024-12-11 05:04:41 root] (utils.py 283): INFO Epoch: [27] [2440/2502] eta: 0:02:26 lr: 0.000002 loss_cls: 3.5952 (3.8865) grad_norm: 2.3876 (2.4446) time: 2.2562 data: 0.0003 max mem: 8426 +[2024-12-11 05:05:04 root] (utils.py 283): INFO Epoch: [27] [2450/2502] eta: 0:02:02 lr: 0.000002 loss_cls: 3.5575 (3.8850) grad_norm: 2.3835 (2.4445) time: 2.2478 data: 0.0003 max mem: 8426 +[2024-12-11 05:05:27 root] (utils.py 283): INFO Epoch: [27] [2460/2502] eta: 0:01:39 lr: 0.000002 loss_cls: 3.9107 (3.8853) grad_norm: 2.3667 (2.4444) time: 2.2883 data: 0.0003 max mem: 8426 +[2024-12-11 05:05:51 root] (utils.py 283): INFO Epoch: [27] [2470/2502] eta: 0:01:15 lr: 0.000002 loss_cls: 3.9069 (3.8842) grad_norm: 2.3861 (2.4445) time: 2.3736 data: 0.0003 max mem: 8426 +[2024-12-11 05:06:15 root] (utils.py 283): INFO Epoch: [27] [2480/2502] eta: 0:00:51 lr: 0.000002 loss_cls: 3.8140 (3.8836) grad_norm: 2.4548 (2.4446) time: 2.3930 data: 0.0003 max mem: 8426 +[2024-12-11 05:06:39 root] (utils.py 283): INFO Epoch: [27] [2490/2502] eta: 0:00:28 lr: 0.000002 loss_cls: 3.9387 (3.8835) grad_norm: 2.4466 (2.4448) time: 2.3648 data: 0.0257 max mem: 8426 +[2024-12-11 05:07:02 root] (utils.py 283): INFO Epoch: [27] [2500/2502] eta: 0:00:04 lr: 0.000002 loss_cls: 3.9935 (3.8842) grad_norm: 2.4199 (2.4447) time: 2.3471 data: 0.0257 max mem: 8426 +[2024-12-11 05:07:04 root] (utils.py 283): INFO Epoch: [27] [2501/2502] eta: 0:00:02 lr: 0.000002 loss_cls: 3.9935 (3.8843) grad_norm: 2.4298 (2.4447) time: 2.3232 data: 0.0257 max mem: 8426 +[2024-12-11 05:07:04 root] (utils.py 297): INFO Epoch: [27] Total time: 1:38:28 (2.3615 s / it) +[2024-12-11 05:07:04 root] (engine.py 179): INFO Averaged stats:lr: 0.000002 loss_cls: 3.9935 (3.8926) grad_norm: 2.4298 (2.4447) +[2024-12-11 05:07:04 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:29 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.6212 (0.6212) acc1: 85.9375 (85.9375) acc3: 96.8750 (96.8750) acc5: 98.4375 (98.4375) time: 0.2972 data: 0.0003 max mem: 8426 +[2024-12-11 05:07:06 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:17 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7085 (0.7973) acc1: 85.9375 (82.7415) acc3: 95.3125 (93.6080) acc5: 97.6562 (96.8040) time: 0.1997 data: 0.0003 max mem: 8426 +[2024-12-11 05:07:08 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:15 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8437 (0.8534) acc1: 79.6875 (81.7336) acc3: 92.1875 (92.8571) acc5: 95.3125 (95.7589) time: 0.1930 data: 0.0004 max mem: 8426 +[2024-12-11 05:07:10 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:14 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9367 (0.8668) acc1: 78.9062 (80.7208) acc3: 92.9688 (93.0444) acc5: 95.3125 (95.8165) time: 0.2119 data: 0.0004 max mem: 8426 +[2024-12-11 05:07:12 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8187 (0.8568) acc1: 79.6875 (81.0976) acc3: 94.5312 (93.2165) acc5: 96.0938 (95.9032) time: 0.2044 data: 0.0004 max mem: 8426 +[2024-12-11 05:07:14 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0357 (0.9414) acc1: 75.7812 (79.0135) acc3: 88.2812 (91.8352) acc5: 92.1875 (94.8376) time: 0.1828 data: 0.0004 max mem: 8426 +[2024-12-11 05:07:16 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2270 (0.9871) acc1: 71.8750 (78.1634) acc3: 85.9375 (90.9580) acc5: 89.8438 (94.0190) time: 0.2030 data: 0.0004 max mem: 8426 +[2024-12-11 05:07:18 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1646 (1.0266) acc1: 75.0000 (77.2447) acc3: 86.7188 (90.4710) acc5: 89.8438 (93.5739) time: 0.2016 data: 0.0004 max mem: 8426 +[2024-12-11 05:07:20 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1974 (1.0605) acc1: 72.6562 (76.4660) acc3: 86.7188 (89.8727) acc5: 89.8438 (93.0363) time: 0.2048 data: 0.0006 max mem: 8426 +[2024-12-11 05:07:22 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2318 (1.0877) acc1: 68.7500 (75.7727) acc3: 86.7188 (89.5690) acc5: 89.8438 (92.7713) time: 0.2018 data: 0.0006 max mem: 8426 +[2024-12-11 05:07:23 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1299 (1.0759) acc1: 75.0000 (75.9920) acc3: 89.0625 (89.7440) acc5: 91.4062 (92.9360) time: 0.1889 data: 0.0006 max mem: 8426 +[2024-12-11 05:07:23 root] (utils.py 297): INFO Test: Total time: 0:00:19 (0.1977 s / it) +[2024-12-11 05:07:24 root] (engine.py 264): INFO * Acc@1 75.856 Acc@3 89.702 Acc@5 92.970 loss 1.078 flops 1.285 layer_flops 1.251 +[2024-12-11 05:07:24 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 75.9% +[2024-12-11 05:07:24 root] (main.py 576): INFO Max accuracy: 75.89% +[2024-12-11 05:07:26 root] (utils.py 283): INFO Epoch: [28] [ 0/2502] eta: 1:30:46 lr: 0.000001 loss_cls: 3.5021 (3.5021) grad_norm: 2.4961 (2.4961) time: 2.1770 data: 0.0003 max mem: 8426 +[2024-12-11 05:07:50 root] (utils.py 283): INFO Epoch: [28] [ 10/2502] eta: 1:38:21 lr: 0.000001 loss_cls: 3.5021 (3.5849) grad_norm: 2.3667 (2.3842) time: 2.3681 data: 0.0003 max mem: 8426 +[2024-12-11 05:08:14 root] (utils.py 283): INFO Epoch: [28] [ 20/2502] eta: 1:38:41 lr: 0.000001 loss_cls: 4.1239 (3.9137) grad_norm: 2.4390 (2.4157) time: 2.3961 data: 0.0002 max mem: 8426 +[2024-12-11 05:08:38 root] (utils.py 283): INFO Epoch: [28] [ 30/2502] eta: 1:38:44 lr: 0.000001 loss_cls: 4.1168 (3.8992) grad_norm: 2.4552 (2.4097) time: 2.4127 data: 0.0002 max mem: 8426 +[2024-12-11 05:09:01 root] (utils.py 283): INFO Epoch: [28] [ 40/2502] eta: 1:37:57 lr: 0.000001 loss_cls: 3.9409 (3.8529) grad_norm: 2.4195 (2.4317) time: 2.3891 data: 0.0003 max mem: 8426 +[2024-12-11 05:09:26 root] (utils.py 283): INFO Epoch: [28] [ 50/2502] eta: 1:38:01 lr: 0.000001 loss_cls: 3.7400 (3.8199) grad_norm: 2.4128 (2.4276) time: 2.4015 data: 0.0003 max mem: 8426 +[2024-12-11 05:09:50 root] (utils.py 283): INFO Epoch: [28] [ 60/2502] eta: 1:37:25 lr: 0.000001 loss_cls: 3.7400 (3.8332) grad_norm: 2.4128 (2.4298) time: 2.4066 data: 0.0003 max mem: 8426 +[2024-12-11 05:10:13 root] (utils.py 283): INFO Epoch: [28] [ 70/2502] eta: 1:36:49 lr: 0.000001 loss_cls: 3.9618 (3.8356) grad_norm: 2.4282 (2.4326) time: 2.3629 data: 0.0003 max mem: 8426 +[2024-12-11 05:10:37 root] (utils.py 283): INFO Epoch: [28] [ 80/2502] eta: 1:36:21 lr: 0.000001 loss_cls: 3.7493 (3.8170) grad_norm: 2.4461 (2.4400) time: 2.3675 data: 0.0003 max mem: 8426 +[2024-12-11 05:11:00 root] (utils.py 283): INFO Epoch: [28] [ 90/2502] eta: 1:35:48 lr: 0.000001 loss_cls: 3.5968 (3.8211) grad_norm: 2.4623 (2.4403) time: 2.3636 data: 0.0002 max mem: 8426 +[2024-12-11 05:11:24 root] (utils.py 283): INFO Epoch: [28] [ 100/2502] eta: 1:35:30 lr: 0.000001 loss_cls: 4.0402 (3.8410) grad_norm: 2.4153 (2.4338) time: 2.3796 data: 0.0002 max mem: 8426 +[2024-12-11 05:11:48 root] (utils.py 283): INFO Epoch: [28] [ 110/2502] eta: 1:34:58 lr: 0.000001 loss_cls: 4.0887 (3.8513) grad_norm: 2.4169 (2.4360) time: 2.3794 data: 0.0002 max mem: 8426 +[2024-12-11 05:12:12 root] (utils.py 283): INFO Epoch: [28] [ 120/2502] eta: 1:34:31 lr: 0.000001 loss_cls: 3.7334 (3.8230) grad_norm: 2.4476 (2.4355) time: 2.3562 data: 0.0003 max mem: 8426 +[2024-12-11 05:12:35 root] (utils.py 283): INFO Epoch: [28] [ 130/2502] eta: 1:34:04 lr: 0.000001 loss_cls: 3.7334 (3.8355) grad_norm: 2.4214 (2.4393) time: 2.3648 data: 0.0002 max mem: 8426 +[2024-12-11 05:12:59 root] (utils.py 283): INFO Epoch: [28] [ 140/2502] eta: 1:33:40 lr: 0.000001 loss_cls: 4.0411 (3.8493) grad_norm: 2.4182 (2.4369) time: 2.3729 data: 0.0002 max mem: 8426 +[2024-12-11 05:13:23 root] (utils.py 283): INFO Epoch: [28] [ 150/2502] eta: 1:33:24 lr: 0.000001 loss_cls: 4.1461 (3.8655) grad_norm: 2.4182 (2.4374) time: 2.4035 data: 0.0002 max mem: 8426 +[2024-12-11 05:13:47 root] (utils.py 283): INFO Epoch: [28] [ 160/2502] eta: 1:32:53 lr: 0.000001 loss_cls: 4.0700 (3.8506) grad_norm: 2.4459 (2.4417) time: 2.3822 data: 0.0002 max mem: 8426 +[2024-12-11 05:14:10 root] (utils.py 283): INFO Epoch: [28] [ 170/2502] eta: 1:32:26 lr: 0.000001 loss_cls: 3.8406 (3.8343) grad_norm: 2.3747 (2.4363) time: 2.3455 data: 0.0002 max mem: 8426 +[2024-12-11 05:14:34 root] (utils.py 283): INFO Epoch: [28] [ 180/2502] eta: 1:32:01 lr: 0.000001 loss_cls: 3.8406 (3.8374) grad_norm: 2.3405 (2.4351) time: 2.3628 data: 0.0002 max mem: 8426 +[2024-12-11 05:14:58 root] (utils.py 283): INFO Epoch: [28] [ 190/2502] eta: 1:31:35 lr: 0.000001 loss_cls: 3.9530 (3.8402) grad_norm: 2.4562 (2.4350) time: 2.3628 data: 0.0003 max mem: 8426 +[2024-12-11 05:15:21 root] (utils.py 283): INFO Epoch: [28] [ 200/2502] eta: 1:31:10 lr: 0.000001 loss_cls: 3.7687 (3.8230) grad_norm: 2.4562 (2.4378) time: 2.3629 data: 0.0003 max mem: 8426 +[2024-12-11 05:15:45 root] (utils.py 283): INFO Epoch: [28] [ 210/2502] eta: 1:30:49 lr: 0.000001 loss_cls: 3.7687 (3.8329) grad_norm: 2.4499 (2.4362) time: 2.3826 data: 0.0002 max mem: 8426 +[2024-12-11 05:16:09 root] (utils.py 283): INFO Epoch: [28] [ 220/2502] eta: 1:30:25 lr: 0.000001 loss_cls: 4.1269 (3.8378) grad_norm: 2.4357 (2.4363) time: 2.3877 data: 0.0002 max mem: 8426 +[2024-12-11 05:16:33 root] (utils.py 283): INFO Epoch: [28] [ 230/2502] eta: 1:30:02 lr: 0.000001 loss_cls: 3.9234 (3.8346) grad_norm: 2.4105 (2.4358) time: 2.3814 data: 0.0002 max mem: 8426 +[2024-12-11 05:16:56 root] (utils.py 283): INFO Epoch: [28] [ 240/2502] eta: 1:29:37 lr: 0.000001 loss_cls: 3.9358 (3.8438) grad_norm: 2.4105 (2.4340) time: 2.3721 data: 0.0002 max mem: 8426 +[2024-12-11 05:17:19 root] (utils.py 283): INFO Epoch: [28] [ 250/2502] eta: 1:29:04 lr: 0.000001 loss_cls: 3.9358 (3.8417) grad_norm: 2.4072 (2.4330) time: 2.3229 data: 0.0003 max mem: 8426 +[2024-12-11 05:17:43 root] (utils.py 283): INFO Epoch: [28] [ 260/2502] eta: 1:28:44 lr: 0.000001 loss_cls: 3.6793 (3.8413) grad_norm: 2.4170 (2.4343) time: 2.3471 data: 0.0002 max mem: 8426 +[2024-12-11 05:18:08 root] (utils.py 283): INFO Epoch: [28] [ 270/2502] eta: 1:28:24 lr: 0.000001 loss_cls: 4.1175 (3.8425) grad_norm: 2.4592 (2.4373) time: 2.4165 data: 0.0002 max mem: 8426 +[2024-12-11 05:18:31 root] (utils.py 283): INFO Epoch: [28] [ 280/2502] eta: 1:27:58 lr: 0.000001 loss_cls: 4.1468 (3.8529) grad_norm: 2.4583 (2.4370) time: 2.3866 data: 0.0002 max mem: 8426 +[2024-12-11 05:18:55 root] (utils.py 283): INFO Epoch: [28] [ 290/2502] eta: 1:27:35 lr: 0.000001 loss_cls: 4.1696 (3.8633) grad_norm: 2.4450 (2.4360) time: 2.3662 data: 0.0002 max mem: 8426 +[2024-12-11 05:19:19 root] (utils.py 283): INFO Epoch: [28] [ 300/2502] eta: 1:27:12 lr: 0.000001 loss_cls: 4.1644 (3.8631) grad_norm: 2.3939 (2.4337) time: 2.3871 data: 0.0002 max mem: 8426 +[2024-12-11 05:19:43 root] (utils.py 283): INFO Epoch: [28] [ 310/2502] eta: 1:26:49 lr: 0.000001 loss_cls: 4.0148 (3.8601) grad_norm: 2.3564 (2.4339) time: 2.3849 data: 0.0002 max mem: 8426 +[2024-12-11 05:20:06 root] (utils.py 283): INFO Epoch: [28] [ 320/2502] eta: 1:26:24 lr: 0.000001 loss_cls: 4.0348 (3.8676) grad_norm: 2.4048 (2.4357) time: 2.3679 data: 0.0002 max mem: 8426 +[2024-12-11 05:20:29 root] (utils.py 283): INFO Epoch: [28] [ 330/2502] eta: 1:25:53 lr: 0.000001 loss_cls: 4.2118 (3.8766) grad_norm: 2.4128 (2.4372) time: 2.3155 data: 0.0003 max mem: 8426 +[2024-12-11 05:20:52 root] (utils.py 283): INFO Epoch: [28] [ 340/2502] eta: 1:25:27 lr: 0.000001 loss_cls: 4.1983 (3.8827) grad_norm: 2.4128 (2.4388) time: 2.2997 data: 0.0003 max mem: 8426 +[2024-12-11 05:21:15 root] (utils.py 283): INFO Epoch: [28] [ 350/2502] eta: 1:24:59 lr: 0.000001 loss_cls: 4.1335 (3.8835) grad_norm: 2.4195 (2.4392) time: 2.3161 data: 0.0003 max mem: 8426 +[2024-12-11 05:21:39 root] (utils.py 283): INFO Epoch: [28] [ 360/2502] eta: 1:24:34 lr: 0.000001 loss_cls: 3.9837 (3.8857) grad_norm: 2.4447 (2.4394) time: 2.3302 data: 0.0003 max mem: 8426 +[2024-12-11 05:22:02 root] (utils.py 283): INFO Epoch: [28] [ 370/2502] eta: 1:24:09 lr: 0.000001 loss_cls: 3.9715 (3.8819) grad_norm: 2.4165 (2.4392) time: 2.3501 data: 0.0002 max mem: 8426 +[2024-12-11 05:22:25 root] (utils.py 283): INFO Epoch: [28] [ 380/2502] eta: 1:23:42 lr: 0.000001 loss_cls: 3.8780 (3.8826) grad_norm: 2.3481 (2.4393) time: 2.3265 data: 0.0002 max mem: 8426 +[2024-12-11 05:22:49 root] (utils.py 283): INFO Epoch: [28] [ 390/2502] eta: 1:23:18 lr: 0.000001 loss_cls: 3.8476 (3.8835) grad_norm: 2.3844 (2.4386) time: 2.3324 data: 0.0002 max mem: 8426 +[2024-12-11 05:23:12 root] (utils.py 283): INFO Epoch: [28] [ 400/2502] eta: 1:22:51 lr: 0.000001 loss_cls: 4.0096 (3.8870) grad_norm: 2.3844 (2.4388) time: 2.3313 data: 0.0002 max mem: 8426 +[2024-12-11 05:23:35 root] (utils.py 283): INFO Epoch: [28] [ 410/2502] eta: 1:22:26 lr: 0.000001 loss_cls: 4.0555 (3.8842) grad_norm: 2.4892 (2.4395) time: 2.3246 data: 0.0003 max mem: 8426 +[2024-12-11 05:23:58 root] (utils.py 283): INFO Epoch: [28] [ 420/2502] eta: 1:21:59 lr: 0.000001 loss_cls: 3.9193 (3.8786) grad_norm: 2.4532 (2.4399) time: 2.3158 data: 0.0003 max mem: 8426 +[2024-12-11 05:24:22 root] (utils.py 283): INFO Epoch: [28] [ 430/2502] eta: 1:21:36 lr: 0.000001 loss_cls: 4.0793 (3.8796) grad_norm: 2.4283 (2.4402) time: 2.3348 data: 0.0003 max mem: 8426 +[2024-12-11 05:24:45 root] (utils.py 283): INFO Epoch: [28] [ 440/2502] eta: 1:21:10 lr: 0.000001 loss_cls: 3.8874 (3.8746) grad_norm: 2.4158 (2.4403) time: 2.3451 data: 0.0003 max mem: 8426 +[2024-12-11 05:25:09 root] (utils.py 283): INFO Epoch: [28] [ 450/2502] eta: 1:20:45 lr: 0.000001 loss_cls: 3.9884 (3.8800) grad_norm: 2.4188 (2.4396) time: 2.3254 data: 0.0003 max mem: 8426 +[2024-12-11 05:25:32 root] (utils.py 283): INFO Epoch: [28] [ 460/2502] eta: 1:20:20 lr: 0.000001 loss_cls: 4.0031 (3.8806) grad_norm: 2.4498 (2.4412) time: 2.3284 data: 0.0003 max mem: 8426 +[2024-12-11 05:25:55 root] (utils.py 283): INFO Epoch: [28] [ 470/2502] eta: 1:19:56 lr: 0.000001 loss_cls: 3.9163 (3.8810) grad_norm: 2.4664 (2.4414) time: 2.3366 data: 0.0003 max mem: 8426 +[2024-12-11 05:26:18 root] (utils.py 283): INFO Epoch: [28] [ 480/2502] eta: 1:19:30 lr: 0.000001 loss_cls: 4.0957 (3.8879) grad_norm: 2.4132 (2.4417) time: 2.3364 data: 0.0003 max mem: 8426 +[2024-12-11 05:26:43 root] (utils.py 283): INFO Epoch: [28] [ 490/2502] eta: 1:19:09 lr: 0.000001 loss_cls: 4.1565 (3.8853) grad_norm: 2.3686 (2.4402) time: 2.3582 data: 0.0003 max mem: 8426 +[2024-12-11 05:27:06 root] (utils.py 283): INFO Epoch: [28] [ 500/2502] eta: 1:18:46 lr: 0.000001 loss_cls: 3.9726 (3.8860) grad_norm: 2.4096 (2.4401) time: 2.3887 data: 0.0003 max mem: 8426 +[2024-12-11 05:27:30 root] (utils.py 283): INFO Epoch: [28] [ 510/2502] eta: 1:18:23 lr: 0.000001 loss_cls: 3.9726 (3.8839) grad_norm: 2.4239 (2.4397) time: 2.3773 data: 0.0002 max mem: 8426 +[2024-12-11 05:27:54 root] (utils.py 283): INFO Epoch: [28] [ 520/2502] eta: 1:18:01 lr: 0.000001 loss_cls: 3.8398 (3.8843) grad_norm: 2.3969 (2.4388) time: 2.3920 data: 0.0002 max mem: 8426 +[2024-12-11 05:28:18 root] (utils.py 283): INFO Epoch: [28] [ 530/2502] eta: 1:17:37 lr: 0.000001 loss_cls: 3.8353 (3.8823) grad_norm: 2.4492 (2.4396) time: 2.3766 data: 0.0002 max mem: 8426 +[2024-12-11 05:28:41 root] (utils.py 283): INFO Epoch: [28] [ 540/2502] eta: 1:17:12 lr: 0.000001 loss_cls: 3.8768 (3.8870) grad_norm: 2.4292 (2.4394) time: 2.3343 data: 0.0002 max mem: 8426 +[2024-12-11 05:29:04 root] (utils.py 283): INFO Epoch: [28] [ 550/2502] eta: 1:16:46 lr: 0.000001 loss_cls: 4.0417 (3.8899) grad_norm: 2.4081 (2.4390) time: 2.3091 data: 0.0003 max mem: 8426 +[2024-12-11 05:29:28 root] (utils.py 283): INFO Epoch: [28] [ 560/2502] eta: 1:16:24 lr: 0.000001 loss_cls: 4.0417 (3.8885) grad_norm: 2.4129 (2.4387) time: 2.3624 data: 0.0003 max mem: 8426 +[2024-12-11 05:29:52 root] (utils.py 283): INFO Epoch: [28] [ 570/2502] eta: 1:16:01 lr: 0.000001 loss_cls: 3.9346 (3.8872) grad_norm: 2.3620 (2.4365) time: 2.4009 data: 0.0002 max mem: 8426 +[2024-12-11 05:30:15 root] (utils.py 283): INFO Epoch: [28] [ 580/2502] eta: 1:15:36 lr: 0.000001 loss_cls: 4.1011 (3.8895) grad_norm: 2.3561 (2.4359) time: 2.3383 data: 0.0002 max mem: 8426 +[2024-12-11 05:30:38 root] (utils.py 283): INFO Epoch: [28] [ 590/2502] eta: 1:15:09 lr: 0.000001 loss_cls: 3.9484 (3.8876) grad_norm: 2.3804 (2.4374) time: 2.2910 data: 0.0003 max mem: 8426 +[2024-12-11 05:31:00 root] (utils.py 283): INFO Epoch: [28] [ 600/2502] eta: 1:14:43 lr: 0.000001 loss_cls: 3.9042 (3.8882) grad_norm: 2.3292 (2.4349) time: 2.2797 data: 0.0002 max mem: 8426 +[2024-12-11 05:31:24 root] (utils.py 283): INFO Epoch: [28] [ 610/2502] eta: 1:14:19 lr: 0.000001 loss_cls: 3.9711 (3.8926) grad_norm: 2.3251 (2.4344) time: 2.2986 data: 0.0003 max mem: 8426 +[2024-12-11 05:31:47 root] (utils.py 283): INFO Epoch: [28] [ 620/2502] eta: 1:13:55 lr: 0.000001 loss_cls: 3.9564 (3.8925) grad_norm: 2.4014 (2.4339) time: 2.3427 data: 0.0003 max mem: 8426 +[2024-12-11 05:32:11 root] (utils.py 283): INFO Epoch: [28] [ 630/2502] eta: 1:13:33 lr: 0.000001 loss_cls: 3.9564 (3.8938) grad_norm: 2.4121 (2.4341) time: 2.3786 data: 0.0003 max mem: 8426 +[2024-12-11 05:32:35 root] (utils.py 283): INFO Epoch: [28] [ 640/2502] eta: 1:13:09 lr: 0.000001 loss_cls: 4.0946 (3.8936) grad_norm: 2.3963 (2.4332) time: 2.3618 data: 0.0002 max mem: 8426 +[2024-12-11 05:32:58 root] (utils.py 283): INFO Epoch: [28] [ 650/2502] eta: 1:12:44 lr: 0.000001 loss_cls: 4.0306 (3.8976) grad_norm: 2.3987 (2.4339) time: 2.3329 data: 0.0002 max mem: 8426 +[2024-12-11 05:33:22 root] (utils.py 283): INFO Epoch: [28] [ 660/2502] eta: 1:12:23 lr: 0.000001 loss_cls: 4.2105 (3.8984) grad_norm: 2.5002 (2.4349) time: 2.3853 data: 0.0002 max mem: 8426 +[2024-12-11 05:33:46 root] (utils.py 283): INFO Epoch: [28] [ 670/2502] eta: 1:12:01 lr: 0.000001 loss_cls: 4.1110 (3.8985) grad_norm: 2.3925 (2.4336) time: 2.4310 data: 0.0003 max mem: 8426 +[2024-12-11 05:34:10 root] (utils.py 283): INFO Epoch: [28] [ 680/2502] eta: 1:11:39 lr: 0.000001 loss_cls: 4.1018 (3.9010) grad_norm: 2.3950 (2.4353) time: 2.4106 data: 0.0003 max mem: 8426 +[2024-12-11 05:34:34 root] (utils.py 283): INFO Epoch: [28] [ 690/2502] eta: 1:11:15 lr: 0.000001 loss_cls: 3.9455 (3.8984) grad_norm: 2.4300 (2.4360) time: 2.3775 data: 0.0003 max mem: 8426 +[2024-12-11 05:34:58 root] (utils.py 283): INFO Epoch: [28] [ 700/2502] eta: 1:10:52 lr: 0.000001 loss_cls: 3.5151 (3.8935) grad_norm: 2.4623 (2.4359) time: 2.3643 data: 0.0003 max mem: 8426 +[2024-12-11 05:35:21 root] (utils.py 283): INFO Epoch: [28] [ 710/2502] eta: 1:10:27 lr: 0.000001 loss_cls: 3.7477 (3.8929) grad_norm: 2.4377 (2.4360) time: 2.3495 data: 0.0002 max mem: 8426 +[2024-12-11 05:35:45 root] (utils.py 283): INFO Epoch: [28] [ 720/2502] eta: 1:10:05 lr: 0.000001 loss_cls: 4.0088 (3.8909) grad_norm: 2.4377 (2.4366) time: 2.3616 data: 0.0003 max mem: 8426 +[2024-12-11 05:36:09 root] (utils.py 283): INFO Epoch: [28] [ 730/2502] eta: 1:09:42 lr: 0.000001 loss_cls: 3.9922 (3.8914) grad_norm: 2.4632 (2.4381) time: 2.3915 data: 0.0003 max mem: 8426 +[2024-12-11 05:36:32 root] (utils.py 283): INFO Epoch: [28] [ 740/2502] eta: 1:09:16 lr: 0.000001 loss_cls: 3.6798 (3.8858) grad_norm: 2.4011 (2.4373) time: 2.3403 data: 0.0002 max mem: 8426 +[2024-12-11 05:36:56 root] (utils.py 283): INFO Epoch: [28] [ 750/2502] eta: 1:08:53 lr: 0.000001 loss_cls: 3.4183 (3.8868) grad_norm: 2.4011 (2.4378) time: 2.3391 data: 0.0002 max mem: 8426 +[2024-12-11 05:37:19 root] (utils.py 283): INFO Epoch: [28] [ 760/2502] eta: 1:08:30 lr: 0.000001 loss_cls: 3.8799 (3.8850) grad_norm: 2.4791 (2.4383) time: 2.3863 data: 0.0002 max mem: 8426 +[2024-12-11 05:37:43 root] (utils.py 283): INFO Epoch: [28] [ 770/2502] eta: 1:08:07 lr: 0.000001 loss_cls: 3.9062 (3.8853) grad_norm: 2.4791 (2.4390) time: 2.3823 data: 0.0003 max mem: 8426 +[2024-12-11 05:38:08 root] (utils.py 283): INFO Epoch: [28] [ 780/2502] eta: 1:07:45 lr: 0.000001 loss_cls: 4.0627 (3.8841) grad_norm: 2.4714 (2.4391) time: 2.4009 data: 0.0002 max mem: 8426 +[2024-12-11 05:38:31 root] (utils.py 283): INFO Epoch: [28] [ 790/2502] eta: 1:07:22 lr: 0.000001 loss_cls: 3.7887 (3.8814) grad_norm: 2.4063 (2.4388) time: 2.4079 data: 0.0003 max mem: 8426 +[2024-12-11 05:38:55 root] (utils.py 283): INFO Epoch: [28] [ 800/2502] eta: 1:06:59 lr: 0.000001 loss_cls: 4.0490 (3.8837) grad_norm: 2.4058 (2.4385) time: 2.3912 data: 0.0003 max mem: 8426 +[2024-12-11 05:39:20 root] (utils.py 283): INFO Epoch: [28] [ 810/2502] eta: 1:06:37 lr: 0.000001 loss_cls: 4.1824 (3.8864) grad_norm: 2.3715 (2.4380) time: 2.4177 data: 0.0003 max mem: 8426 +[2024-12-11 05:39:44 root] (utils.py 283): INFO Epoch: [28] [ 820/2502] eta: 1:06:15 lr: 0.000001 loss_cls: 3.8903 (3.8829) grad_norm: 2.3805 (2.4381) time: 2.4274 data: 0.0003 max mem: 8426 +[2024-12-11 05:40:07 root] (utils.py 283): INFO Epoch: [28] [ 830/2502] eta: 1:05:50 lr: 0.000001 loss_cls: 3.6117 (3.8818) grad_norm: 2.4095 (2.4382) time: 2.3738 data: 0.0002 max mem: 8426 +[2024-12-11 05:40:30 root] (utils.py 283): INFO Epoch: [28] [ 840/2502] eta: 1:05:25 lr: 0.000001 loss_cls: 4.0747 (3.8821) grad_norm: 2.4607 (2.4390) time: 2.3097 data: 0.0002 max mem: 8426 +[2024-12-11 05:40:53 root] (utils.py 283): INFO Epoch: [28] [ 850/2502] eta: 1:05:01 lr: 0.000001 loss_cls: 4.1766 (3.8827) grad_norm: 2.4613 (2.4396) time: 2.3090 data: 0.0003 max mem: 8426 +[2024-12-11 05:41:16 root] (utils.py 283): INFO Epoch: [28] [ 860/2502] eta: 1:04:36 lr: 0.000001 loss_cls: 4.1766 (3.8873) grad_norm: 2.4499 (2.4388) time: 2.3027 data: 0.0003 max mem: 8426 +[2024-12-11 05:41:42 root] (utils.py 283): INFO Epoch: [28] [ 870/2502] eta: 1:04:16 lr: 0.000001 loss_cls: 4.0719 (3.8859) grad_norm: 2.4243 (2.4399) time: 2.4277 data: 0.0003 max mem: 8426 +[2024-12-11 05:42:06 root] (utils.py 283): INFO Epoch: [28] [ 880/2502] eta: 1:03:54 lr: 0.000001 loss_cls: 3.8699 (3.8824) grad_norm: 2.4998 (2.4412) time: 2.5144 data: 0.0002 max mem: 8426 +[2024-12-11 05:42:30 root] (utils.py 283): INFO Epoch: [28] [ 890/2502] eta: 1:03:30 lr: 0.000001 loss_cls: 4.0329 (3.8856) grad_norm: 2.5319 (2.4423) time: 2.4030 data: 0.0002 max mem: 8426 +[2024-12-11 05:42:54 root] (utils.py 283): INFO Epoch: [28] [ 900/2502] eta: 1:03:08 lr: 0.000001 loss_cls: 4.1375 (3.8898) grad_norm: 2.4842 (2.4424) time: 2.3836 data: 0.0002 max mem: 8426 +[2024-12-11 05:43:18 root] (utils.py 283): INFO Epoch: [28] [ 910/2502] eta: 1:02:44 lr: 0.000001 loss_cls: 4.2670 (3.8945) grad_norm: 2.4842 (2.4435) time: 2.3974 data: 0.0003 max mem: 8426 +[2024-12-11 05:43:42 root] (utils.py 283): INFO Epoch: [28] [ 920/2502] eta: 1:02:21 lr: 0.000001 loss_cls: 4.1120 (3.8943) grad_norm: 2.4926 (2.4446) time: 2.3764 data: 0.0002 max mem: 8426 +[2024-12-11 05:44:05 root] (utils.py 283): INFO Epoch: [28] [ 930/2502] eta: 1:01:57 lr: 0.000001 loss_cls: 3.9572 (3.8936) grad_norm: 2.4899 (2.4454) time: 2.3638 data: 0.0002 max mem: 8426 +[2024-12-11 05:44:29 root] (utils.py 283): INFO Epoch: [28] [ 940/2502] eta: 1:01:33 lr: 0.000001 loss_cls: 3.8242 (3.8923) grad_norm: 2.4298 (2.4450) time: 2.3614 data: 0.0003 max mem: 8426 +[2024-12-11 05:44:53 root] (utils.py 283): INFO Epoch: [28] [ 950/2502] eta: 1:01:10 lr: 0.000001 loss_cls: 3.8698 (3.8916) grad_norm: 2.4044 (2.4447) time: 2.3785 data: 0.0002 max mem: 8426 +[2024-12-11 05:45:17 root] (utils.py 283): INFO Epoch: [28] [ 960/2502] eta: 1:00:48 lr: 0.000001 loss_cls: 4.0615 (3.8937) grad_norm: 2.4565 (2.4449) time: 2.4238 data: 0.0002 max mem: 8426 +[2024-12-11 05:45:41 root] (utils.py 283): INFO Epoch: [28] [ 970/2502] eta: 1:00:25 lr: 0.000001 loss_cls: 4.0040 (3.8945) grad_norm: 2.4566 (2.4444) time: 2.4317 data: 0.0003 max mem: 8426 +[2024-12-11 05:46:06 root] (utils.py 283): INFO Epoch: [28] [ 980/2502] eta: 1:00:03 lr: 0.000001 loss_cls: 3.9529 (3.8964) grad_norm: 2.4600 (2.4448) time: 2.4576 data: 0.0003 max mem: 8426 +[2024-12-11 05:46:31 root] (utils.py 283): INFO Epoch: [28] [ 990/2502] eta: 0:59:41 lr: 0.000001 loss_cls: 4.2196 (3.8986) grad_norm: 2.5088 (2.4449) time: 2.4626 data: 0.0002 max mem: 8426 +[2024-12-11 05:46:55 root] (utils.py 283): INFO Epoch: [28] [1000/2502] eta: 0:59:17 lr: 0.000001 loss_cls: 4.0608 (3.9003) grad_norm: 2.4386 (2.4448) time: 2.4107 data: 0.0002 max mem: 8426 +[2024-12-11 05:47:19 root] (utils.py 283): INFO Epoch: [28] [1010/2502] eta: 0:58:54 lr: 0.000001 loss_cls: 3.9822 (3.8971) grad_norm: 2.4215 (2.4447) time: 2.3933 data: 0.0002 max mem: 8426 +[2024-12-11 05:47:42 root] (utils.py 283): INFO Epoch: [28] [1020/2502] eta: 0:58:30 lr: 0.000001 loss_cls: 3.7730 (3.8951) grad_norm: 2.5144 (2.4457) time: 2.3854 data: 0.0002 max mem: 8426 +[2024-12-11 05:48:06 root] (utils.py 283): INFO Epoch: [28] [1030/2502] eta: 0:58:07 lr: 0.000001 loss_cls: 3.7730 (3.8941) grad_norm: 2.5144 (2.4459) time: 2.3782 data: 0.0002 max mem: 8426 +[2024-12-11 05:48:30 root] (utils.py 283): INFO Epoch: [28] [1040/2502] eta: 0:57:43 lr: 0.000001 loss_cls: 3.8748 (3.8930) grad_norm: 2.4628 (2.4463) time: 2.3785 data: 0.0002 max mem: 8426 +[2024-12-11 05:48:54 root] (utils.py 283): INFO Epoch: [28] [1050/2502] eta: 0:57:20 lr: 0.000001 loss_cls: 3.6118 (3.8909) grad_norm: 2.4819 (2.4474) time: 2.3792 data: 0.0002 max mem: 8426 +[2024-12-11 05:49:18 root] (utils.py 283): INFO Epoch: [28] [1060/2502] eta: 0:56:57 lr: 0.000001 loss_cls: 3.8825 (3.8907) grad_norm: 2.4289 (2.4474) time: 2.4183 data: 0.0002 max mem: 8426 +[2024-12-11 05:49:42 root] (utils.py 283): INFO Epoch: [28] [1070/2502] eta: 0:56:34 lr: 0.000001 loss_cls: 4.0110 (3.8929) grad_norm: 2.3906 (2.4470) time: 2.4286 data: 0.0002 max mem: 8426 +[2024-12-11 05:50:06 root] (utils.py 283): INFO Epoch: [28] [1080/2502] eta: 0:56:11 lr: 0.000001 loss_cls: 4.1805 (3.8928) grad_norm: 2.3744 (2.4467) time: 2.3955 data: 0.0002 max mem: 8426 +[2024-12-11 05:50:30 root] (utils.py 283): INFO Epoch: [28] [1090/2502] eta: 0:55:47 lr: 0.000001 loss_cls: 3.6419 (3.8912) grad_norm: 2.4593 (2.4471) time: 2.3860 data: 0.0002 max mem: 8426 +[2024-12-11 05:50:54 root] (utils.py 283): INFO Epoch: [28] [1100/2502] eta: 0:55:24 lr: 0.000001 loss_cls: 4.0459 (3.8923) grad_norm: 2.4677 (2.4473) time: 2.3928 data: 0.0002 max mem: 8426 +[2024-12-11 05:51:18 root] (utils.py 283): INFO Epoch: [28] [1110/2502] eta: 0:55:00 lr: 0.000001 loss_cls: 4.0459 (3.8923) grad_norm: 2.4043 (2.4463) time: 2.3800 data: 0.0002 max mem: 8426 +[2024-12-11 05:51:42 root] (utils.py 283): INFO Epoch: [28] [1120/2502] eta: 0:54:36 lr: 0.000001 loss_cls: 3.8895 (3.8927) grad_norm: 2.3530 (2.4466) time: 2.3768 data: 0.0002 max mem: 8426 +[2024-12-11 05:52:06 root] (utils.py 283): INFO Epoch: [28] [1130/2502] eta: 0:54:13 lr: 0.000001 loss_cls: 3.9435 (3.8938) grad_norm: 2.3946 (2.4467) time: 2.4016 data: 0.0002 max mem: 8426 +[2024-12-11 05:52:30 root] (utils.py 283): INFO Epoch: [28] [1140/2502] eta: 0:53:50 lr: 0.000001 loss_cls: 3.8386 (3.8901) grad_norm: 2.4514 (2.4465) time: 2.4020 data: 0.0002 max mem: 8426 +[2024-12-11 05:52:54 root] (utils.py 283): INFO Epoch: [28] [1150/2502] eta: 0:53:27 lr: 0.000001 loss_cls: 3.6677 (3.8890) grad_norm: 2.4616 (2.4467) time: 2.4178 data: 0.0002 max mem: 8426 +[2024-12-11 05:53:19 root] (utils.py 283): INFO Epoch: [28] [1160/2502] eta: 0:53:04 lr: 0.000001 loss_cls: 3.7513 (3.8892) grad_norm: 2.4599 (2.4470) time: 2.4453 data: 0.0002 max mem: 8426 +[2024-12-11 05:53:43 root] (utils.py 283): INFO Epoch: [28] [1170/2502] eta: 0:52:41 lr: 0.000001 loss_cls: 3.8617 (3.8888) grad_norm: 2.4705 (2.4472) time: 2.4256 data: 0.0002 max mem: 8426 +[2024-12-11 05:54:07 root] (utils.py 283): INFO Epoch: [28] [1180/2502] eta: 0:52:18 lr: 0.000001 loss_cls: 4.0162 (3.8899) grad_norm: 2.4553 (2.4469) time: 2.4170 data: 0.0003 max mem: 8426 +[2024-12-11 05:54:31 root] (utils.py 283): INFO Epoch: [28] [1190/2502] eta: 0:51:54 lr: 0.000001 loss_cls: 4.1360 (3.8930) grad_norm: 2.4281 (2.4471) time: 2.3982 data: 0.0003 max mem: 8426 +[2024-12-11 05:54:54 root] (utils.py 283): INFO Epoch: [28] [1200/2502] eta: 0:51:30 lr: 0.000001 loss_cls: 4.1360 (3.8922) grad_norm: 2.3881 (2.4472) time: 2.3603 data: 0.0003 max mem: 8426 +[2024-12-11 05:55:18 root] (utils.py 283): INFO Epoch: [28] [1210/2502] eta: 0:51:06 lr: 0.000001 loss_cls: 3.8865 (3.8919) grad_norm: 2.4477 (2.4478) time: 2.3701 data: 0.0003 max mem: 8426 +[2024-12-11 05:55:42 root] (utils.py 283): INFO Epoch: [28] [1220/2502] eta: 0:50:43 lr: 0.000001 loss_cls: 4.2820 (3.8940) grad_norm: 2.4477 (2.4477) time: 2.3843 data: 0.0002 max mem: 8426 +[2024-12-11 05:56:06 root] (utils.py 283): INFO Epoch: [28] [1230/2502] eta: 0:50:20 lr: 0.000001 loss_cls: 4.0816 (3.8914) grad_norm: 2.3947 (2.4476) time: 2.4246 data: 0.0002 max mem: 8426 +[2024-12-11 05:56:30 root] (utils.py 283): INFO Epoch: [28] [1240/2502] eta: 0:49:56 lr: 0.000001 loss_cls: 3.6766 (3.8907) grad_norm: 2.4644 (2.4478) time: 2.4297 data: 0.0002 max mem: 8426 +[2024-12-11 05:56:54 root] (utils.py 283): INFO Epoch: [28] [1250/2502] eta: 0:49:32 lr: 0.000001 loss_cls: 3.9676 (3.8913) grad_norm: 2.4348 (2.4475) time: 2.3680 data: 0.0002 max mem: 8426 +[2024-12-11 05:57:17 root] (utils.py 283): INFO Epoch: [28] [1260/2502] eta: 0:49:08 lr: 0.000001 loss_cls: 4.2042 (3.8934) grad_norm: 2.4390 (2.4484) time: 2.3472 data: 0.0002 max mem: 8426 +[2024-12-11 05:57:41 root] (utils.py 283): INFO Epoch: [28] [1270/2502] eta: 0:48:44 lr: 0.000001 loss_cls: 3.9351 (3.8897) grad_norm: 2.4606 (2.4487) time: 2.3586 data: 0.0002 max mem: 8426 +[2024-12-11 05:58:05 root] (utils.py 283): INFO Epoch: [28] [1280/2502] eta: 0:48:21 lr: 0.000001 loss_cls: 3.2806 (3.8875) grad_norm: 2.4374 (2.4492) time: 2.3683 data: 0.0002 max mem: 8426 +[2024-12-11 05:58:29 root] (utils.py 283): INFO Epoch: [28] [1290/2502] eta: 0:47:57 lr: 0.000001 loss_cls: 3.6399 (3.8861) grad_norm: 2.4374 (2.4492) time: 2.3864 data: 0.0002 max mem: 8426 +[2024-12-11 05:58:52 root] (utils.py 283): INFO Epoch: [28] [1300/2502] eta: 0:47:33 lr: 0.000001 loss_cls: 3.7614 (3.8838) grad_norm: 2.4084 (2.4492) time: 2.3648 data: 0.0002 max mem: 8426 +[2024-12-11 05:59:16 root] (utils.py 283): INFO Epoch: [28] [1310/2502] eta: 0:47:09 lr: 0.000001 loss_cls: 3.8987 (3.8838) grad_norm: 2.4254 (2.4493) time: 2.3519 data: 0.0002 max mem: 8426 +[2024-12-11 05:59:39 root] (utils.py 283): INFO Epoch: [28] [1320/2502] eta: 0:46:45 lr: 0.000001 loss_cls: 3.9466 (3.8823) grad_norm: 2.4614 (2.4495) time: 2.3535 data: 0.0002 max mem: 8426 +[2024-12-11 06:00:03 root] (utils.py 283): INFO Epoch: [28] [1330/2502] eta: 0:46:22 lr: 0.000001 loss_cls: 3.9268 (3.8827) grad_norm: 2.4563 (2.4497) time: 2.3691 data: 0.0002 max mem: 8426 +[2024-12-11 06:00:27 root] (utils.py 283): INFO Epoch: [28] [1340/2502] eta: 0:45:58 lr: 0.000001 loss_cls: 3.9268 (3.8828) grad_norm: 2.4314 (2.4493) time: 2.3699 data: 0.0002 max mem: 8426 +[2024-12-11 06:00:50 root] (utils.py 283): INFO Epoch: [28] [1350/2502] eta: 0:45:34 lr: 0.000001 loss_cls: 3.9349 (3.8833) grad_norm: 2.4174 (2.4493) time: 2.3546 data: 0.0002 max mem: 8426 +[2024-12-11 06:01:14 root] (utils.py 283): INFO Epoch: [28] [1360/2502] eta: 0:45:10 lr: 0.000001 loss_cls: 4.1682 (3.8849) grad_norm: 2.3820 (2.4487) time: 2.3601 data: 0.0002 max mem: 8426 +[2024-12-11 06:01:38 root] (utils.py 283): INFO Epoch: [28] [1370/2502] eta: 0:44:46 lr: 0.000001 loss_cls: 3.8762 (3.8815) grad_norm: 2.3894 (2.4487) time: 2.3655 data: 0.0002 max mem: 8426 +[2024-12-11 06:02:01 root] (utils.py 283): INFO Epoch: [28] [1380/2502] eta: 0:44:22 lr: 0.000001 loss_cls: 3.8017 (3.8829) grad_norm: 2.4101 (2.4483) time: 2.3618 data: 0.0002 max mem: 8426 +[2024-12-11 06:02:25 root] (utils.py 283): INFO Epoch: [28] [1390/2502] eta: 0:43:58 lr: 0.000001 loss_cls: 4.1625 (3.8854) grad_norm: 2.3720 (2.4479) time: 2.3618 data: 0.0002 max mem: 8426 +[2024-12-11 06:02:48 root] (utils.py 283): INFO Epoch: [28] [1400/2502] eta: 0:43:34 lr: 0.000001 loss_cls: 4.1132 (3.8857) grad_norm: 2.4081 (2.4483) time: 2.3546 data: 0.0002 max mem: 8426 +[2024-12-11 06:03:13 root] (utils.py 283): INFO Epoch: [28] [1410/2502] eta: 0:43:11 lr: 0.000001 loss_cls: 4.0262 (3.8858) grad_norm: 2.4081 (2.4477) time: 2.3913 data: 0.0002 max mem: 8426 +[2024-12-11 06:03:38 root] (utils.py 283): INFO Epoch: [28] [1420/2502] eta: 0:42:49 lr: 0.000001 loss_cls: 4.1208 (3.8860) grad_norm: 2.3976 (2.4476) time: 2.4756 data: 0.0002 max mem: 8426 +[2024-12-11 06:04:02 root] (utils.py 283): INFO Epoch: [28] [1430/2502] eta: 0:42:26 lr: 0.000001 loss_cls: 4.2255 (3.8881) grad_norm: 2.4301 (2.4482) time: 2.4925 data: 0.0002 max mem: 8426 +[2024-12-11 06:04:27 root] (utils.py 283): INFO Epoch: [28] [1440/2502] eta: 0:42:02 lr: 0.000001 loss_cls: 4.2590 (3.8878) grad_norm: 2.4661 (2.4481) time: 2.4703 data: 0.0002 max mem: 8426 +[2024-12-11 06:04:51 root] (utils.py 283): INFO Epoch: [28] [1450/2502] eta: 0:41:39 lr: 0.000001 loss_cls: 4.0932 (3.8869) grad_norm: 2.4589 (2.4483) time: 2.4257 data: 0.0002 max mem: 8426 +[2024-12-11 06:05:15 root] (utils.py 283): INFO Epoch: [28] [1460/2502] eta: 0:41:15 lr: 0.000001 loss_cls: 3.6087 (3.8842) grad_norm: 2.4570 (2.4488) time: 2.4114 data: 0.0002 max mem: 8426 +[2024-12-11 06:05:39 root] (utils.py 283): INFO Epoch: [28] [1470/2502] eta: 0:40:52 lr: 0.000001 loss_cls: 3.6087 (3.8841) grad_norm: 2.4558 (2.4489) time: 2.3984 data: 0.0002 max mem: 8426 +[2024-12-11 06:06:03 root] (utils.py 283): INFO Epoch: [28] [1480/2502] eta: 0:40:28 lr: 0.000001 loss_cls: 3.6316 (3.8833) grad_norm: 2.4558 (2.4488) time: 2.3686 data: 0.0003 max mem: 8426 +[2024-12-11 06:06:26 root] (utils.py 283): INFO Epoch: [28] [1490/2502] eta: 0:40:04 lr: 0.000001 loss_cls: 3.5850 (3.8822) grad_norm: 2.4364 (2.4489) time: 2.3364 data: 0.0002 max mem: 8426 +[2024-12-11 06:06:49 root] (utils.py 283): INFO Epoch: [28] [1500/2502] eta: 0:39:40 lr: 0.000001 loss_cls: 3.6527 (3.8817) grad_norm: 2.4364 (2.4490) time: 2.3201 data: 0.0002 max mem: 8426 +[2024-12-11 06:07:13 root] (utils.py 283): INFO Epoch: [28] [1510/2502] eta: 0:39:16 lr: 0.000001 loss_cls: 3.7947 (3.8820) grad_norm: 2.4714 (2.4492) time: 2.3783 data: 0.0002 max mem: 8426 +[2024-12-11 06:07:37 root] (utils.py 283): INFO Epoch: [28] [1520/2502] eta: 0:38:52 lr: 0.000001 loss_cls: 3.9223 (3.8823) grad_norm: 2.4306 (2.4486) time: 2.3908 data: 0.0002 max mem: 8426 +[2024-12-11 06:08:01 root] (utils.py 283): INFO Epoch: [28] [1530/2502] eta: 0:38:28 lr: 0.000001 loss_cls: 3.9223 (3.8816) grad_norm: 2.3465 (2.4488) time: 2.3671 data: 0.0002 max mem: 8426 +[2024-12-11 06:08:24 root] (utils.py 283): INFO Epoch: [28] [1540/2502] eta: 0:38:05 lr: 0.000001 loss_cls: 4.0252 (3.8816) grad_norm: 2.4190 (2.4489) time: 2.3605 data: 0.0002 max mem: 8426 +[2024-12-11 06:08:48 root] (utils.py 283): INFO Epoch: [28] [1550/2502] eta: 0:37:41 lr: 0.000001 loss_cls: 3.8386 (3.8810) grad_norm: 2.3667 (2.4486) time: 2.3534 data: 0.0002 max mem: 8426 +[2024-12-11 06:09:11 root] (utils.py 283): INFO Epoch: [28] [1560/2502] eta: 0:37:17 lr: 0.000001 loss_cls: 3.7970 (3.8811) grad_norm: 2.4287 (2.4486) time: 2.3602 data: 0.0002 max mem: 8426 +[2024-12-11 06:09:37 root] (utils.py 283): INFO Epoch: [28] [1570/2502] eta: 0:36:54 lr: 0.000001 loss_cls: 4.2260 (3.8825) grad_norm: 2.4522 (2.4494) time: 2.4496 data: 0.0002 max mem: 8426 +[2024-12-11 06:10:02 root] (utils.py 283): INFO Epoch: [28] [1580/2502] eta: 0:36:31 lr: 0.000001 loss_cls: 4.2701 (3.8837) grad_norm: 2.4958 (2.4498) time: 2.5353 data: 0.0002 max mem: 8426 +[2024-12-11 06:10:26 root] (utils.py 283): INFO Epoch: [28] [1590/2502] eta: 0:36:08 lr: 0.000001 loss_cls: 4.1799 (3.8827) grad_norm: 2.4669 (2.4498) time: 2.4696 data: 0.0002 max mem: 8426 +[2024-12-11 06:10:50 root] (utils.py 283): INFO Epoch: [28] [1600/2502] eta: 0:35:44 lr: 0.000001 loss_cls: 3.7577 (3.8818) grad_norm: 2.4201 (2.4498) time: 2.4140 data: 0.0002 max mem: 8426 +[2024-12-11 06:11:15 root] (utils.py 283): INFO Epoch: [28] [1610/2502] eta: 0:35:21 lr: 0.000001 loss_cls: 3.9373 (3.8823) grad_norm: 2.4081 (2.4498) time: 2.4396 data: 0.0002 max mem: 8426 +[2024-12-11 06:11:40 root] (utils.py 283): INFO Epoch: [28] [1620/2502] eta: 0:34:58 lr: 0.000001 loss_cls: 4.0854 (3.8840) grad_norm: 2.3796 (2.4498) time: 2.4704 data: 0.0002 max mem: 8426 +[2024-12-11 06:12:03 root] (utils.py 283): INFO Epoch: [28] [1630/2502] eta: 0:34:34 lr: 0.000001 loss_cls: 4.1262 (3.8837) grad_norm: 2.3796 (2.4496) time: 2.4196 data: 0.0002 max mem: 8426 +[2024-12-11 06:12:27 root] (utils.py 283): INFO Epoch: [28] [1640/2502] eta: 0:34:10 lr: 0.000001 loss_cls: 4.0539 (3.8847) grad_norm: 2.3530 (2.4491) time: 2.3841 data: 0.0003 max mem: 8426 +[2024-12-11 06:12:52 root] (utils.py 283): INFO Epoch: [28] [1650/2502] eta: 0:33:47 lr: 0.000001 loss_cls: 4.0539 (3.8854) grad_norm: 2.3530 (2.4488) time: 2.4362 data: 0.0003 max mem: 8426 +[2024-12-11 06:13:16 root] (utils.py 283): INFO Epoch: [28] [1660/2502] eta: 0:33:23 lr: 0.000001 loss_cls: 3.9239 (3.8841) grad_norm: 2.4201 (2.4486) time: 2.4314 data: 0.0003 max mem: 8426 +[2024-12-11 06:13:40 root] (utils.py 283): INFO Epoch: [28] [1670/2502] eta: 0:32:59 lr: 0.000001 loss_cls: 3.8449 (3.8841) grad_norm: 2.3817 (2.4483) time: 2.3962 data: 0.0003 max mem: 8426 +[2024-12-11 06:14:04 root] (utils.py 283): INFO Epoch: [28] [1680/2502] eta: 0:32:35 lr: 0.000001 loss_cls: 4.1696 (3.8852) grad_norm: 2.4225 (2.4487) time: 2.3825 data: 0.0003 max mem: 8426 +[2024-12-11 06:14:27 root] (utils.py 283): INFO Epoch: [28] [1690/2502] eta: 0:32:12 lr: 0.000001 loss_cls: 4.1512 (3.8848) grad_norm: 2.4656 (2.4487) time: 2.3808 data: 0.0003 max mem: 8426 +[2024-12-11 06:14:51 root] (utils.py 283): INFO Epoch: [28] [1700/2502] eta: 0:31:48 lr: 0.000001 loss_cls: 4.1512 (3.8870) grad_norm: 2.4060 (2.4485) time: 2.3849 data: 0.0003 max mem: 8426 +[2024-12-11 06:15:15 root] (utils.py 283): INFO Epoch: [28] [1710/2502] eta: 0:31:24 lr: 0.000001 loss_cls: 4.1382 (3.8868) grad_norm: 2.3737 (2.4481) time: 2.3684 data: 0.0003 max mem: 8426 +[2024-12-11 06:15:38 root] (utils.py 283): INFO Epoch: [28] [1720/2502] eta: 0:31:00 lr: 0.000001 loss_cls: 4.0349 (3.8877) grad_norm: 2.4093 (2.4484) time: 2.3375 data: 0.0003 max mem: 8426 +[2024-12-11 06:16:01 root] (utils.py 283): INFO Epoch: [28] [1730/2502] eta: 0:30:36 lr: 0.000001 loss_cls: 4.0423 (3.8862) grad_norm: 2.4392 (2.4485) time: 2.3211 data: 0.0003 max mem: 8426 +[2024-12-11 06:16:25 root] (utils.py 283): INFO Epoch: [28] [1740/2502] eta: 0:30:12 lr: 0.000001 loss_cls: 3.5353 (3.8845) grad_norm: 2.3966 (2.4482) time: 2.3243 data: 0.0002 max mem: 8426 +[2024-12-11 06:16:48 root] (utils.py 283): INFO Epoch: [28] [1750/2502] eta: 0:29:48 lr: 0.000001 loss_cls: 3.6816 (3.8838) grad_norm: 2.4040 (2.4487) time: 2.3294 data: 0.0002 max mem: 8426 +[2024-12-11 06:17:11 root] (utils.py 283): INFO Epoch: [28] [1760/2502] eta: 0:29:24 lr: 0.000001 loss_cls: 3.9928 (3.8848) grad_norm: 2.3998 (2.4484) time: 2.3409 data: 0.0002 max mem: 8426 +[2024-12-11 06:17:35 root] (utils.py 283): INFO Epoch: [28] [1770/2502] eta: 0:29:00 lr: 0.000001 loss_cls: 3.9928 (3.8844) grad_norm: 2.3808 (2.4480) time: 2.3540 data: 0.0003 max mem: 8426 +[2024-12-11 06:17:59 root] (utils.py 283): INFO Epoch: [28] [1780/2502] eta: 0:28:36 lr: 0.000001 loss_cls: 3.9301 (3.8843) grad_norm: 2.4491 (2.4484) time: 2.3668 data: 0.0003 max mem: 8426 +[2024-12-11 06:18:23 root] (utils.py 283): INFO Epoch: [28] [1790/2502] eta: 0:28:13 lr: 0.000001 loss_cls: 3.8705 (3.8839) grad_norm: 2.4496 (2.4486) time: 2.4030 data: 0.0002 max mem: 8426 +[2024-12-11 06:18:48 root] (utils.py 283): INFO Epoch: [28] [1800/2502] eta: 0:27:49 lr: 0.000001 loss_cls: 3.8705 (3.8843) grad_norm: 2.4034 (2.4486) time: 2.4449 data: 0.0002 max mem: 8426 +[2024-12-11 06:19:11 root] (utils.py 283): INFO Epoch: [28] [1810/2502] eta: 0:27:25 lr: 0.000001 loss_cls: 4.0912 (3.8849) grad_norm: 2.3784 (2.4484) time: 2.4127 data: 0.0003 max mem: 8426 +[2024-12-11 06:19:36 root] (utils.py 283): INFO Epoch: [28] [1820/2502] eta: 0:27:02 lr: 0.000001 loss_cls: 4.0699 (3.8831) grad_norm: 2.3762 (2.4477) time: 2.4013 data: 0.0003 max mem: 8426 +[2024-12-11 06:19:59 root] (utils.py 283): INFO Epoch: [28] [1830/2502] eta: 0:26:38 lr: 0.000001 loss_cls: 3.7405 (3.8822) grad_norm: 2.4005 (2.4479) time: 2.4055 data: 0.0003 max mem: 8426 +[2024-12-11 06:20:23 root] (utils.py 283): INFO Epoch: [28] [1840/2502] eta: 0:26:14 lr: 0.000001 loss_cls: 3.7405 (3.8821) grad_norm: 2.4227 (2.4479) time: 2.3480 data: 0.0002 max mem: 8426 +[2024-12-11 06:20:46 root] (utils.py 283): INFO Epoch: [28] [1850/2502] eta: 0:25:50 lr: 0.000001 loss_cls: 3.6527 (3.8814) grad_norm: 2.4395 (2.4482) time: 2.3169 data: 0.0002 max mem: 8426 +[2024-12-11 06:21:09 root] (utils.py 283): INFO Epoch: [28] [1860/2502] eta: 0:25:26 lr: 0.000001 loss_cls: 3.8079 (3.8810) grad_norm: 2.4997 (2.4485) time: 2.3020 data: 0.0002 max mem: 8426 +[2024-12-11 06:21:32 root] (utils.py 283): INFO Epoch: [28] [1870/2502] eta: 0:25:02 lr: 0.000001 loss_cls: 3.8079 (3.8792) grad_norm: 2.4080 (2.4482) time: 2.3349 data: 0.0002 max mem: 8426 +[2024-12-11 06:21:55 root] (utils.py 283): INFO Epoch: [28] [1880/2502] eta: 0:24:38 lr: 0.000001 loss_cls: 3.9877 (3.8800) grad_norm: 2.3851 (2.4478) time: 2.3417 data: 0.0003 max mem: 8426 +[2024-12-11 06:22:19 root] (utils.py 283): INFO Epoch: [28] [1890/2502] eta: 0:24:14 lr: 0.000001 loss_cls: 3.9877 (3.8796) grad_norm: 2.4134 (2.4478) time: 2.3398 data: 0.0003 max mem: 8426 +[2024-12-11 06:22:43 root] (utils.py 283): INFO Epoch: [28] [1900/2502] eta: 0:23:51 lr: 0.000001 loss_cls: 4.0733 (3.8807) grad_norm: 2.4332 (2.4480) time: 2.3656 data: 0.0003 max mem: 8426 +[2024-12-11 06:23:06 root] (utils.py 283): INFO Epoch: [28] [1910/2502] eta: 0:23:27 lr: 0.000001 loss_cls: 4.0733 (3.8803) grad_norm: 2.4401 (2.4479) time: 2.3370 data: 0.0003 max mem: 8426 +[2024-12-11 06:23:30 root] (utils.py 283): INFO Epoch: [28] [1920/2502] eta: 0:23:03 lr: 0.000001 loss_cls: 4.0348 (3.8814) grad_norm: 2.4401 (2.4479) time: 2.3475 data: 0.0003 max mem: 8426 +[2024-12-11 06:23:53 root] (utils.py 283): INFO Epoch: [28] [1930/2502] eta: 0:22:39 lr: 0.000001 loss_cls: 4.3439 (3.8826) grad_norm: 2.4468 (2.4481) time: 2.3575 data: 0.0003 max mem: 8426 +[2024-12-11 06:24:17 root] (utils.py 283): INFO Epoch: [28] [1940/2502] eta: 0:22:15 lr: 0.000001 loss_cls: 4.0654 (3.8818) grad_norm: 2.4192 (2.4479) time: 2.3417 data: 0.0003 max mem: 8426 +[2024-12-11 06:24:40 root] (utils.py 283): INFO Epoch: [28] [1950/2502] eta: 0:21:51 lr: 0.000001 loss_cls: 4.0654 (3.8828) grad_norm: 2.4192 (2.4479) time: 2.3344 data: 0.0002 max mem: 8426 +[2024-12-11 06:25:03 root] (utils.py 283): INFO Epoch: [28] [1960/2502] eta: 0:21:27 lr: 0.000001 loss_cls: 4.2813 (3.8844) grad_norm: 2.4335 (2.4479) time: 2.3309 data: 0.0003 max mem: 8426 +[2024-12-11 06:25:26 root] (utils.py 283): INFO Epoch: [28] [1970/2502] eta: 0:21:03 lr: 0.000001 loss_cls: 4.0211 (3.8846) grad_norm: 2.4858 (2.4481) time: 2.3248 data: 0.0003 max mem: 8426 +[2024-12-11 06:25:50 root] (utils.py 283): INFO Epoch: [28] [1980/2502] eta: 0:20:40 lr: 0.000001 loss_cls: 3.9860 (3.8847) grad_norm: 2.4421 (2.4480) time: 2.3332 data: 0.0003 max mem: 8426 +[2024-12-11 06:26:13 root] (utils.py 283): INFO Epoch: [28] [1990/2502] eta: 0:20:16 lr: 0.000001 loss_cls: 3.9809 (3.8838) grad_norm: 2.4368 (2.4481) time: 2.3584 data: 0.0003 max mem: 8426 +[2024-12-11 06:26:37 root] (utils.py 283): INFO Epoch: [28] [2000/2502] eta: 0:19:52 lr: 0.000001 loss_cls: 3.7182 (3.8825) grad_norm: 2.4226 (2.4480) time: 2.3678 data: 0.0003 max mem: 8426 +[2024-12-11 06:27:01 root] (utils.py 283): INFO Epoch: [28] [2010/2502] eta: 0:19:28 lr: 0.000001 loss_cls: 3.8690 (3.8823) grad_norm: 2.4573 (2.4482) time: 2.3686 data: 0.0003 max mem: 8426 +[2024-12-11 06:27:24 root] (utils.py 283): INFO Epoch: [28] [2020/2502] eta: 0:19:04 lr: 0.000001 loss_cls: 3.9480 (3.8822) grad_norm: 2.3993 (2.4477) time: 2.3368 data: 0.0003 max mem: 8426 +[2024-12-11 06:27:48 root] (utils.py 283): INFO Epoch: [28] [2030/2502] eta: 0:18:41 lr: 0.000001 loss_cls: 4.0202 (3.8835) grad_norm: 2.3636 (2.4474) time: 2.3377 data: 0.0002 max mem: 8426 +[2024-12-11 06:28:11 root] (utils.py 283): INFO Epoch: [28] [2040/2502] eta: 0:18:17 lr: 0.000001 loss_cls: 4.1631 (3.8839) grad_norm: 2.4236 (2.4474) time: 2.3514 data: 0.0003 max mem: 8426 +[2024-12-11 06:28:35 root] (utils.py 283): INFO Epoch: [28] [2050/2502] eta: 0:17:53 lr: 0.000001 loss_cls: 4.0767 (3.8837) grad_norm: 2.4092 (2.4471) time: 2.3527 data: 0.0002 max mem: 8426 +[2024-12-11 06:28:58 root] (utils.py 283): INFO Epoch: [28] [2060/2502] eta: 0:17:29 lr: 0.000001 loss_cls: 4.1394 (3.8853) grad_norm: 2.4132 (2.4472) time: 2.3708 data: 0.0002 max mem: 8426 +[2024-12-11 06:29:23 root] (utils.py 283): INFO Epoch: [28] [2070/2502] eta: 0:17:06 lr: 0.000001 loss_cls: 4.1741 (3.8854) grad_norm: 2.4132 (2.4470) time: 2.4029 data: 0.0002 max mem: 8426 +[2024-12-11 06:29:47 root] (utils.py 283): INFO Epoch: [28] [2080/2502] eta: 0:16:42 lr: 0.000001 loss_cls: 3.9912 (3.8845) grad_norm: 2.3825 (2.4466) time: 2.4314 data: 0.0003 max mem: 8426 +[2024-12-11 06:30:11 root] (utils.py 283): INFO Epoch: [28] [2090/2502] eta: 0:16:18 lr: 0.000001 loss_cls: 3.6978 (3.8842) grad_norm: 2.3839 (2.4466) time: 2.4203 data: 0.0002 max mem: 8426 +[2024-12-11 06:30:35 root] (utils.py 283): INFO Epoch: [28] [2100/2502] eta: 0:15:54 lr: 0.000001 loss_cls: 3.7353 (3.8836) grad_norm: 2.3594 (2.4463) time: 2.3839 data: 0.0002 max mem: 8426 +[2024-12-11 06:30:58 root] (utils.py 283): INFO Epoch: [28] [2110/2502] eta: 0:15:31 lr: 0.000001 loss_cls: 3.7353 (3.8828) grad_norm: 2.3708 (2.4462) time: 2.3592 data: 0.0002 max mem: 8426 +[2024-12-11 06:31:22 root] (utils.py 283): INFO Epoch: [28] [2120/2502] eta: 0:15:07 lr: 0.000001 loss_cls: 3.7615 (3.8826) grad_norm: 2.4677 (2.4465) time: 2.3688 data: 0.0003 max mem: 8426 +[2024-12-11 06:31:46 root] (utils.py 283): INFO Epoch: [28] [2130/2502] eta: 0:14:43 lr: 0.000001 loss_cls: 3.7987 (3.8818) grad_norm: 2.4962 (2.4466) time: 2.3588 data: 0.0003 max mem: 8426 +[2024-12-11 06:32:09 root] (utils.py 283): INFO Epoch: [28] [2140/2502] eta: 0:14:19 lr: 0.000001 loss_cls: 3.5166 (3.8810) grad_norm: 2.4144 (2.4462) time: 2.3605 data: 0.0003 max mem: 8426 +[2024-12-11 06:32:33 root] (utils.py 283): INFO Epoch: [28] [2150/2502] eta: 0:13:56 lr: 0.000001 loss_cls: 3.6737 (3.8805) grad_norm: 2.3874 (2.4461) time: 2.3774 data: 0.0003 max mem: 8426 +[2024-12-11 06:32:58 root] (utils.py 283): INFO Epoch: [28] [2160/2502] eta: 0:13:32 lr: 0.000001 loss_cls: 3.9581 (3.8818) grad_norm: 2.4010 (2.4462) time: 2.4123 data: 0.0003 max mem: 8426 +[2024-12-11 06:33:22 root] (utils.py 283): INFO Epoch: [28] [2170/2502] eta: 0:13:08 lr: 0.000001 loss_cls: 4.1327 (3.8825) grad_norm: 2.3501 (2.4464) time: 2.4398 data: 0.0003 max mem: 8426 +[2024-12-11 06:33:45 root] (utils.py 283): INFO Epoch: [28] [2180/2502] eta: 0:12:45 lr: 0.000001 loss_cls: 4.1590 (3.8832) grad_norm: 2.4702 (2.4465) time: 2.3929 data: 0.0003 max mem: 8426 +[2024-12-11 06:34:14 root] (utils.py 283): INFO Epoch: [28] [2190/2502] eta: 0:12:21 lr: 0.000001 loss_cls: 4.1967 (3.8829) grad_norm: 2.4905 (2.4467) time: 2.5958 data: 0.0003 max mem: 8426 +[2024-12-11 06:34:49 root] (utils.py 283): INFO Epoch: [28] [2200/2502] eta: 0:11:59 lr: 0.000001 loss_cls: 3.9586 (3.8828) grad_norm: 2.4630 (2.4468) time: 3.2014 data: 0.0003 max mem: 8426 +[2024-12-11 06:35:13 root] (utils.py 283): INFO Epoch: [28] [2210/2502] eta: 0:11:35 lr: 0.000001 loss_cls: 4.0676 (3.8835) grad_norm: 2.4210 (2.4466) time: 2.9780 data: 0.0003 max mem: 8426 +[2024-12-11 06:35:37 root] (utils.py 283): INFO Epoch: [28] [2220/2502] eta: 0:11:12 lr: 0.000001 loss_cls: 4.2490 (3.8843) grad_norm: 2.3786 (2.4467) time: 2.3983 data: 0.0003 max mem: 8426 +[2024-12-11 06:36:01 root] (utils.py 283): INFO Epoch: [28] [2230/2502] eta: 0:10:48 lr: 0.000001 loss_cls: 4.2418 (3.8850) grad_norm: 2.4543 (2.4469) time: 2.3602 data: 0.0002 max mem: 8426 +[2024-12-11 06:36:24 root] (utils.py 283): INFO Epoch: [28] [2240/2502] eta: 0:10:24 lr: 0.000001 loss_cls: 3.9674 (3.8847) grad_norm: 2.4552 (2.4468) time: 2.3327 data: 0.0003 max mem: 8426 +[2024-12-11 06:36:48 root] (utils.py 283): INFO Epoch: [28] [2250/2502] eta: 0:10:00 lr: 0.000001 loss_cls: 3.9067 (3.8849) grad_norm: 2.3622 (2.4468) time: 2.3601 data: 0.0003 max mem: 8426 +[2024-12-11 06:37:11 root] (utils.py 283): INFO Epoch: [28] [2260/2502] eta: 0:09:36 lr: 0.000001 loss_cls: 4.1462 (3.8850) grad_norm: 2.3812 (2.4469) time: 2.3708 data: 0.0002 max mem: 8426 +[2024-12-11 06:37:36 root] (utils.py 283): INFO Epoch: [28] [2270/2502] eta: 0:09:12 lr: 0.000001 loss_cls: 3.8656 (3.8844) grad_norm: 2.4059 (2.4468) time: 2.4213 data: 0.0002 max mem: 8426 +[2024-12-11 06:38:00 root] (utils.py 283): INFO Epoch: [28] [2280/2502] eta: 0:08:49 lr: 0.000001 loss_cls: 3.8656 (3.8842) grad_norm: 2.4245 (2.4469) time: 2.4288 data: 0.0002 max mem: 8426 +[2024-12-11 06:38:24 root] (utils.py 283): INFO Epoch: [28] [2290/2502] eta: 0:08:25 lr: 0.000001 loss_cls: 3.9346 (3.8840) grad_norm: 2.4428 (2.4470) time: 2.3863 data: 0.0003 max mem: 8426 +[2024-12-11 06:38:47 root] (utils.py 283): INFO Epoch: [28] [2300/2502] eta: 0:08:01 lr: 0.000001 loss_cls: 3.5935 (3.8828) grad_norm: 2.4352 (2.4469) time: 2.3694 data: 0.0003 max mem: 8426 +[2024-12-11 06:39:11 root] (utils.py 283): INFO Epoch: [28] [2310/2502] eta: 0:07:37 lr: 0.000001 loss_cls: 3.7081 (3.8820) grad_norm: 2.4568 (2.4471) time: 2.3441 data: 0.0003 max mem: 8426 +[2024-12-11 06:39:34 root] (utils.py 283): INFO Epoch: [28] [2320/2502] eta: 0:07:13 lr: 0.000001 loss_cls: 3.9619 (3.8821) grad_norm: 2.4964 (2.4472) time: 2.3291 data: 0.0003 max mem: 8426 +[2024-12-11 06:39:58 root] (utils.py 283): INFO Epoch: [28] [2330/2502] eta: 0:06:49 lr: 0.000001 loss_cls: 4.0144 (3.8827) grad_norm: 2.4807 (2.4472) time: 2.3397 data: 0.0003 max mem: 8426 +[2024-12-11 06:40:21 root] (utils.py 283): INFO Epoch: [28] [2340/2502] eta: 0:06:25 lr: 0.000001 loss_cls: 4.1647 (3.8830) grad_norm: 2.3600 (2.4470) time: 2.3489 data: 0.0002 max mem: 8426 +[2024-12-11 06:40:45 root] (utils.py 283): INFO Epoch: [28] [2350/2502] eta: 0:06:02 lr: 0.000001 loss_cls: 4.0123 (3.8825) grad_norm: 2.3715 (2.4470) time: 2.3527 data: 0.0003 max mem: 8426 +[2024-12-11 06:41:08 root] (utils.py 283): INFO Epoch: [28] [2360/2502] eta: 0:05:38 lr: 0.000001 loss_cls: 4.0123 (3.8828) grad_norm: 2.4239 (2.4470) time: 2.3571 data: 0.0003 max mem: 8426 +[2024-12-11 06:41:32 root] (utils.py 283): INFO Epoch: [28] [2370/2502] eta: 0:05:14 lr: 0.000001 loss_cls: 3.7970 (3.8816) grad_norm: 2.4623 (2.4473) time: 2.3881 data: 0.0003 max mem: 8426 +[2024-12-11 06:41:56 root] (utils.py 283): INFO Epoch: [28] [2380/2502] eta: 0:04:50 lr: 0.000001 loss_cls: 3.6915 (3.8805) grad_norm: 2.4506 (2.4471) time: 2.3950 data: 0.0003 max mem: 8426 +[2024-12-11 06:42:20 root] (utils.py 283): INFO Epoch: [28] [2390/2502] eta: 0:04:26 lr: 0.000001 loss_cls: 4.1454 (3.8811) grad_norm: 2.4106 (2.4470) time: 2.3953 data: 0.0003 max mem: 8426 +[2024-12-11 06:42:44 root] (utils.py 283): INFO Epoch: [28] [2400/2502] eta: 0:04:02 lr: 0.000001 loss_cls: 4.2734 (3.8817) grad_norm: 2.4106 (2.4468) time: 2.3773 data: 0.0002 max mem: 8426 +[2024-12-11 06:43:08 root] (utils.py 283): INFO Epoch: [28] [2410/2502] eta: 0:03:39 lr: 0.000001 loss_cls: 4.1073 (3.8820) grad_norm: 2.3588 (2.4465) time: 2.3596 data: 0.0003 max mem: 8426 +[2024-12-11 06:43:31 root] (utils.py 283): INFO Epoch: [28] [2420/2502] eta: 0:03:15 lr: 0.000001 loss_cls: 3.8826 (3.8816) grad_norm: 2.3771 (2.4466) time: 2.3494 data: 0.0003 max mem: 8426 +[2024-12-11 06:43:54 root] (utils.py 283): INFO Epoch: [28] [2430/2502] eta: 0:02:51 lr: 0.000001 loss_cls: 3.8269 (3.8808) grad_norm: 2.3855 (2.4466) time: 2.3342 data: 0.0003 max mem: 8426 +[2024-12-11 06:44:18 root] (utils.py 283): INFO Epoch: [28] [2440/2502] eta: 0:02:27 lr: 0.000001 loss_cls: 3.7521 (3.8800) grad_norm: 2.3928 (2.4469) time: 2.3917 data: 0.0003 max mem: 8426 +[2024-12-11 06:44:43 root] (utils.py 283): INFO Epoch: [28] [2450/2502] eta: 0:02:03 lr: 0.000001 loss_cls: 3.8335 (3.8800) grad_norm: 2.4112 (2.4467) time: 2.4152 data: 0.0002 max mem: 8426 +[2024-12-11 06:45:06 root] (utils.py 283): INFO Epoch: [28] [2460/2502] eta: 0:01:40 lr: 0.000001 loss_cls: 3.8680 (3.8788) grad_norm: 2.4093 (2.4466) time: 2.3732 data: 0.0003 max mem: 8426 +[2024-12-11 06:45:30 root] (utils.py 283): INFO Epoch: [28] [2470/2502] eta: 0:01:16 lr: 0.000001 loss_cls: 3.6432 (3.8778) grad_norm: 2.4340 (2.4469) time: 2.3631 data: 0.0003 max mem: 8426 +[2024-12-11 06:45:53 root] (utils.py 283): INFO Epoch: [28] [2480/2502] eta: 0:00:52 lr: 0.000001 loss_cls: 3.8783 (3.8773) grad_norm: 2.4832 (2.4469) time: 2.3563 data: 0.0003 max mem: 8426 +[2024-12-11 06:46:16 root] (utils.py 283): INFO Epoch: [28] [2490/2502] eta: 0:00:28 lr: 0.000001 loss_cls: 4.0119 (3.8775) grad_norm: 2.4788 (2.4470) time: 2.3241 data: 0.0285 max mem: 8426 +[2024-12-11 06:46:40 root] (utils.py 283): INFO Epoch: [28] [2500/2502] eta: 0:00:04 lr: 0.000001 loss_cls: 3.9610 (3.8773) grad_norm: 2.3971 (2.4467) time: 2.3674 data: 0.0285 max mem: 8426 +[2024-12-11 06:46:43 root] (utils.py 283): INFO Epoch: [28] [2501/2502] eta: 0:00:02 lr: 0.000001 loss_cls: 3.7657 (3.8772) grad_norm: 2.3891 (2.4467) time: 2.3848 data: 0.0285 max mem: 8426 +[2024-12-11 06:46:43 root] (utils.py 297): INFO Epoch: [28] Total time: 1:39:19 (2.3819 s / it) +[2024-12-11 06:46:43 root] (engine.py 179): INFO Averaged stats:lr: 0.000001 loss_cls: 3.7657 (3.8789) grad_norm: 2.3891 (2.4467) +[2024-12-11 06:46:44 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:28 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.6170 (0.6170) acc1: 86.7188 (86.7188) acc3: 96.8750 (96.8750) acc5: 98.4375 (98.4375) time: 0.2908 data: 0.0003 max mem: 8426 +[2024-12-11 06:46:47 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:25 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7120 (0.8000) acc1: 85.9375 (82.8125) acc3: 95.3125 (93.9631) acc5: 97.6562 (96.5909) time: 0.2878 data: 0.0003 max mem: 8426 +[2024-12-11 06:46:49 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:20 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8428 (0.8548) acc1: 80.4688 (81.4732) acc3: 92.9688 (93.2292) acc5: 95.3125 (95.7217) time: 0.2619 data: 0.0004 max mem: 8426 +[2024-12-11 06:46:51 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:16 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9410 (0.8667) acc1: 79.6875 (80.6956) acc3: 92.9688 (93.3972) acc5: 95.3125 (95.9173) time: 0.2074 data: 0.0004 max mem: 8426 +[2024-12-11 06:46:53 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:13 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8069 (0.8554) acc1: 80.4688 (81.0976) acc3: 94.5312 (93.4261) acc5: 96.8750 (95.9794) time: 0.1938 data: 0.0005 max mem: 8426 +[2024-12-11 06:46:55 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0217 (0.9395) acc1: 75.7812 (79.0441) acc3: 88.2812 (92.0650) acc5: 92.9688 (94.9449) time: 0.2031 data: 0.0005 max mem: 8426 +[2024-12-11 06:46:57 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2119 (0.9853) acc1: 71.0938 (78.2147) acc3: 85.9375 (91.1757) acc5: 89.8438 (94.0830) time: 0.1900 data: 0.0004 max mem: 8426 +[2024-12-11 06:46:59 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1795 (1.0250) acc1: 74.2188 (77.2777) acc3: 86.7188 (90.6910) acc5: 90.6250 (93.6510) time: 0.2045 data: 0.0004 max mem: 8426 +[2024-12-11 06:47:01 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1874 (1.0591) acc1: 72.6562 (76.4853) acc3: 86.7188 (90.0945) acc5: 89.8438 (93.0748) time: 0.2030 data: 0.0007 max mem: 8426 +[2024-12-11 06:47:02 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2177 (1.0861) acc1: 69.5312 (75.7812) acc3: 86.7188 (89.7407) acc5: 89.8438 (92.8228) time: 0.1825 data: 0.0007 max mem: 8426 +[2024-12-11 06:47:04 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1327 (1.0746) acc1: 74.2188 (76.0080) acc3: 88.2812 (89.9120) acc5: 91.4062 (92.9760) time: 0.2013 data: 0.0005 max mem: 8426 +[2024-12-11 06:47:04 root] (utils.py 297): INFO Test: Total time: 0:00:20 (0.2119 s / it) +[2024-12-11 06:47:04 root] (engine.py 264): INFO * Acc@1 75.890 Acc@3 89.754 Acc@5 93.026 loss 1.076 flops 1.285 layer_flops 1.251 +[2024-12-11 06:47:04 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 75.9% +[2024-12-11 06:47:04 root] (main.py 576): INFO Max accuracy: 75.89% +[2024-12-11 06:47:06 root] (utils.py 283): INFO Epoch: [29] [ 0/2502] eta: 1:38:02 lr: 0.000001 loss_cls: 4.2026 (4.2026) grad_norm: 2.3142 (2.3142) time: 2.3509 data: 0.0004 max mem: 8426 +[2024-12-11 06:47:29 root] (utils.py 283): INFO Epoch: [29] [ 10/2502] eta: 1:35:14 lr: 0.000001 loss_cls: 3.6451 (3.5802) grad_norm: 2.3577 (2.3937) time: 2.2930 data: 0.0003 max mem: 8426 +[2024-12-11 06:47:52 root] (utils.py 283): INFO Epoch: [29] [ 20/2502] eta: 1:35:00 lr: 0.000001 loss_cls: 3.6451 (3.6992) grad_norm: 2.3854 (2.4450) time: 2.2939 data: 0.0003 max mem: 8426 +[2024-12-11 06:48:16 root] (utils.py 283): INFO Epoch: [29] [ 30/2502] eta: 1:35:35 lr: 0.000001 loss_cls: 4.0253 (3.6835) grad_norm: 2.4409 (2.4678) time: 2.3350 data: 0.0003 max mem: 8426 +[2024-12-11 06:48:39 root] (utils.py 283): INFO Epoch: [29] [ 40/2502] eta: 1:35:15 lr: 0.000001 loss_cls: 4.0253 (3.7148) grad_norm: 2.4524 (2.4546) time: 2.3480 data: 0.0003 max mem: 8426 +[2024-12-11 06:49:03 root] (utils.py 283): INFO Epoch: [29] [ 50/2502] eta: 1:35:20 lr: 0.000001 loss_cls: 3.8014 (3.7158) grad_norm: 2.3843 (2.4446) time: 2.3529 data: 0.0003 max mem: 8426 +[2024-12-11 06:49:27 root] (utils.py 283): INFO Epoch: [29] [ 60/2502] eta: 1:35:00 lr: 0.000001 loss_cls: 3.5866 (3.6875) grad_norm: 2.4006 (2.4382) time: 2.3604 data: 0.0002 max mem: 8426 +[2024-12-11 06:49:50 root] (utils.py 283): INFO Epoch: [29] [ 70/2502] eta: 1:34:32 lr: 0.000001 loss_cls: 3.7755 (3.7224) grad_norm: 2.4072 (2.4332) time: 2.3319 data: 0.0002 max mem: 8426 +[2024-12-11 06:50:13 root] (utils.py 283): INFO Epoch: [29] [ 80/2502] eta: 1:33:56 lr: 0.000001 loss_cls: 4.1866 (3.7794) grad_norm: 2.3970 (2.4302) time: 2.3053 data: 0.0002 max mem: 8426 +[2024-12-11 06:50:36 root] (utils.py 283): INFO Epoch: [29] [ 90/2502] eta: 1:33:32 lr: 0.000001 loss_cls: 4.2667 (3.8253) grad_norm: 2.4290 (2.4325) time: 2.3059 data: 0.0003 max mem: 8426 +[2024-12-11 06:50:59 root] (utils.py 283): INFO Epoch: [29] [ 100/2502] eta: 1:33:07 lr: 0.000001 loss_cls: 4.0974 (3.8272) grad_norm: 2.4386 (2.4337) time: 2.3213 data: 0.0003 max mem: 8426 +[2024-12-11 06:51:22 root] (utils.py 283): INFO Epoch: [29] [ 110/2502] eta: 1:32:43 lr: 0.000001 loss_cls: 3.8990 (3.8282) grad_norm: 2.4388 (2.4343) time: 2.3206 data: 0.0002 max mem: 8426 +[2024-12-11 06:51:46 root] (utils.py 283): INFO Epoch: [29] [ 120/2502] eta: 1:32:34 lr: 0.000001 loss_cls: 3.9830 (3.8095) grad_norm: 2.4438 (2.4370) time: 2.3626 data: 0.0002 max mem: 8426 +[2024-12-11 06:52:10 root] (utils.py 283): INFO Epoch: [29] [ 130/2502] eta: 1:32:14 lr: 0.000001 loss_cls: 3.8761 (3.8140) grad_norm: 2.4887 (2.4431) time: 2.3744 data: 0.0003 max mem: 8426 +[2024-12-11 06:52:33 root] (utils.py 283): INFO Epoch: [29] [ 140/2502] eta: 1:31:52 lr: 0.000001 loss_cls: 4.1286 (3.8266) grad_norm: 2.5387 (2.4499) time: 2.3448 data: 0.0003 max mem: 8426 +[2024-12-11 06:52:56 root] (utils.py 283): INFO Epoch: [29] [ 150/2502] eta: 1:31:24 lr: 0.000001 loss_cls: 4.1849 (3.8404) grad_norm: 2.4280 (2.4482) time: 2.3247 data: 0.0003 max mem: 8426 +[2024-12-11 06:53:19 root] (utils.py 283): INFO Epoch: [29] [ 160/2502] eta: 1:31:00 lr: 0.000001 loss_cls: 4.1538 (3.8531) grad_norm: 2.4022 (2.4479) time: 2.3139 data: 0.0002 max mem: 8426 +[2024-12-11 06:53:43 root] (utils.py 283): INFO Epoch: [29] [ 170/2502] eta: 1:30:35 lr: 0.000001 loss_cls: 4.1675 (3.8629) grad_norm: 2.3849 (2.4448) time: 2.3209 data: 0.0002 max mem: 8426 +[2024-12-11 06:54:06 root] (utils.py 283): INFO Epoch: [29] [ 180/2502] eta: 1:30:17 lr: 0.000001 loss_cls: 4.1675 (3.8618) grad_norm: 2.3790 (2.4448) time: 2.3490 data: 0.0002 max mem: 8426 +[2024-12-11 06:54:30 root] (utils.py 283): INFO Epoch: [29] [ 190/2502] eta: 1:29:56 lr: 0.000001 loss_cls: 4.0744 (3.8613) grad_norm: 2.4257 (2.4490) time: 2.3635 data: 0.0003 max mem: 8426 +[2024-12-11 06:54:53 root] (utils.py 283): INFO Epoch: [29] [ 200/2502] eta: 1:29:32 lr: 0.000001 loss_cls: 3.7747 (3.8571) grad_norm: 2.4354 (2.4499) time: 2.3404 data: 0.0003 max mem: 8426 +[2024-12-11 06:55:18 root] (utils.py 283): INFO Epoch: [29] [ 210/2502] eta: 1:29:23 lr: 0.000001 loss_cls: 3.7765 (3.8622) grad_norm: 2.4658 (2.4510) time: 2.3986 data: 0.0003 max mem: 8426 +[2024-12-11 06:55:42 root] (utils.py 283): INFO Epoch: [29] [ 220/2502] eta: 1:29:05 lr: 0.000001 loss_cls: 3.9226 (3.8531) grad_norm: 2.4772 (2.4513) time: 2.4251 data: 0.0003 max mem: 8426 +[2024-12-11 06:56:05 root] (utils.py 283): INFO Epoch: [29] [ 230/2502] eta: 1:28:42 lr: 0.000001 loss_cls: 3.8173 (3.8472) grad_norm: 2.4398 (2.4507) time: 2.3679 data: 0.0002 max mem: 8426 +[2024-12-11 06:56:29 root] (utils.py 283): INFO Epoch: [29] [ 240/2502] eta: 1:28:23 lr: 0.000001 loss_cls: 3.9036 (3.8506) grad_norm: 2.3628 (2.4470) time: 2.3698 data: 0.0003 max mem: 8426 +[2024-12-11 06:56:53 root] (utils.py 283): INFO Epoch: [29] [ 250/2502] eta: 1:28:02 lr: 0.000001 loss_cls: 3.6910 (3.8456) grad_norm: 2.4196 (2.4507) time: 2.3817 data: 0.0003 max mem: 8426 +[2024-12-11 06:57:17 root] (utils.py 283): INFO Epoch: [29] [ 260/2502] eta: 1:27:44 lr: 0.000001 loss_cls: 3.7460 (3.8585) grad_norm: 2.4690 (2.4501) time: 2.3914 data: 0.0003 max mem: 8426 +[2024-12-11 06:57:41 root] (utils.py 283): INFO Epoch: [29] [ 270/2502] eta: 1:27:23 lr: 0.000001 loss_cls: 4.1191 (3.8605) grad_norm: 2.4354 (2.4520) time: 2.3926 data: 0.0003 max mem: 8426 +[2024-12-11 06:58:04 root] (utils.py 283): INFO Epoch: [29] [ 280/2502] eta: 1:27:01 lr: 0.000001 loss_cls: 4.1138 (3.8647) grad_norm: 2.4491 (2.4507) time: 2.3707 data: 0.0003 max mem: 8426 +[2024-12-11 06:58:28 root] (utils.py 283): INFO Epoch: [29] [ 290/2502] eta: 1:26:39 lr: 0.000001 loss_cls: 4.1175 (3.8661) grad_norm: 2.4377 (2.4511) time: 2.3693 data: 0.0003 max mem: 8426 +[2024-12-11 06:58:53 root] (utils.py 283): INFO Epoch: [29] [ 300/2502] eta: 1:26:24 lr: 0.000001 loss_cls: 3.9069 (3.8662) grad_norm: 2.4003 (2.4491) time: 2.4237 data: 0.0002 max mem: 8426 +[2024-12-11 06:59:17 root] (utils.py 283): INFO Epoch: [29] [ 310/2502] eta: 1:26:05 lr: 0.000001 loss_cls: 3.8037 (3.8648) grad_norm: 2.4234 (2.4523) time: 2.4444 data: 0.0003 max mem: 8426 +[2024-12-11 06:59:41 root] (utils.py 283): INFO Epoch: [29] [ 320/2502] eta: 1:25:41 lr: 0.000001 loss_cls: 3.9673 (3.8662) grad_norm: 2.4686 (2.4518) time: 2.3808 data: 0.0003 max mem: 8426 +[2024-12-11 07:00:04 root] (utils.py 283): INFO Epoch: [29] [ 330/2502] eta: 1:25:18 lr: 0.000001 loss_cls: 4.1062 (3.8723) grad_norm: 2.4072 (2.4522) time: 2.3554 data: 0.0003 max mem: 8426 +[2024-12-11 07:00:28 root] (utils.py 283): INFO Epoch: [29] [ 340/2502] eta: 1:24:55 lr: 0.000001 loss_cls: 4.2448 (3.8782) grad_norm: 2.4265 (2.4522) time: 2.3664 data: 0.0003 max mem: 8426 +[2024-12-11 07:00:51 root] (utils.py 283): INFO Epoch: [29] [ 350/2502] eta: 1:24:31 lr: 0.000001 loss_cls: 4.2554 (3.8778) grad_norm: 2.3836 (2.4499) time: 2.3603 data: 0.0003 max mem: 8426 +[2024-12-11 07:01:15 root] (utils.py 283): INFO Epoch: [29] [ 360/2502] eta: 1:24:08 lr: 0.000001 loss_cls: 4.1835 (3.8826) grad_norm: 2.3796 (2.4492) time: 2.3582 data: 0.0003 max mem: 8426 +[2024-12-11 07:01:38 root] (utils.py 283): INFO Epoch: [29] [ 370/2502] eta: 1:23:40 lr: 0.000001 loss_cls: 4.1835 (3.8818) grad_norm: 2.4331 (2.4499) time: 2.3195 data: 0.0003 max mem: 8426 +[2024-12-11 07:02:01 root] (utils.py 283): INFO Epoch: [29] [ 380/2502] eta: 1:23:15 lr: 0.000001 loss_cls: 3.8819 (3.8787) grad_norm: 2.3705 (2.4484) time: 2.2999 data: 0.0002 max mem: 8426 +[2024-12-11 07:02:25 root] (utils.py 283): INFO Epoch: [29] [ 390/2502] eta: 1:22:51 lr: 0.000001 loss_cls: 3.8819 (3.8794) grad_norm: 2.3204 (2.4493) time: 2.3383 data: 0.0003 max mem: 8426 +[2024-12-11 07:02:48 root] (utils.py 283): INFO Epoch: [29] [ 400/2502] eta: 1:22:28 lr: 0.000001 loss_cls: 4.1465 (3.8829) grad_norm: 2.3765 (2.4478) time: 2.3594 data: 0.0003 max mem: 8426 +[2024-12-11 07:03:12 root] (utils.py 283): INFO Epoch: [29] [ 410/2502] eta: 1:22:08 lr: 0.000001 loss_cls: 4.1041 (3.8819) grad_norm: 2.3765 (2.4477) time: 2.3965 data: 0.0002 max mem: 8426 +[2024-12-11 07:03:36 root] (utils.py 283): INFO Epoch: [29] [ 420/2502] eta: 1:21:43 lr: 0.000001 loss_cls: 3.9258 (3.8834) grad_norm: 2.4044 (2.4464) time: 2.3707 data: 0.0002 max mem: 8426 +[2024-12-11 07:04:00 root] (utils.py 283): INFO Epoch: [29] [ 430/2502] eta: 1:21:21 lr: 0.000001 loss_cls: 4.2135 (3.8932) grad_norm: 2.4169 (2.4466) time: 2.3598 data: 0.0003 max mem: 8426 +[2024-12-11 07:04:23 root] (utils.py 283): INFO Epoch: [29] [ 440/2502] eta: 1:20:56 lr: 0.000001 loss_cls: 4.2331 (3.8992) grad_norm: 2.4906 (2.4471) time: 2.3625 data: 0.0003 max mem: 8426 +[2024-12-11 07:04:47 root] (utils.py 283): INFO Epoch: [29] [ 450/2502] eta: 1:20:34 lr: 0.000001 loss_cls: 4.0590 (3.8995) grad_norm: 2.4229 (2.4464) time: 2.3561 data: 0.0003 max mem: 8426 +[2024-12-11 07:05:11 root] (utils.py 283): INFO Epoch: [29] [ 460/2502] eta: 1:20:15 lr: 0.000001 loss_cls: 4.0071 (3.8999) grad_norm: 2.4108 (2.4461) time: 2.4215 data: 0.0003 max mem: 8426 +[2024-12-11 07:05:34 root] (utils.py 283): INFO Epoch: [29] [ 470/2502] eta: 1:19:49 lr: 0.000001 loss_cls: 4.0982 (3.9005) grad_norm: 2.4660 (2.4480) time: 2.3770 data: 0.0003 max mem: 8426 +[2024-12-11 07:05:58 root] (utils.py 283): INFO Epoch: [29] [ 480/2502] eta: 1:19:25 lr: 0.000001 loss_cls: 4.1845 (3.9067) grad_norm: 2.5013 (2.4497) time: 2.3248 data: 0.0003 max mem: 8426 +[2024-12-11 07:06:21 root] (utils.py 283): INFO Epoch: [29] [ 490/2502] eta: 1:19:01 lr: 0.000001 loss_cls: 3.8865 (3.9031) grad_norm: 2.4730 (2.4489) time: 2.3510 data: 0.0003 max mem: 8426 +[2024-12-11 07:06:45 root] (utils.py 283): INFO Epoch: [29] [ 500/2502] eta: 1:18:39 lr: 0.000001 loss_cls: 3.6350 (3.8981) grad_norm: 2.4427 (2.4494) time: 2.3721 data: 0.0003 max mem: 8426 +[2024-12-11 07:07:09 root] (utils.py 283): INFO Epoch: [29] [ 510/2502] eta: 1:18:18 lr: 0.000001 loss_cls: 4.1498 (3.9068) grad_norm: 2.4917 (2.4499) time: 2.4054 data: 0.0003 max mem: 8426 +[2024-12-11 07:07:33 root] (utils.py 283): INFO Epoch: [29] [ 520/2502] eta: 1:17:56 lr: 0.000001 loss_cls: 4.1498 (3.9073) grad_norm: 2.5093 (2.4507) time: 2.4071 data: 0.0003 max mem: 8426 +[2024-12-11 07:07:57 root] (utils.py 283): INFO Epoch: [29] [ 530/2502] eta: 1:17:31 lr: 0.000001 loss_cls: 3.8061 (3.9082) grad_norm: 2.4846 (2.4508) time: 2.3563 data: 0.0003 max mem: 8426 +[2024-12-11 07:08:20 root] (utils.py 283): INFO Epoch: [29] [ 540/2502] eta: 1:17:08 lr: 0.000001 loss_cls: 4.2609 (3.9141) grad_norm: 2.4845 (2.4518) time: 2.3530 data: 0.0002 max mem: 8426 +[2024-12-11 07:08:44 root] (utils.py 283): INFO Epoch: [29] [ 550/2502] eta: 1:16:45 lr: 0.000001 loss_cls: 3.9219 (3.9089) grad_norm: 2.4476 (2.4511) time: 2.3788 data: 0.0002 max mem: 8426 +[2024-12-11 07:09:07 root] (utils.py 283): INFO Epoch: [29] [ 560/2502] eta: 1:16:18 lr: 0.000001 loss_cls: 4.0076 (3.9100) grad_norm: 2.4476 (2.4512) time: 2.3206 data: 0.0003 max mem: 8426 +[2024-12-11 07:09:30 root] (utils.py 283): INFO Epoch: [29] [ 570/2502] eta: 1:15:55 lr: 0.000001 loss_cls: 4.2708 (3.9150) grad_norm: 2.4898 (2.4520) time: 2.3128 data: 0.0003 max mem: 8426 +[2024-12-11 07:09:55 root] (utils.py 283): INFO Epoch: [29] [ 580/2502] eta: 1:15:34 lr: 0.000001 loss_cls: 4.3271 (3.9206) grad_norm: 2.4185 (2.4502) time: 2.3946 data: 0.0002 max mem: 8426 +[2024-12-11 07:10:20 root] (utils.py 283): INFO Epoch: [29] [ 590/2502] eta: 1:15:15 lr: 0.000001 loss_cls: 4.0121 (3.9152) grad_norm: 2.3327 (2.4489) time: 2.4729 data: 0.0002 max mem: 8426 +[2024-12-11 07:10:44 root] (utils.py 283): INFO Epoch: [29] [ 600/2502] eta: 1:14:53 lr: 0.000001 loss_cls: 3.8610 (3.9159) grad_norm: 2.4368 (2.4495) time: 2.4585 data: 0.0002 max mem: 8426 +[2024-12-11 07:11:07 root] (utils.py 283): INFO Epoch: [29] [ 610/2502] eta: 1:14:29 lr: 0.000001 loss_cls: 3.9840 (3.9160) grad_norm: 2.4729 (2.4499) time: 2.3765 data: 0.0003 max mem: 8426 +[2024-12-11 07:11:31 root] (utils.py 283): INFO Epoch: [29] [ 620/2502] eta: 1:14:06 lr: 0.000001 loss_cls: 3.8770 (3.9164) grad_norm: 2.4629 (2.4496) time: 2.3699 data: 0.0003 max mem: 8426 +[2024-12-11 07:11:55 root] (utils.py 283): INFO Epoch: [29] [ 630/2502] eta: 1:13:43 lr: 0.000001 loss_cls: 3.9157 (3.9135) grad_norm: 2.4300 (2.4501) time: 2.3823 data: 0.0002 max mem: 8426 +[2024-12-11 07:12:18 root] (utils.py 283): INFO Epoch: [29] [ 640/2502] eta: 1:13:18 lr: 0.000001 loss_cls: 4.1905 (3.9170) grad_norm: 2.4390 (2.4502) time: 2.3490 data: 0.0002 max mem: 8426 +[2024-12-11 07:12:42 root] (utils.py 283): INFO Epoch: [29] [ 650/2502] eta: 1:12:54 lr: 0.000001 loss_cls: 4.1997 (3.9176) grad_norm: 2.4444 (2.4505) time: 2.3487 data: 0.0002 max mem: 8426 +[2024-12-11 07:13:05 root] (utils.py 283): INFO Epoch: [29] [ 660/2502] eta: 1:12:30 lr: 0.000001 loss_cls: 4.1742 (3.9194) grad_norm: 2.4835 (2.4512) time: 2.3557 data: 0.0002 max mem: 8426 +[2024-12-11 07:13:29 root] (utils.py 283): INFO Epoch: [29] [ 670/2502] eta: 1:12:06 lr: 0.000001 loss_cls: 4.1742 (3.9219) grad_norm: 2.4924 (2.4515) time: 2.3335 data: 0.0002 max mem: 8426 +[2024-12-11 07:13:53 root] (utils.py 283): INFO Epoch: [29] [ 680/2502] eta: 1:11:43 lr: 0.000001 loss_cls: 4.0830 (3.9209) grad_norm: 2.4684 (2.4501) time: 2.3574 data: 0.0003 max mem: 8426 +[2024-12-11 07:14:17 root] (utils.py 283): INFO Epoch: [29] [ 690/2502] eta: 1:11:20 lr: 0.000001 loss_cls: 3.8861 (3.9169) grad_norm: 2.3382 (2.4492) time: 2.3913 data: 0.0003 max mem: 8426 +[2024-12-11 07:14:40 root] (utils.py 283): INFO Epoch: [29] [ 700/2502] eta: 1:10:56 lr: 0.000001 loss_cls: 3.9839 (3.9189) grad_norm: 2.4205 (2.4494) time: 2.3727 data: 0.0003 max mem: 8426 +[2024-12-11 07:15:05 root] (utils.py 283): INFO Epoch: [29] [ 710/2502] eta: 1:10:35 lr: 0.000001 loss_cls: 4.0526 (3.9189) grad_norm: 2.4278 (2.4485) time: 2.3996 data: 0.0003 max mem: 8426 +[2024-12-11 07:15:29 root] (utils.py 283): INFO Epoch: [29] [ 720/2502] eta: 1:10:12 lr: 0.000001 loss_cls: 3.9555 (3.9193) grad_norm: 2.3854 (2.4477) time: 2.4229 data: 0.0002 max mem: 8426 +[2024-12-11 07:15:53 root] (utils.py 283): INFO Epoch: [29] [ 730/2502] eta: 1:09:50 lr: 0.000001 loss_cls: 3.9349 (3.9195) grad_norm: 2.4305 (2.4484) time: 2.4267 data: 0.0002 max mem: 8426 +[2024-12-11 07:16:17 root] (utils.py 283): INFO Epoch: [29] [ 740/2502] eta: 1:09:27 lr: 0.000001 loss_cls: 3.6295 (3.9173) grad_norm: 2.4592 (2.4492) time: 2.4160 data: 0.0003 max mem: 8426 +[2024-12-11 07:16:41 root] (utils.py 283): INFO Epoch: [29] [ 750/2502] eta: 1:09:04 lr: 0.000001 loss_cls: 4.0388 (3.9181) grad_norm: 2.4502 (2.4492) time: 2.3821 data: 0.0003 max mem: 8426 +[2024-12-11 07:17:05 root] (utils.py 283): INFO Epoch: [29] [ 760/2502] eta: 1:08:41 lr: 0.000001 loss_cls: 4.0473 (3.9152) grad_norm: 2.3914 (2.4495) time: 2.3888 data: 0.0003 max mem: 8426 +[2024-12-11 07:17:29 root] (utils.py 283): INFO Epoch: [29] [ 770/2502] eta: 1:08:19 lr: 0.000001 loss_cls: 3.8573 (3.9131) grad_norm: 2.3740 (2.4486) time: 2.4133 data: 0.0002 max mem: 8426 +[2024-12-11 07:17:53 root] (utils.py 283): INFO Epoch: [29] [ 780/2502] eta: 1:07:55 lr: 0.000001 loss_cls: 4.1014 (3.9123) grad_norm: 2.4241 (2.4494) time: 2.4036 data: 0.0002 max mem: 8426 +[2024-12-11 07:18:16 root] (utils.py 283): INFO Epoch: [29] [ 790/2502] eta: 1:07:31 lr: 0.000001 loss_cls: 4.1481 (3.9119) grad_norm: 2.5223 (2.4506) time: 2.3643 data: 0.0002 max mem: 8426 +[2024-12-11 07:18:40 root] (utils.py 283): INFO Epoch: [29] [ 800/2502] eta: 1:07:08 lr: 0.000001 loss_cls: 4.1031 (3.9139) grad_norm: 2.4077 (2.4501) time: 2.3805 data: 0.0002 max mem: 8426 +[2024-12-11 07:19:04 root] (utils.py 283): INFO Epoch: [29] [ 810/2502] eta: 1:06:45 lr: 0.000001 loss_cls: 4.1525 (3.9139) grad_norm: 2.4077 (2.4505) time: 2.4008 data: 0.0003 max mem: 8426 +[2024-12-11 07:19:28 root] (utils.py 283): INFO Epoch: [29] [ 820/2502] eta: 1:06:23 lr: 0.000001 loss_cls: 4.1129 (3.9146) grad_norm: 2.4118 (2.4499) time: 2.4050 data: 0.0002 max mem: 8426 +[2024-12-11 07:19:52 root] (utils.py 283): INFO Epoch: [29] [ 830/2502] eta: 1:05:59 lr: 0.000001 loss_cls: 4.0773 (3.9136) grad_norm: 2.3799 (2.4493) time: 2.3865 data: 0.0002 max mem: 8426 +[2024-12-11 07:20:16 root] (utils.py 283): INFO Epoch: [29] [ 840/2502] eta: 1:05:36 lr: 0.000001 loss_cls: 4.0995 (3.9152) grad_norm: 2.4448 (2.4494) time: 2.3794 data: 0.0002 max mem: 8426 +[2024-12-11 07:20:40 root] (utils.py 283): INFO Epoch: [29] [ 850/2502] eta: 1:05:13 lr: 0.000001 loss_cls: 3.9139 (3.9116) grad_norm: 2.4540 (2.4495) time: 2.4086 data: 0.0002 max mem: 8426 +[2024-12-11 07:21:07 root] (utils.py 283): INFO Epoch: [29] [ 860/2502] eta: 1:04:56 lr: 0.000001 loss_cls: 3.7805 (3.9121) grad_norm: 2.4247 (2.4493) time: 2.5581 data: 0.0002 max mem: 8426 +[2024-12-11 07:21:31 root] (utils.py 283): INFO Epoch: [29] [ 870/2502] eta: 1:04:32 lr: 0.000001 loss_cls: 4.0269 (3.9115) grad_norm: 2.4187 (2.4490) time: 2.5386 data: 0.0002 max mem: 8426 +[2024-12-11 07:21:55 root] (utils.py 283): INFO Epoch: [29] [ 880/2502] eta: 1:04:09 lr: 0.000001 loss_cls: 4.0108 (3.9127) grad_norm: 2.3757 (2.4487) time: 2.3913 data: 0.0002 max mem: 8426 +[2024-12-11 07:22:18 root] (utils.py 283): INFO Epoch: [29] [ 890/2502] eta: 1:03:44 lr: 0.000001 loss_cls: 3.8305 (3.9107) grad_norm: 2.3848 (2.4483) time: 2.3641 data: 0.0002 max mem: 8426 +[2024-12-11 07:22:41 root] (utils.py 283): INFO Epoch: [29] [ 900/2502] eta: 1:03:20 lr: 0.000001 loss_cls: 3.8209 (3.9115) grad_norm: 2.4457 (2.4488) time: 2.3231 data: 0.0003 max mem: 8426 +[2024-12-11 07:23:05 root] (utils.py 283): INFO Epoch: [29] [ 910/2502] eta: 1:02:56 lr: 0.000001 loss_cls: 3.8127 (3.9091) grad_norm: 2.4131 (2.4489) time: 2.3593 data: 0.0002 max mem: 8426 +[2024-12-11 07:23:29 root] (utils.py 283): INFO Epoch: [29] [ 920/2502] eta: 1:02:33 lr: 0.000001 loss_cls: 3.9271 (3.9110) grad_norm: 2.3757 (2.4489) time: 2.3844 data: 0.0003 max mem: 8426 +[2024-12-11 07:23:53 root] (utils.py 283): INFO Epoch: [29] [ 930/2502] eta: 1:02:09 lr: 0.000001 loss_cls: 4.1066 (3.9125) grad_norm: 2.4915 (2.4497) time: 2.3729 data: 0.0003 max mem: 8426 +[2024-12-11 07:24:17 root] (utils.py 283): INFO Epoch: [29] [ 940/2502] eta: 1:01:45 lr: 0.000001 loss_cls: 4.1590 (3.9133) grad_norm: 2.5429 (2.4502) time: 2.3686 data: 0.0003 max mem: 8426 +[2024-12-11 07:24:40 root] (utils.py 283): INFO Epoch: [29] [ 950/2502] eta: 1:01:20 lr: 0.000001 loss_cls: 4.1754 (3.9118) grad_norm: 2.4785 (2.4507) time: 2.3421 data: 0.0003 max mem: 8426 +[2024-12-11 07:25:04 root] (utils.py 283): INFO Epoch: [29] [ 960/2502] eta: 1:00:57 lr: 0.000001 loss_cls: 3.7837 (3.9093) grad_norm: 2.4337 (2.4504) time: 2.3501 data: 0.0003 max mem: 8426 +[2024-12-11 07:25:26 root] (utils.py 283): INFO Epoch: [29] [ 970/2502] eta: 1:00:32 lr: 0.000001 loss_cls: 3.9345 (3.9103) grad_norm: 2.4412 (2.4507) time: 2.3289 data: 0.0003 max mem: 8426 +[2024-12-11 07:25:49 root] (utils.py 283): INFO Epoch: [29] [ 980/2502] eta: 1:00:06 lr: 0.000001 loss_cls: 4.0287 (3.9112) grad_norm: 2.4439 (2.4508) time: 2.2612 data: 0.0002 max mem: 8426 +[2024-12-11 07:26:13 root] (utils.py 283): INFO Epoch: [29] [ 990/2502] eta: 0:59:42 lr: 0.000001 loss_cls: 3.9770 (3.9092) grad_norm: 2.4439 (2.4509) time: 2.3077 data: 0.0002 max mem: 8426 +[2024-12-11 07:26:37 root] (utils.py 283): INFO Epoch: [29] [1000/2502] eta: 0:59:19 lr: 0.000001 loss_cls: 3.7915 (3.9077) grad_norm: 2.4349 (2.4511) time: 2.4008 data: 0.0003 max mem: 8426 +[2024-12-11 07:27:00 root] (utils.py 283): INFO Epoch: [29] [1010/2502] eta: 0:58:55 lr: 0.000001 loss_cls: 3.7918 (3.9074) grad_norm: 2.4571 (2.4510) time: 2.3877 data: 0.0003 max mem: 8426 +[2024-12-11 07:27:24 root] (utils.py 283): INFO Epoch: [29] [1020/2502] eta: 0:58:32 lr: 0.000001 loss_cls: 3.9860 (3.9084) grad_norm: 2.4631 (2.4510) time: 2.3624 data: 0.0003 max mem: 8426 +[2024-12-11 07:27:47 root] (utils.py 283): INFO Epoch: [29] [1030/2502] eta: 0:58:08 lr: 0.000001 loss_cls: 4.0322 (3.9075) grad_norm: 2.4583 (2.4516) time: 2.3555 data: 0.0002 max mem: 8426 +[2024-12-11 07:28:11 root] (utils.py 283): INFO Epoch: [29] [1040/2502] eta: 0:57:43 lr: 0.000001 loss_cls: 3.8938 (3.9054) grad_norm: 2.4453 (2.4515) time: 2.3301 data: 0.0003 max mem: 8426 +[2024-12-11 07:28:34 root] (utils.py 283): INFO Epoch: [29] [1050/2502] eta: 0:57:19 lr: 0.000001 loss_cls: 4.1259 (3.9082) grad_norm: 2.4030 (2.4511) time: 2.3125 data: 0.0003 max mem: 8426 +[2024-12-11 07:28:56 root] (utils.py 283): INFO Epoch: [29] [1060/2502] eta: 0:56:54 lr: 0.000001 loss_cls: 4.1722 (3.9072) grad_norm: 2.3949 (2.4512) time: 2.2897 data: 0.0003 max mem: 8426 +[2024-12-11 07:29:20 root] (utils.py 283): INFO Epoch: [29] [1070/2502] eta: 0:56:30 lr: 0.000001 loss_cls: 3.9307 (3.9075) grad_norm: 2.3949 (2.4509) time: 2.3084 data: 0.0003 max mem: 8426 +[2024-12-11 07:29:43 root] (utils.py 283): INFO Epoch: [29] [1080/2502] eta: 0:56:06 lr: 0.000001 loss_cls: 3.9307 (3.9080) grad_norm: 2.3723 (2.4506) time: 2.3478 data: 0.0003 max mem: 8426 +[2024-12-11 07:30:07 root] (utils.py 283): INFO Epoch: [29] [1090/2502] eta: 0:55:42 lr: 0.000001 loss_cls: 4.0172 (3.9072) grad_norm: 2.4153 (2.4503) time: 2.3612 data: 0.0003 max mem: 8426 +[2024-12-11 07:30:31 root] (utils.py 283): INFO Epoch: [29] [1100/2502] eta: 0:55:18 lr: 0.000001 loss_cls: 3.6294 (3.9051) grad_norm: 2.4245 (2.4505) time: 2.3608 data: 0.0002 max mem: 8426 +[2024-12-11 07:30:54 root] (utils.py 283): INFO Epoch: [29] [1110/2502] eta: 0:54:55 lr: 0.000001 loss_cls: 3.6294 (3.9044) grad_norm: 2.4056 (2.4500) time: 2.3686 data: 0.0003 max mem: 8426 +[2024-12-11 07:31:18 root] (utils.py 283): INFO Epoch: [29] [1120/2502] eta: 0:54:31 lr: 0.000001 loss_cls: 4.0851 (3.9057) grad_norm: 2.3813 (2.4492) time: 2.3844 data: 0.0003 max mem: 8426 +[2024-12-11 07:31:41 root] (utils.py 283): INFO Epoch: [29] [1130/2502] eta: 0:54:07 lr: 0.000001 loss_cls: 4.0274 (3.9035) grad_norm: 2.3758 (2.4486) time: 2.3524 data: 0.0003 max mem: 8426 +[2024-12-11 07:32:04 root] (utils.py 283): INFO Epoch: [29] [1140/2502] eta: 0:53:42 lr: 0.000001 loss_cls: 3.7663 (3.9016) grad_norm: 2.4337 (2.4492) time: 2.2942 data: 0.0003 max mem: 8426 +[2024-12-11 07:32:27 root] (utils.py 283): INFO Epoch: [29] [1150/2502] eta: 0:53:18 lr: 0.000001 loss_cls: 3.6893 (3.8996) grad_norm: 2.4134 (2.4481) time: 2.2828 data: 0.0003 max mem: 8426 +[2024-12-11 07:32:50 root] (utils.py 283): INFO Epoch: [29] [1160/2502] eta: 0:52:54 lr: 0.000001 loss_cls: 3.6369 (3.8967) grad_norm: 2.3660 (2.4480) time: 2.3021 data: 0.0003 max mem: 8426 +[2024-12-11 07:33:14 root] (utils.py 283): INFO Epoch: [29] [1170/2502] eta: 0:52:30 lr: 0.000001 loss_cls: 3.8409 (3.8976) grad_norm: 2.4204 (2.4482) time: 2.3386 data: 0.0003 max mem: 8426 +[2024-12-11 07:33:38 root] (utils.py 283): INFO Epoch: [29] [1180/2502] eta: 0:52:06 lr: 0.000001 loss_cls: 4.1448 (3.8990) grad_norm: 2.4618 (2.4484) time: 2.3725 data: 0.0003 max mem: 8426 +[2024-12-11 07:34:01 root] (utils.py 283): INFO Epoch: [29] [1190/2502] eta: 0:51:43 lr: 0.000001 loss_cls: 4.1448 (3.8986) grad_norm: 2.4514 (2.4486) time: 2.3628 data: 0.0002 max mem: 8426 +[2024-12-11 07:34:26 root] (utils.py 283): INFO Epoch: [29] [1200/2502] eta: 0:51:20 lr: 0.000001 loss_cls: 3.9090 (3.8977) grad_norm: 2.4472 (2.4492) time: 2.3921 data: 0.0002 max mem: 8426 +[2024-12-11 07:34:49 root] (utils.py 283): INFO Epoch: [29] [1210/2502] eta: 0:50:56 lr: 0.000001 loss_cls: 3.8724 (3.8974) grad_norm: 2.4660 (2.4492) time: 2.3789 data: 0.0003 max mem: 8426 +[2024-12-11 07:35:12 root] (utils.py 283): INFO Epoch: [29] [1220/2502] eta: 0:50:32 lr: 0.000001 loss_cls: 3.7672 (3.8966) grad_norm: 2.4486 (2.4490) time: 2.3353 data: 0.0003 max mem: 8426 +[2024-12-11 07:35:36 root] (utils.py 283): INFO Epoch: [29] [1230/2502] eta: 0:50:08 lr: 0.000001 loss_cls: 3.8354 (3.8972) grad_norm: 2.4486 (2.4489) time: 2.3499 data: 0.0003 max mem: 8426 +[2024-12-11 07:35:59 root] (utils.py 283): INFO Epoch: [29] [1240/2502] eta: 0:49:44 lr: 0.000001 loss_cls: 4.0331 (3.8988) grad_norm: 2.4841 (2.4494) time: 2.3201 data: 0.0003 max mem: 8426 +[2024-12-11 07:36:22 root] (utils.py 283): INFO Epoch: [29] [1250/2502] eta: 0:49:20 lr: 0.000001 loss_cls: 3.9955 (3.8978) grad_norm: 2.4896 (2.4497) time: 2.3137 data: 0.0003 max mem: 8426 +[2024-12-11 07:36:45 root] (utils.py 283): INFO Epoch: [29] [1260/2502] eta: 0:48:55 lr: 0.000001 loss_cls: 4.1663 (3.8990) grad_norm: 2.4898 (2.4502) time: 2.3219 data: 0.0002 max mem: 8426 +[2024-12-11 07:37:09 root] (utils.py 283): INFO Epoch: [29] [1270/2502] eta: 0:48:32 lr: 0.000001 loss_cls: 4.1663 (3.8978) grad_norm: 2.4194 (2.4502) time: 2.3261 data: 0.0003 max mem: 8426 +[2024-12-11 07:37:32 root] (utils.py 283): INFO Epoch: [29] [1280/2502] eta: 0:48:08 lr: 0.000001 loss_cls: 4.1081 (3.8987) grad_norm: 2.4194 (2.4501) time: 2.3362 data: 0.0003 max mem: 8426 +[2024-12-11 07:37:55 root] (utils.py 283): INFO Epoch: [29] [1290/2502] eta: 0:47:43 lr: 0.000001 loss_cls: 4.0451 (3.8995) grad_norm: 2.4258 (2.4497) time: 2.3164 data: 0.0003 max mem: 8426 +[2024-12-11 07:38:20 root] (utils.py 283): INFO Epoch: [29] [1300/2502] eta: 0:47:21 lr: 0.000001 loss_cls: 3.9264 (3.8986) grad_norm: 2.3601 (2.4491) time: 2.3888 data: 0.0003 max mem: 8426 +[2024-12-11 07:38:43 root] (utils.py 283): INFO Epoch: [29] [1310/2502] eta: 0:46:57 lr: 0.000001 loss_cls: 3.8965 (3.8987) grad_norm: 2.3604 (2.4496) time: 2.4041 data: 0.0003 max mem: 8426 +[2024-12-11 07:39:07 root] (utils.py 283): INFO Epoch: [29] [1320/2502] eta: 0:46:33 lr: 0.000001 loss_cls: 3.7228 (3.8956) grad_norm: 2.4239 (2.4495) time: 2.3552 data: 0.0003 max mem: 8426 +[2024-12-11 07:39:31 root] (utils.py 283): INFO Epoch: [29] [1330/2502] eta: 0:46:10 lr: 0.000001 loss_cls: 3.3255 (3.8931) grad_norm: 2.3900 (2.4490) time: 2.3846 data: 0.0003 max mem: 8426 +[2024-12-11 07:39:54 root] (utils.py 283): INFO Epoch: [29] [1340/2502] eta: 0:45:46 lr: 0.000001 loss_cls: 3.9003 (3.8935) grad_norm: 2.4027 (2.4493) time: 2.3631 data: 0.0003 max mem: 8426 +[2024-12-11 07:40:18 root] (utils.py 283): INFO Epoch: [29] [1350/2502] eta: 0:45:23 lr: 0.000001 loss_cls: 4.1352 (3.8934) grad_norm: 2.4255 (2.4493) time: 2.3757 data: 0.0002 max mem: 8426 +[2024-12-11 07:40:41 root] (utils.py 283): INFO Epoch: [29] [1360/2502] eta: 0:44:59 lr: 0.000001 loss_cls: 4.1703 (3.8950) grad_norm: 2.3872 (2.4490) time: 2.3625 data: 0.0002 max mem: 8426 +[2024-12-11 07:41:05 root] (utils.py 283): INFO Epoch: [29] [1370/2502] eta: 0:44:35 lr: 0.000001 loss_cls: 4.0303 (3.8941) grad_norm: 2.3774 (2.4484) time: 2.3395 data: 0.0002 max mem: 8426 +[2024-12-11 07:41:29 root] (utils.py 283): INFO Epoch: [29] [1380/2502] eta: 0:44:12 lr: 0.000001 loss_cls: 3.6831 (3.8918) grad_norm: 2.4090 (2.4485) time: 2.3898 data: 0.0003 max mem: 8426 +[2024-12-11 07:41:53 root] (utils.py 283): INFO Epoch: [29] [1390/2502] eta: 0:43:49 lr: 0.000001 loss_cls: 3.4877 (3.8904) grad_norm: 2.4369 (2.4486) time: 2.4215 data: 0.0003 max mem: 8426 +[2024-12-11 07:42:16 root] (utils.py 283): INFO Epoch: [29] [1400/2502] eta: 0:43:25 lr: 0.000001 loss_cls: 3.7662 (3.8878) grad_norm: 2.3902 (2.4484) time: 2.3710 data: 0.0002 max mem: 8426 +[2024-12-11 07:42:41 root] (utils.py 283): INFO Epoch: [29] [1410/2502] eta: 0:43:02 lr: 0.000001 loss_cls: 4.0096 (3.8897) grad_norm: 2.3621 (2.4477) time: 2.3685 data: 0.0003 max mem: 8426 +[2024-12-11 07:43:05 root] (utils.py 283): INFO Epoch: [29] [1420/2502] eta: 0:42:38 lr: 0.000001 loss_cls: 4.0573 (3.8892) grad_norm: 2.3978 (2.4477) time: 2.4072 data: 0.0003 max mem: 8426 +[2024-12-11 07:43:28 root] (utils.py 283): INFO Epoch: [29] [1430/2502] eta: 0:42:14 lr: 0.000001 loss_cls: 3.7015 (3.8894) grad_norm: 2.4279 (2.4477) time: 2.3607 data: 0.0003 max mem: 8426 +[2024-12-11 07:43:51 root] (utils.py 283): INFO Epoch: [29] [1440/2502] eta: 0:41:50 lr: 0.000001 loss_cls: 4.1150 (3.8907) grad_norm: 2.4379 (2.4477) time: 2.3027 data: 0.0003 max mem: 8426 +[2024-12-11 07:44:14 root] (utils.py 283): INFO Epoch: [29] [1450/2502] eta: 0:41:26 lr: 0.000001 loss_cls: 4.1550 (3.8911) grad_norm: 2.4395 (2.4479) time: 2.2997 data: 0.0003 max mem: 8426 +[2024-12-11 07:44:37 root] (utils.py 283): INFO Epoch: [29] [1460/2502] eta: 0:41:02 lr: 0.000001 loss_cls: 4.1604 (3.8925) grad_norm: 2.4900 (2.4483) time: 2.3328 data: 0.0003 max mem: 8426 +[2024-12-11 07:45:01 root] (utils.py 283): INFO Epoch: [29] [1470/2502] eta: 0:40:39 lr: 0.000001 loss_cls: 4.1717 (3.8944) grad_norm: 2.4224 (2.4479) time: 2.3527 data: 0.0003 max mem: 8426 +[2024-12-11 07:45:25 root] (utils.py 283): INFO Epoch: [29] [1480/2502] eta: 0:40:15 lr: 0.000001 loss_cls: 4.0897 (3.8940) grad_norm: 2.4341 (2.4483) time: 2.3684 data: 0.0002 max mem: 8426 +[2024-12-11 07:45:48 root] (utils.py 283): INFO Epoch: [29] [1490/2502] eta: 0:39:51 lr: 0.000001 loss_cls: 3.9409 (3.8940) grad_norm: 2.4426 (2.4485) time: 2.3416 data: 0.0003 max mem: 8426 +[2024-12-11 07:46:12 root] (utils.py 283): INFO Epoch: [29] [1500/2502] eta: 0:39:28 lr: 0.000001 loss_cls: 3.8448 (3.8922) grad_norm: 2.4123 (2.4483) time: 2.3489 data: 0.0003 max mem: 8426 +[2024-12-11 07:46:35 root] (utils.py 283): INFO Epoch: [29] [1510/2502] eta: 0:39:04 lr: 0.000001 loss_cls: 3.8448 (3.8922) grad_norm: 2.4526 (2.4487) time: 2.3483 data: 0.0003 max mem: 8426 +[2024-12-11 07:46:59 root] (utils.py 283): INFO Epoch: [29] [1520/2502] eta: 0:38:40 lr: 0.000001 loss_cls: 4.0096 (3.8936) grad_norm: 2.5292 (2.4492) time: 2.3629 data: 0.0002 max mem: 8426 +[2024-12-11 07:47:23 root] (utils.py 283): INFO Epoch: [29] [1530/2502] eta: 0:38:17 lr: 0.000001 loss_cls: 4.0096 (3.8930) grad_norm: 2.4184 (2.4490) time: 2.4017 data: 0.0002 max mem: 8426 +[2024-12-11 07:47:46 root] (utils.py 283): INFO Epoch: [29] [1540/2502] eta: 0:37:53 lr: 0.000001 loss_cls: 3.8406 (3.8928) grad_norm: 2.4268 (2.4491) time: 2.3745 data: 0.0002 max mem: 8426 +[2024-12-11 07:48:10 root] (utils.py 283): INFO Epoch: [29] [1550/2502] eta: 0:37:29 lr: 0.000001 loss_cls: 4.1067 (3.8933) grad_norm: 2.4358 (2.4493) time: 2.3494 data: 0.0003 max mem: 8426 +[2024-12-11 07:48:33 root] (utils.py 283): INFO Epoch: [29] [1560/2502] eta: 0:37:05 lr: 0.000001 loss_cls: 4.1810 (3.8961) grad_norm: 2.4901 (2.4496) time: 2.3267 data: 0.0003 max mem: 8426 +[2024-12-11 07:48:56 root] (utils.py 283): INFO Epoch: [29] [1570/2502] eta: 0:36:42 lr: 0.000001 loss_cls: 4.2262 (3.8973) grad_norm: 2.5031 (2.4493) time: 2.3166 data: 0.0003 max mem: 8426 +[2024-12-11 07:49:19 root] (utils.py 283): INFO Epoch: [29] [1580/2502] eta: 0:36:18 lr: 0.000001 loss_cls: 3.8373 (3.8965) grad_norm: 2.4209 (2.4491) time: 2.3258 data: 0.0003 max mem: 8426 +[2024-12-11 07:49:43 root] (utils.py 283): INFO Epoch: [29] [1590/2502] eta: 0:35:54 lr: 0.000001 loss_cls: 3.8373 (3.8964) grad_norm: 2.4222 (2.4492) time: 2.3268 data: 0.0003 max mem: 8426 +[2024-12-11 07:50:06 root] (utils.py 283): INFO Epoch: [29] [1600/2502] eta: 0:35:30 lr: 0.000001 loss_cls: 3.9549 (3.8961) grad_norm: 2.4222 (2.4489) time: 2.3331 data: 0.0003 max mem: 8426 +[2024-12-11 07:50:29 root] (utils.py 283): INFO Epoch: [29] [1610/2502] eta: 0:35:06 lr: 0.000001 loss_cls: 3.9936 (3.8960) grad_norm: 2.3939 (2.4486) time: 2.2905 data: 0.0003 max mem: 8426 +[2024-12-11 07:50:52 root] (utils.py 283): INFO Epoch: [29] [1620/2502] eta: 0:34:42 lr: 0.000001 loss_cls: 3.9674 (3.8942) grad_norm: 2.3920 (2.4485) time: 2.3060 data: 0.0003 max mem: 8426 +[2024-12-11 07:51:16 root] (utils.py 283): INFO Epoch: [29] [1630/2502] eta: 0:34:19 lr: 0.000001 loss_cls: 3.7988 (3.8944) grad_norm: 2.4244 (2.4483) time: 2.3551 data: 0.0003 max mem: 8426 +[2024-12-11 07:51:39 root] (utils.py 283): INFO Epoch: [29] [1640/2502] eta: 0:33:55 lr: 0.000001 loss_cls: 3.8875 (3.8950) grad_norm: 2.4244 (2.4486) time: 2.3546 data: 0.0003 max mem: 8426 +[2024-12-11 07:52:02 root] (utils.py 283): INFO Epoch: [29] [1650/2502] eta: 0:33:31 lr: 0.000001 loss_cls: 3.7840 (3.8925) grad_norm: 2.4025 (2.4484) time: 2.3429 data: 0.0003 max mem: 8426 +[2024-12-11 07:52:25 root] (utils.py 283): INFO Epoch: [29] [1660/2502] eta: 0:33:07 lr: 0.000001 loss_cls: 3.3381 (3.8901) grad_norm: 2.3596 (2.4488) time: 2.3070 data: 0.0002 max mem: 8426 +[2024-12-11 07:52:49 root] (utils.py 283): INFO Epoch: [29] [1670/2502] eta: 0:32:44 lr: 0.000001 loss_cls: 3.6064 (3.8907) grad_norm: 2.4422 (2.4492) time: 2.3220 data: 0.0002 max mem: 8426 +[2024-12-11 07:53:12 root] (utils.py 283): INFO Epoch: [29] [1680/2502] eta: 0:32:20 lr: 0.000001 loss_cls: 3.8196 (3.8886) grad_norm: 2.4179 (2.4489) time: 2.3337 data: 0.0003 max mem: 8426 +[2024-12-11 07:53:36 root] (utils.py 283): INFO Epoch: [29] [1690/2502] eta: 0:31:56 lr: 0.000001 loss_cls: 3.7909 (3.8886) grad_norm: 2.4252 (2.4489) time: 2.3302 data: 0.0003 max mem: 8426 +[2024-12-11 07:53:59 root] (utils.py 283): INFO Epoch: [29] [1700/2502] eta: 0:31:32 lr: 0.000001 loss_cls: 3.6904 (3.8870) grad_norm: 2.4516 (2.4491) time: 2.3246 data: 0.0003 max mem: 8426 +[2024-12-11 07:54:22 root] (utils.py 283): INFO Epoch: [29] [1710/2502] eta: 0:31:08 lr: 0.000001 loss_cls: 3.9983 (3.8885) grad_norm: 2.4589 (2.4490) time: 2.3138 data: 0.0003 max mem: 8426 +[2024-12-11 07:54:45 root] (utils.py 283): INFO Epoch: [29] [1720/2502] eta: 0:30:45 lr: 0.000001 loss_cls: 4.1204 (3.8893) grad_norm: 2.4132 (2.4487) time: 2.3269 data: 0.0003 max mem: 8426 +[2024-12-11 07:55:08 root] (utils.py 283): INFO Epoch: [29] [1730/2502] eta: 0:30:21 lr: 0.000001 loss_cls: 3.9219 (3.8880) grad_norm: 2.3814 (2.4486) time: 2.3120 data: 0.0003 max mem: 8426 +[2024-12-11 07:55:31 root] (utils.py 283): INFO Epoch: [29] [1740/2502] eta: 0:29:57 lr: 0.000001 loss_cls: 3.9645 (3.8889) grad_norm: 2.3814 (2.4484) time: 2.3010 data: 0.0002 max mem: 8426 +[2024-12-11 07:55:55 root] (utils.py 283): INFO Epoch: [29] [1750/2502] eta: 0:29:33 lr: 0.000001 loss_cls: 4.0975 (3.8897) grad_norm: 2.3930 (2.4482) time: 2.3413 data: 0.0003 max mem: 8426 +[2024-12-11 07:56:18 root] (utils.py 283): INFO Epoch: [29] [1760/2502] eta: 0:29:10 lr: 0.000001 loss_cls: 4.0801 (3.8897) grad_norm: 2.3834 (2.4481) time: 2.3370 data: 0.0003 max mem: 8426 +[2024-12-11 07:56:41 root] (utils.py 283): INFO Epoch: [29] [1770/2502] eta: 0:28:46 lr: 0.000001 loss_cls: 4.1623 (3.8909) grad_norm: 2.4111 (2.4485) time: 2.3205 data: 0.0002 max mem: 8426 +[2024-12-11 07:57:05 root] (utils.py 283): INFO Epoch: [29] [1780/2502] eta: 0:28:22 lr: 0.000001 loss_cls: 4.1549 (3.8913) grad_norm: 2.4496 (2.4484) time: 2.3399 data: 0.0002 max mem: 8426 +[2024-12-11 07:57:28 root] (utils.py 283): INFO Epoch: [29] [1790/2502] eta: 0:27:59 lr: 0.000001 loss_cls: 4.1352 (3.8916) grad_norm: 2.4530 (2.4487) time: 2.3268 data: 0.0002 max mem: 8426 +[2024-12-11 07:57:52 root] (utils.py 283): INFO Epoch: [29] [1800/2502] eta: 0:27:35 lr: 0.000001 loss_cls: 4.2755 (3.8935) grad_norm: 2.4843 (2.4485) time: 2.3763 data: 0.0003 max mem: 8426 +[2024-12-11 07:58:16 root] (utils.py 283): INFO Epoch: [29] [1810/2502] eta: 0:27:12 lr: 0.000001 loss_cls: 4.2154 (3.8940) grad_norm: 2.3961 (2.4482) time: 2.3961 data: 0.0003 max mem: 8426 +[2024-12-11 07:58:39 root] (utils.py 283): INFO Epoch: [29] [1820/2502] eta: 0:26:48 lr: 0.000001 loss_cls: 3.9218 (3.8926) grad_norm: 2.3883 (2.4480) time: 2.3365 data: 0.0003 max mem: 8426 +[2024-12-11 07:59:02 root] (utils.py 283): INFO Epoch: [29] [1830/2502] eta: 0:26:24 lr: 0.000001 loss_cls: 3.9545 (3.8940) grad_norm: 2.3788 (2.4482) time: 2.3359 data: 0.0003 max mem: 8426 +[2024-12-11 07:59:26 root] (utils.py 283): INFO Epoch: [29] [1840/2502] eta: 0:26:01 lr: 0.000001 loss_cls: 4.1740 (3.8949) grad_norm: 2.4039 (2.4483) time: 2.3411 data: 0.0003 max mem: 8426 +[2024-12-11 07:59:49 root] (utils.py 283): INFO Epoch: [29] [1850/2502] eta: 0:25:37 lr: 0.000001 loss_cls: 4.0122 (3.8940) grad_norm: 2.4745 (2.4483) time: 2.3429 data: 0.0003 max mem: 8426 +[2024-12-11 08:00:13 root] (utils.py 283): INFO Epoch: [29] [1860/2502] eta: 0:25:13 lr: 0.000001 loss_cls: 3.7523 (3.8929) grad_norm: 2.4677 (2.4484) time: 2.3524 data: 0.0003 max mem: 8426 +[2024-12-11 08:00:37 root] (utils.py 283): INFO Epoch: [29] [1870/2502] eta: 0:24:50 lr: 0.000001 loss_cls: 3.9224 (3.8941) grad_norm: 2.4473 (2.4487) time: 2.4025 data: 0.0003 max mem: 8426 +[2024-12-11 08:01:02 root] (utils.py 283): INFO Epoch: [29] [1880/2502] eta: 0:24:27 lr: 0.000001 loss_cls: 3.9362 (3.8935) grad_norm: 2.4454 (2.4487) time: 2.4656 data: 0.0003 max mem: 8426 +[2024-12-11 08:01:26 root] (utils.py 283): INFO Epoch: [29] [1890/2502] eta: 0:24:04 lr: 0.000001 loss_cls: 3.8255 (3.8928) grad_norm: 2.4227 (2.4488) time: 2.4433 data: 0.0003 max mem: 8426 +[2024-12-11 08:01:50 root] (utils.py 283): INFO Epoch: [29] [1900/2502] eta: 0:23:40 lr: 0.000001 loss_cls: 3.9450 (3.8929) grad_norm: 2.4321 (2.4489) time: 2.3886 data: 0.0003 max mem: 8426 +[2024-12-11 08:02:14 root] (utils.py 283): INFO Epoch: [29] [1910/2502] eta: 0:23:16 lr: 0.000001 loss_cls: 4.0180 (3.8933) grad_norm: 2.4144 (2.4488) time: 2.3733 data: 0.0003 max mem: 8426 +[2024-12-11 08:02:37 root] (utils.py 283): INFO Epoch: [29] [1920/2502] eta: 0:22:53 lr: 0.000001 loss_cls: 4.1257 (3.8939) grad_norm: 2.4414 (2.4490) time: 2.3666 data: 0.0002 max mem: 8426 +[2024-12-11 08:03:01 root] (utils.py 283): INFO Epoch: [29] [1930/2502] eta: 0:22:29 lr: 0.000001 loss_cls: 3.8382 (3.8932) grad_norm: 2.4414 (2.4490) time: 2.3518 data: 0.0003 max mem: 8426 +[2024-12-11 08:03:24 root] (utils.py 283): INFO Epoch: [29] [1940/2502] eta: 0:22:05 lr: 0.000001 loss_cls: 3.7848 (3.8933) grad_norm: 2.4067 (2.4487) time: 2.3350 data: 0.0003 max mem: 8426 +[2024-12-11 08:03:47 root] (utils.py 283): INFO Epoch: [29] [1950/2502] eta: 0:21:42 lr: 0.000001 loss_cls: 3.9960 (3.8927) grad_norm: 2.4253 (2.4490) time: 2.3293 data: 0.0003 max mem: 8426 +[2024-12-11 08:04:11 root] (utils.py 283): INFO Epoch: [29] [1960/2502] eta: 0:21:18 lr: 0.000001 loss_cls: 3.8788 (3.8920) grad_norm: 2.4498 (2.4489) time: 2.3522 data: 0.0003 max mem: 8426 +[2024-12-11 08:04:35 root] (utils.py 283): INFO Epoch: [29] [1970/2502] eta: 0:20:55 lr: 0.000001 loss_cls: 3.8641 (3.8914) grad_norm: 2.4498 (2.4490) time: 2.3762 data: 0.0003 max mem: 8426 +[2024-12-11 08:04:58 root] (utils.py 283): INFO Epoch: [29] [1980/2502] eta: 0:20:31 lr: 0.000001 loss_cls: 4.0683 (3.8925) grad_norm: 2.4349 (2.4490) time: 2.3501 data: 0.0003 max mem: 8426 +[2024-12-11 08:05:22 root] (utils.py 283): INFO Epoch: [29] [1990/2502] eta: 0:20:07 lr: 0.000001 loss_cls: 3.6630 (3.8898) grad_norm: 2.4849 (2.4491) time: 2.3394 data: 0.0003 max mem: 8426 +[2024-12-11 08:05:46 root] (utils.py 283): INFO Epoch: [29] [2000/2502] eta: 0:19:44 lr: 0.000001 loss_cls: 3.6536 (3.8901) grad_norm: 2.5236 (2.4494) time: 2.3805 data: 0.0003 max mem: 8426 +[2024-12-11 08:06:09 root] (utils.py 283): INFO Epoch: [29] [2010/2502] eta: 0:19:20 lr: 0.000001 loss_cls: 4.1924 (3.8903) grad_norm: 2.5236 (2.4497) time: 2.3860 data: 0.0002 max mem: 8426 +[2024-12-11 08:06:33 root] (utils.py 283): INFO Epoch: [29] [2020/2502] eta: 0:18:57 lr: 0.000001 loss_cls: 4.1700 (3.8900) grad_norm: 2.4845 (2.4496) time: 2.3914 data: 0.0003 max mem: 8426 +[2024-12-11 08:06:57 root] (utils.py 283): INFO Epoch: [29] [2030/2502] eta: 0:18:33 lr: 0.000001 loss_cls: 3.9829 (3.8903) grad_norm: 2.4105 (2.4495) time: 2.3949 data: 0.0003 max mem: 8426 +[2024-12-11 08:07:21 root] (utils.py 283): INFO Epoch: [29] [2040/2502] eta: 0:18:10 lr: 0.000001 loss_cls: 4.0942 (3.8908) grad_norm: 2.3946 (2.4493) time: 2.3946 data: 0.0003 max mem: 8426 +[2024-12-11 08:07:45 root] (utils.py 283): INFO Epoch: [29] [2050/2502] eta: 0:17:46 lr: 0.000001 loss_cls: 4.1562 (3.8928) grad_norm: 2.4383 (2.4491) time: 2.3870 data: 0.0003 max mem: 8426 +[2024-12-11 08:08:09 root] (utils.py 283): INFO Epoch: [29] [2060/2502] eta: 0:17:23 lr: 0.000001 loss_cls: 4.1419 (3.8912) grad_norm: 2.4357 (2.4489) time: 2.3599 data: 0.0003 max mem: 8426 +[2024-12-11 08:08:33 root] (utils.py 283): INFO Epoch: [29] [2070/2502] eta: 0:16:59 lr: 0.000001 loss_cls: 3.3800 (3.8891) grad_norm: 2.3975 (2.4486) time: 2.3864 data: 0.0003 max mem: 8426 +[2024-12-11 08:08:57 root] (utils.py 283): INFO Epoch: [29] [2080/2502] eta: 0:16:36 lr: 0.000001 loss_cls: 3.8095 (3.8913) grad_norm: 2.4260 (2.4490) time: 2.4258 data: 0.0003 max mem: 8426 +[2024-12-11 08:09:20 root] (utils.py 283): INFO Epoch: [29] [2090/2502] eta: 0:16:12 lr: 0.000001 loss_cls: 4.2857 (3.8915) grad_norm: 2.4894 (2.4492) time: 2.3836 data: 0.0003 max mem: 8426 +[2024-12-11 08:09:45 root] (utils.py 283): INFO Epoch: [29] [2100/2502] eta: 0:15:49 lr: 0.000001 loss_cls: 4.1739 (3.8924) grad_norm: 2.4984 (2.4493) time: 2.3909 data: 0.0003 max mem: 8426 +[2024-12-11 08:10:09 root] (utils.py 283): INFO Epoch: [29] [2110/2502] eta: 0:15:25 lr: 0.000001 loss_cls: 4.0477 (3.8924) grad_norm: 2.4018 (2.4490) time: 2.4393 data: 0.0003 max mem: 8426 +[2024-12-11 08:10:33 root] (utils.py 283): INFO Epoch: [29] [2120/2502] eta: 0:15:01 lr: 0.000001 loss_cls: 3.9954 (3.8930) grad_norm: 2.4346 (2.4494) time: 2.3829 data: 0.0003 max mem: 8426 +[2024-12-11 08:10:56 root] (utils.py 283): INFO Epoch: [29] [2130/2502] eta: 0:14:38 lr: 0.000001 loss_cls: 4.0271 (3.8932) grad_norm: 2.5210 (2.4495) time: 2.3441 data: 0.0002 max mem: 8426 +[2024-12-11 08:11:20 root] (utils.py 283): INFO Epoch: [29] [2140/2502] eta: 0:14:14 lr: 0.000001 loss_cls: 4.0271 (3.8934) grad_norm: 2.4848 (2.4493) time: 2.3558 data: 0.0003 max mem: 8426 +[2024-12-11 08:11:43 root] (utils.py 283): INFO Epoch: [29] [2150/2502] eta: 0:13:51 lr: 0.000001 loss_cls: 3.8448 (3.8933) grad_norm: 2.4316 (2.4493) time: 2.3338 data: 0.0003 max mem: 8426 +[2024-12-11 08:12:06 root] (utils.py 283): INFO Epoch: [29] [2160/2502] eta: 0:13:27 lr: 0.000001 loss_cls: 4.1276 (3.8941) grad_norm: 2.4619 (2.4494) time: 2.3060 data: 0.0003 max mem: 8426 +[2024-12-11 08:12:29 root] (utils.py 283): INFO Epoch: [29] [2170/2502] eta: 0:13:03 lr: 0.000001 loss_cls: 4.1276 (3.8950) grad_norm: 2.5449 (2.4498) time: 2.3298 data: 0.0003 max mem: 8426 +[2024-12-11 08:12:53 root] (utils.py 283): INFO Epoch: [29] [2180/2502] eta: 0:12:40 lr: 0.000001 loss_cls: 4.1568 (3.8965) grad_norm: 2.5420 (2.4501) time: 2.3590 data: 0.0003 max mem: 8426 +[2024-12-11 08:13:16 root] (utils.py 283): INFO Epoch: [29] [2190/2502] eta: 0:12:16 lr: 0.000001 loss_cls: 4.2221 (3.8972) grad_norm: 2.4753 (2.4508) time: 2.3327 data: 0.0003 max mem: 8426 +[2024-12-11 08:13:39 root] (utils.py 283): INFO Epoch: [29] [2200/2502] eta: 0:11:52 lr: 0.000001 loss_cls: 4.0659 (3.8976) grad_norm: 2.4542 (2.4505) time: 2.3175 data: 0.0003 max mem: 8426 +[2024-12-11 08:14:03 root] (utils.py 283): INFO Epoch: [29] [2210/2502] eta: 0:11:29 lr: 0.000001 loss_cls: 3.9491 (3.8970) grad_norm: 2.4185 (2.4505) time: 2.3421 data: 0.0002 max mem: 8426 +[2024-12-11 08:14:27 root] (utils.py 283): INFO Epoch: [29] [2220/2502] eta: 0:11:05 lr: 0.000001 loss_cls: 3.9491 (3.8965) grad_norm: 2.4185 (2.4502) time: 2.3846 data: 0.0002 max mem: 8426 +[2024-12-11 08:14:50 root] (utils.py 283): INFO Epoch: [29] [2230/2502] eta: 0:10:42 lr: 0.000001 loss_cls: 3.8876 (3.8964) grad_norm: 2.3843 (2.4500) time: 2.3799 data: 0.0003 max mem: 8426 +[2024-12-11 08:15:14 root] (utils.py 283): INFO Epoch: [29] [2240/2502] eta: 0:10:18 lr: 0.000001 loss_cls: 3.9627 (3.8969) grad_norm: 2.3812 (2.4496) time: 2.3596 data: 0.0002 max mem: 8426 +[2024-12-11 08:15:38 root] (utils.py 283): INFO Epoch: [29] [2250/2502] eta: 0:09:54 lr: 0.000001 loss_cls: 4.3317 (3.8987) grad_norm: 2.3973 (2.4495) time: 2.3510 data: 0.0003 max mem: 8426 +[2024-12-11 08:16:01 root] (utils.py 283): INFO Epoch: [29] [2260/2502] eta: 0:09:31 lr: 0.000001 loss_cls: 4.1206 (3.8983) grad_norm: 2.4427 (2.4494) time: 2.3489 data: 0.0003 max mem: 8426 +[2024-12-11 08:16:25 root] (utils.py 283): INFO Epoch: [29] [2270/2502] eta: 0:09:07 lr: 0.000001 loss_cls: 3.7131 (3.8970) grad_norm: 2.3683 (2.4492) time: 2.3926 data: 0.0002 max mem: 8426 +[2024-12-11 08:16:49 root] (utils.py 283): INFO Epoch: [29] [2280/2502] eta: 0:08:44 lr: 0.000001 loss_cls: 3.7261 (3.8968) grad_norm: 2.3683 (2.4490) time: 2.4094 data: 0.0002 max mem: 8426 +[2024-12-11 08:17:13 root] (utils.py 283): INFO Epoch: [29] [2290/2502] eta: 0:08:20 lr: 0.000001 loss_cls: 3.9822 (3.8968) grad_norm: 2.4130 (2.4495) time: 2.3694 data: 0.0003 max mem: 8426 +[2024-12-11 08:17:36 root] (utils.py 283): INFO Epoch: [29] [2300/2502] eta: 0:07:56 lr: 0.000001 loss_cls: 4.1210 (3.8977) grad_norm: 2.4552 (2.4494) time: 2.3120 data: 0.0003 max mem: 8426 +[2024-12-11 08:17:59 root] (utils.py 283): INFO Epoch: [29] [2310/2502] eta: 0:07:33 lr: 0.000001 loss_cls: 4.0096 (3.8977) grad_norm: 2.4069 (2.4494) time: 2.3002 data: 0.0003 max mem: 8426 +[2024-12-11 08:18:22 root] (utils.py 283): INFO Epoch: [29] [2320/2502] eta: 0:07:09 lr: 0.000001 loss_cls: 3.8664 (3.8968) grad_norm: 2.3732 (2.4494) time: 2.3291 data: 0.0003 max mem: 8426 +[2024-12-11 08:18:46 root] (utils.py 283): INFO Epoch: [29] [2330/2502] eta: 0:06:45 lr: 0.000001 loss_cls: 4.0518 (3.8974) grad_norm: 2.3955 (2.4493) time: 2.3493 data: 0.0003 max mem: 8426 +[2024-12-11 08:19:09 root] (utils.py 283): INFO Epoch: [29] [2340/2502] eta: 0:06:22 lr: 0.000001 loss_cls: 4.0633 (3.8975) grad_norm: 2.4058 (2.4493) time: 2.3358 data: 0.0003 max mem: 8426 +[2024-12-11 08:19:32 root] (utils.py 283): INFO Epoch: [29] [2350/2502] eta: 0:05:58 lr: 0.000001 loss_cls: 4.0838 (3.8985) grad_norm: 2.5204 (2.4496) time: 2.2924 data: 0.0003 max mem: 8426 +[2024-12-11 08:19:55 root] (utils.py 283): INFO Epoch: [29] [2360/2502] eta: 0:05:35 lr: 0.000001 loss_cls: 4.0512 (3.8981) grad_norm: 2.5394 (2.4497) time: 2.2796 data: 0.0003 max mem: 8426 +[2024-12-11 08:20:19 root] (utils.py 283): INFO Epoch: [29] [2370/2502] eta: 0:05:11 lr: 0.000001 loss_cls: 4.0845 (3.8990) grad_norm: 2.4566 (2.4497) time: 2.3909 data: 0.0003 max mem: 8426 +[2024-12-11 08:20:43 root] (utils.py 283): INFO Epoch: [29] [2380/2502] eta: 0:04:47 lr: 0.000001 loss_cls: 4.0405 (3.8986) grad_norm: 2.3877 (2.4494) time: 2.4041 data: 0.0003 max mem: 8426 +[2024-12-11 08:21:07 root] (utils.py 283): INFO Epoch: [29] [2390/2502] eta: 0:04:24 lr: 0.000001 loss_cls: 3.9807 (3.8984) grad_norm: 2.3877 (2.4499) time: 2.3550 data: 0.0002 max mem: 8426 +[2024-12-11 08:21:30 root] (utils.py 283): INFO Epoch: [29] [2400/2502] eta: 0:04:00 lr: 0.000001 loss_cls: 3.9807 (3.8989) grad_norm: 2.4497 (2.4499) time: 2.3453 data: 0.0003 max mem: 8426 +[2024-12-11 08:21:53 root] (utils.py 283): INFO Epoch: [29] [2410/2502] eta: 0:03:37 lr: 0.000001 loss_cls: 4.0620 (3.8985) grad_norm: 2.4497 (2.4502) time: 2.3412 data: 0.0002 max mem: 8426 +[2024-12-11 08:22:17 root] (utils.py 283): INFO Epoch: [29] [2420/2502] eta: 0:03:13 lr: 0.000001 loss_cls: 3.9615 (3.8978) grad_norm: 2.4401 (2.4504) time: 2.3610 data: 0.0003 max mem: 8426 +[2024-12-11 08:22:41 root] (utils.py 283): INFO Epoch: [29] [2430/2502] eta: 0:02:49 lr: 0.000001 loss_cls: 4.0117 (3.8979) grad_norm: 2.4656 (2.4506) time: 2.3666 data: 0.0003 max mem: 8426 +[2024-12-11 08:23:04 root] (utils.py 283): INFO Epoch: [29] [2440/2502] eta: 0:02:26 lr: 0.000001 loss_cls: 4.2213 (3.8979) grad_norm: 2.4741 (2.4507) time: 2.3666 data: 0.0003 max mem: 8426 +[2024-12-11 08:23:27 root] (utils.py 283): INFO Epoch: [29] [2450/2502] eta: 0:02:02 lr: 0.000001 loss_cls: 3.6824 (3.8968) grad_norm: 2.3611 (2.4503) time: 2.2984 data: 0.0003 max mem: 8426 +[2024-12-11 08:23:58 root] (utils.py 283): INFO Epoch: [29] [2460/2502] eta: 0:01:39 lr: 0.000001 loss_cls: 3.7531 (3.8966) grad_norm: 2.3611 (2.4502) time: 2.7002 data: 0.0003 max mem: 8426 +[2024-12-11 08:24:28 root] (utils.py 283): INFO Epoch: [29] [2470/2502] eta: 0:01:15 lr: 0.000001 loss_cls: 3.8095 (3.8964) grad_norm: 2.3852 (2.4501) time: 3.0723 data: 0.0003 max mem: 8426 +[2024-12-11 08:24:52 root] (utils.py 283): INFO Epoch: [29] [2480/2502] eta: 0:00:52 lr: 0.000001 loss_cls: 4.0765 (3.8970) grad_norm: 2.3776 (2.4503) time: 2.6805 data: 0.0003 max mem: 8426 +[2024-12-11 08:25:17 root] (utils.py 283): INFO Epoch: [29] [2490/2502] eta: 0:00:28 lr: 0.000001 loss_cls: 4.0562 (3.8970) grad_norm: 2.5352 (2.4507) time: 2.4206 data: 0.0280 max mem: 8426 +[2024-12-11 08:25:39 root] (utils.py 283): INFO Epoch: [29] [2500/2502] eta: 0:00:04 lr: 0.000001 loss_cls: 4.0429 (3.8973) grad_norm: 2.5472 (2.4511) time: 2.3617 data: 0.0280 max mem: 8426 +[2024-12-11 08:25:41 root] (utils.py 283): INFO Epoch: [29] [2501/2502] eta: 0:00:02 lr: 0.000001 loss_cls: 4.1636 (3.8975) grad_norm: 2.5472 (2.4511) time: 2.3482 data: 0.0280 max mem: 8426 +[2024-12-11 08:25:41 root] (utils.py 297): INFO Epoch: [29] Total time: 1:38:37 (2.3650 s / it) +[2024-12-11 08:25:41 root] (engine.py 179): INFO Averaged stats:lr: 0.000001 loss_cls: 4.1636 (3.8903) grad_norm: 2.5472 (2.4511) +[2024-12-11 08:25:42 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:28 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.6273 (0.6273) acc1: 86.7188 (86.7188) acc3: 96.0938 (96.0938) acc5: 98.4375 (98.4375) time: 0.2865 data: 0.0003 max mem: 8426 +[2024-12-11 08:25:45 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:24 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7190 (0.8003) acc1: 86.7188 (82.5284) acc3: 95.3125 (93.8920) acc5: 97.6562 (96.6619) time: 0.2744 data: 0.0004 max mem: 8426 +[2024-12-11 08:25:47 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:17 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8514 (0.8565) acc1: 81.2500 (81.4360) acc3: 92.9688 (93.1920) acc5: 96.0938 (95.6845) time: 0.2265 data: 0.0004 max mem: 8426 +[2024-12-11 08:25:48 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:14 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9363 (0.8688) acc1: 78.9062 (80.5948) acc3: 92.9688 (93.2712) acc5: 96.0938 (95.8417) time: 0.1873 data: 0.0004 max mem: 8426 +[2024-12-11 08:25:51 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8026 (0.8582) acc1: 80.4688 (80.9832) acc3: 94.5312 (93.3308) acc5: 96.8750 (95.9223) time: 0.2076 data: 0.0004 max mem: 8426 +[2024-12-11 08:25:52 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0540 (0.9428) acc1: 75.7812 (78.9369) acc3: 88.2812 (91.9271) acc5: 92.9688 (94.8683) time: 0.2001 data: 0.0004 max mem: 8426 +[2024-12-11 08:25:55 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2274 (0.9894) acc1: 72.6562 (78.0610) acc3: 85.1562 (90.9580) acc5: 89.8438 (94.0574) time: 0.2000 data: 0.0004 max mem: 8426 +[2024-12-11 08:25:57 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1839 (1.0286) acc1: 75.0000 (77.2007) acc3: 86.7188 (90.4930) acc5: 89.8438 (93.6620) time: 0.2040 data: 0.0004 max mem: 8426 +[2024-12-11 08:25:58 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2030 (1.0629) acc1: 73.4375 (76.3600) acc3: 86.7188 (89.9113) acc5: 89.8438 (93.1038) time: 0.1872 data: 0.0006 max mem: 8426 +[2024-12-11 08:26:01 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2072 (1.0909) acc1: 71.0938 (75.7040) acc3: 85.9375 (89.5261) acc5: 89.8438 (92.8400) time: 0.2087 data: 0.0006 max mem: 8426 +[2024-12-11 08:26:02 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1365 (1.0791) acc1: 73.4375 (75.9600) acc3: 89.0625 (89.6880) acc5: 91.4062 (93.0240) time: 0.2060 data: 0.0005 max mem: 8426 +[2024-12-11 08:26:02 root] (utils.py 297): INFO Test: Total time: 0:00:20 (0.2073 s / it) +[2024-12-11 08:26:02 root] (engine.py 264): INFO * Acc@1 75.866 Acc@3 89.700 Acc@5 92.970 loss 1.080 flops 1.285 layer_flops 1.251 +[2024-12-11 08:26:02 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 75.9% +[2024-12-11 08:26:02 root] (main.py 576): INFO Max accuracy: 75.89% +[2024-12-11 08:26:02 root] (main.py 589): INFO Finetune time 21:04:24