diff --git "a/training.log" "b/training.log" new file mode 100644--- /dev/null +++ "b/training.log" @@ -0,0 +1,2236 @@ +2024-10-01 22:37:13,256 ---------------------------------------------------------------------------------------------------- +2024-10-01 22:37:13,257 Model: "SequenceTagger( + (embeddings): TransformerWordEmbeddings( + (model): BertModel( + (embeddings): BertEmbeddings( + (word_embeddings): Embedding(35001, 768) + (position_embeddings): Embedding(512, 768) + (token_type_embeddings): Embedding(2, 768) + (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (encoder): BertEncoder( + (layer): ModuleList( + (0-11): 12 x BertLayer( + (attention): BertAttention( + (self): BertSelfAttention( + (query): Linear(in_features=768, out_features=768, bias=True) + (key): Linear(in_features=768, out_features=768, bias=True) + (value): Linear(in_features=768, out_features=768, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (output): BertSelfOutput( + (dense): Linear(in_features=768, out_features=768, bias=True) + (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (intermediate): BertIntermediate( + (dense): Linear(in_features=768, out_features=3072, bias=True) + (intermediate_act_fn): GELUActivation() + ) + (output): BertOutput( + (dense): Linear(in_features=3072, out_features=768, bias=True) + (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + ) + (pooler): BertPooler( + (dense): Linear(in_features=768, out_features=768, bias=True) + (activation): Tanh() + ) + ) + ) + (locked_dropout): LockedDropout(p=0.5) + (linear): Linear(in_features=768, out_features=73, bias=True) + (loss_function): CrossEntropyLoss() +)" +2024-10-01 22:37:13,257 ---------------------------------------------------------------------------------------------------- +2024-10-01 22:37:13,258 Corpus: 17132 train + 1904 dev + 2116 test sentences +2024-10-01 22:37:13,259 ---------------------------------------------------------------------------------------------------- +2024-10-01 22:37:13,259 Train: 17132 sentences +2024-10-01 22:37:13,260 (train_with_dev=False, train_with_test=False) +2024-10-01 22:37:13,260 ---------------------------------------------------------------------------------------------------- +2024-10-01 22:37:13,261 Training Params: +2024-10-01 22:37:13,261 - learning_rate: "5e-06" +2024-10-01 22:37:13,261 - mini_batch_size: "8" +2024-10-01 22:37:13,262 - max_epochs: "150" +2024-10-01 22:37:13,262 - shuffle: "True" +2024-10-01 22:37:13,262 ---------------------------------------------------------------------------------------------------- +2024-10-01 22:37:13,263 Plugins: +2024-10-01 22:37:13,263 - LinearScheduler | warmup_fraction: '0.1' +2024-10-01 22:37:13,264 ---------------------------------------------------------------------------------------------------- +2024-10-01 22:37:13,264 Final evaluation on model from best epoch (best-model.pt) +2024-10-01 22:37:13,264 - metric: "('micro avg', 'f1-score')" +2024-10-01 22:37:13,265 ---------------------------------------------------------------------------------------------------- +2024-10-01 22:37:13,265 Computation: +2024-10-01 22:37:13,265 - compute on device: cuda:0 +2024-10-01 22:37:13,266 - embedding storage: none +2024-10-01 22:37:13,266 ---------------------------------------------------------------------------------------------------- +2024-10-01 22:37:13,267 Model training base path: "elNER18-bert-base-greek-uncased-v1-bs8-e150-lr5e-06" +2024-10-01 22:37:13,267 ---------------------------------------------------------------------------------------------------- +2024-10-01 22:37:13,267 ---------------------------------------------------------------------------------------------------- +2024-10-01 22:37:31,950 epoch 1 - iter 214/2142 - loss 5.08110807 - time (sec): 18.68 - samples/sec: 2681.77 - lr: 0.000000 - momentum: 0.000000 +2024-10-01 22:37:50,162 epoch 1 - iter 428/2142 - loss 5.01774334 - time (sec): 36.89 - samples/sec: 2754.98 - lr: 0.000000 - momentum: 0.000000 +2024-10-01 22:38:08,198 epoch 1 - iter 642/2142 - loss 4.89948212 - time (sec): 54.93 - samples/sec: 2738.51 - lr: 0.000000 - momentum: 0.000000 +2024-10-01 22:38:26,548 epoch 1 - iter 856/2142 - loss 4.72554791 - time (sec): 73.28 - samples/sec: 2741.05 - lr: 0.000000 - momentum: 0.000000 +2024-10-01 22:38:45,499 epoch 1 - iter 1070/2142 - loss 4.48855112 - time (sec): 92.23 - samples/sec: 2743.47 - lr: 0.000000 - momentum: 0.000000 +2024-10-01 22:39:04,377 epoch 1 - iter 1284/2142 - loss 4.17788434 - time (sec): 111.11 - samples/sec: 2733.06 - lr: 0.000000 - momentum: 0.000000 +2024-10-01 22:39:22,674 epoch 1 - iter 1498/2142 - loss 3.81447340 - time (sec): 129.40 - samples/sec: 2728.05 - lr: 0.000000 - momentum: 0.000000 +2024-10-01 22:39:41,143 epoch 1 - iter 1712/2142 - loss 3.48568652 - time (sec): 147.87 - samples/sec: 2725.88 - lr: 0.000000 - momentum: 0.000000 +2024-10-01 22:39:59,791 epoch 1 - iter 1926/2142 - loss 3.21850024 - time (sec): 166.52 - samples/sec: 2726.57 - lr: 0.000000 - momentum: 0.000000 +2024-10-01 22:40:18,681 epoch 1 - iter 2140/2142 - loss 3.00072883 - time (sec): 185.41 - samples/sec: 2726.37 - lr: 0.000000 - momentum: 0.000000 +2024-10-01 22:40:18,844 ---------------------------------------------------------------------------------------------------- +2024-10-01 22:40:18,844 EPOCH 1 done: loss 2.9992 - lr: 0.000000 +2024-10-01 22:40:26,214 DEV : loss 0.9668176174163818 - f1-score (micro avg) 0.0 +2024-10-01 22:40:26,258 ---------------------------------------------------------------------------------------------------- +2024-10-01 22:40:44,626 epoch 2 - iter 214/2142 - loss 1.02606982 - time (sec): 18.37 - samples/sec: 2742.18 - lr: 0.000000 - momentum: 0.000000 +2024-10-01 22:41:03,550 epoch 2 - iter 428/2142 - loss 1.01809185 - time (sec): 37.29 - samples/sec: 2703.58 - lr: 0.000000 - momentum: 0.000000 +2024-10-01 22:41:22,833 epoch 2 - iter 642/2142 - loss 0.98871076 - time (sec): 56.57 - samples/sec: 2685.63 - lr: 0.000000 - momentum: 0.000000 +2024-10-01 22:41:41,807 epoch 2 - iter 856/2142 - loss 0.96083040 - time (sec): 75.55 - samples/sec: 2685.57 - lr: 0.000000 - momentum: 0.000000 +2024-10-01 22:42:00,448 epoch 2 - iter 1070/2142 - loss 0.94021172 - time (sec): 94.19 - samples/sec: 2692.79 - lr: 0.000000 - momentum: 0.000000 +2024-10-01 22:42:19,085 epoch 2 - iter 1284/2142 - loss 0.91917172 - time (sec): 112.83 - samples/sec: 2694.00 - lr: 0.000001 - momentum: 0.000000 +2024-10-01 22:42:37,649 epoch 2 - iter 1498/2142 - loss 0.89709309 - time (sec): 131.39 - samples/sec: 2701.66 - lr: 0.000001 - momentum: 0.000000 +2024-10-01 22:42:56,286 epoch 2 - iter 1712/2142 - loss 0.87656353 - time (sec): 150.03 - samples/sec: 2702.00 - lr: 0.000001 - momentum: 0.000000 +2024-10-01 22:43:14,761 epoch 2 - iter 1926/2142 - loss 0.85641809 - time (sec): 168.50 - samples/sec: 2706.43 - lr: 0.000001 - momentum: 0.000000 +2024-10-01 22:43:33,039 epoch 2 - iter 2140/2142 - loss 0.83675751 - time (sec): 186.78 - samples/sec: 2706.64 - lr: 0.000001 - momentum: 0.000000 +2024-10-01 22:43:33,205 ---------------------------------------------------------------------------------------------------- +2024-10-01 22:43:33,206 EPOCH 2 done: loss 0.8367 - lr: 0.000001 +2024-10-01 22:43:43,908 DEV : loss 0.5483314990997314 - f1-score (micro avg) 0.3786 +2024-10-01 22:43:43,940 saving best model +2024-10-01 22:43:44,363 ---------------------------------------------------------------------------------------------------- +2024-10-01 22:44:02,936 epoch 3 - iter 214/2142 - loss 0.62089832 - time (sec): 18.57 - samples/sec: 2723.61 - lr: 0.000001 - momentum: 0.000000 +2024-10-01 22:44:21,928 epoch 3 - iter 428/2142 - loss 0.60865789 - time (sec): 37.56 - samples/sec: 2700.05 - lr: 0.000001 - momentum: 0.000000 +2024-10-01 22:44:40,490 epoch 3 - iter 642/2142 - loss 0.59806545 - time (sec): 56.13 - samples/sec: 2718.68 - lr: 0.000001 - momentum: 0.000000 +2024-10-01 22:44:59,442 epoch 3 - iter 856/2142 - loss 0.58301141 - time (sec): 75.08 - samples/sec: 2721.13 - lr: 0.000001 - momentum: 0.000000 +2024-10-01 22:45:18,219 epoch 3 - iter 1070/2142 - loss 0.56461074 - time (sec): 93.85 - samples/sec: 2720.74 - lr: 0.000001 - momentum: 0.000000 +2024-10-01 22:45:36,665 epoch 3 - iter 1284/2142 - loss 0.55676109 - time (sec): 112.30 - samples/sec: 2718.36 - lr: 0.000001 - momentum: 0.000000 +2024-10-01 22:45:55,879 epoch 3 - iter 1498/2142 - loss 0.54368116 - time (sec): 131.51 - samples/sec: 2708.80 - lr: 0.000001 - momentum: 0.000000 +2024-10-01 22:46:14,730 epoch 3 - iter 1712/2142 - loss 0.53456959 - time (sec): 150.37 - samples/sec: 2704.25 - lr: 0.000001 - momentum: 0.000000 +2024-10-01 22:46:33,204 epoch 3 - iter 1926/2142 - loss 0.52446685 - time (sec): 168.84 - samples/sec: 2701.46 - lr: 0.000001 - momentum: 0.000000 +2024-10-01 22:46:51,872 epoch 3 - iter 2140/2142 - loss 0.51324611 - time (sec): 187.51 - samples/sec: 2696.01 - lr: 0.000001 - momentum: 0.000000 +2024-10-01 22:46:52,039 ---------------------------------------------------------------------------------------------------- +2024-10-01 22:46:52,039 EPOCH 3 done: loss 0.5132 - lr: 0.000001 +2024-10-01 22:47:02,002 DEV : loss 0.3197055160999298 - f1-score (micro avg) 0.646 +2024-10-01 22:47:02,029 saving best model +2024-10-01 22:47:05,558 ---------------------------------------------------------------------------------------------------- +2024-10-01 22:47:23,992 epoch 4 - iter 214/2142 - loss 0.38920277 - time (sec): 18.43 - samples/sec: 2738.53 - lr: 0.000001 - momentum: 0.000000 +2024-10-01 22:47:42,985 epoch 4 - iter 428/2142 - loss 0.38622709 - time (sec): 37.43 - samples/sec: 2712.93 - lr: 0.000001 - momentum: 0.000000 +2024-10-01 22:48:01,939 epoch 4 - iter 642/2142 - loss 0.38112450 - time (sec): 56.38 - samples/sec: 2687.09 - lr: 0.000001 - momentum: 0.000000 +2024-10-01 22:48:20,372 epoch 4 - iter 856/2142 - loss 0.37261298 - time (sec): 74.81 - samples/sec: 2681.35 - lr: 0.000001 - momentum: 0.000000 +2024-10-01 22:48:38,998 epoch 4 - iter 1070/2142 - loss 0.36659319 - time (sec): 93.44 - samples/sec: 2693.97 - lr: 0.000001 - momentum: 0.000000 +2024-10-01 22:48:57,793 epoch 4 - iter 1284/2142 - loss 0.36030469 - time (sec): 112.23 - samples/sec: 2691.79 - lr: 0.000001 - momentum: 0.000000 +2024-10-01 22:49:16,508 epoch 4 - iter 1498/2142 - loss 0.35531237 - time (sec): 130.95 - samples/sec: 2694.61 - lr: 0.000001 - momentum: 0.000000 +2024-10-01 22:49:34,829 epoch 4 - iter 1712/2142 - loss 0.34754416 - time (sec): 149.27 - samples/sec: 2704.65 - lr: 0.000001 - momentum: 0.000000 +2024-10-01 22:49:54,193 epoch 4 - iter 1926/2142 - loss 0.34069853 - time (sec): 168.63 - samples/sec: 2699.15 - lr: 0.000001 - momentum: 0.000000 +2024-10-01 22:50:12,860 epoch 4 - iter 2140/2142 - loss 0.33468839 - time (sec): 187.30 - samples/sec: 2698.73 - lr: 0.000001 - momentum: 0.000000 +2024-10-01 22:50:13,036 ---------------------------------------------------------------------------------------------------- +2024-10-01 22:50:13,036 EPOCH 4 done: loss 0.3346 - lr: 0.000001 +2024-10-01 22:50:22,404 DEV : loss 0.22554738819599152 - f1-score (micro avg) 0.7471 +2024-10-01 22:50:22,439 saving best model +2024-10-01 22:50:25,906 ---------------------------------------------------------------------------------------------------- +2024-10-01 22:50:44,459 epoch 5 - iter 214/2142 - loss 0.26992080 - time (sec): 18.55 - samples/sec: 2724.03 - lr: 0.000001 - momentum: 0.000000 +2024-10-01 22:51:03,724 epoch 5 - iter 428/2142 - loss 0.28073323 - time (sec): 37.82 - samples/sec: 2687.66 - lr: 0.000001 - momentum: 0.000000 +2024-10-01 22:51:22,079 epoch 5 - iter 642/2142 - loss 0.27155442 - time (sec): 56.17 - samples/sec: 2695.66 - lr: 0.000001 - momentum: 0.000000 +2024-10-01 22:51:41,032 epoch 5 - iter 856/2142 - loss 0.26745399 - time (sec): 75.12 - samples/sec: 2700.75 - lr: 0.000001 - momentum: 0.000000 +2024-10-01 22:51:59,908 epoch 5 - iter 1070/2142 - loss 0.26305934 - time (sec): 94.00 - samples/sec: 2698.39 - lr: 0.000001 - momentum: 0.000000 +2024-10-01 22:52:18,321 epoch 5 - iter 1284/2142 - loss 0.25948988 - time (sec): 112.41 - samples/sec: 2701.73 - lr: 0.000002 - momentum: 0.000000 +2024-10-01 22:52:37,408 epoch 5 - iter 1498/2142 - loss 0.25398965 - time (sec): 131.50 - samples/sec: 2697.09 - lr: 0.000002 - momentum: 0.000000 +2024-10-01 22:52:55,819 epoch 5 - iter 1712/2142 - loss 0.24855251 - time (sec): 149.91 - samples/sec: 2702.58 - lr: 0.000002 - momentum: 0.000000 +2024-10-01 22:53:14,253 epoch 5 - iter 1926/2142 - loss 0.24467250 - time (sec): 168.35 - samples/sec: 2701.72 - lr: 0.000002 - momentum: 0.000000 +2024-10-01 22:53:33,013 epoch 5 - iter 2140/2142 - loss 0.24171663 - time (sec): 187.10 - samples/sec: 2700.23 - lr: 0.000002 - momentum: 0.000000 +2024-10-01 22:53:33,194 ---------------------------------------------------------------------------------------------------- +2024-10-01 22:53:33,194 EPOCH 5 done: loss 0.2417 - lr: 0.000002 +2024-10-01 22:53:43,649 DEV : loss 0.17331355810165405 - f1-score (micro avg) 0.7976 +2024-10-01 22:53:43,679 saving best model +2024-10-01 22:53:47,391 ---------------------------------------------------------------------------------------------------- +2024-10-01 22:54:06,569 epoch 6 - iter 214/2142 - loss 0.20240762 - time (sec): 19.18 - samples/sec: 2724.67 - lr: 0.000002 - momentum: 0.000000 +2024-10-01 22:54:25,079 epoch 6 - iter 428/2142 - loss 0.20160073 - time (sec): 37.69 - samples/sec: 2749.12 - lr: 0.000002 - momentum: 0.000000 +2024-10-01 22:54:43,335 epoch 6 - iter 642/2142 - loss 0.20105825 - time (sec): 55.94 - samples/sec: 2732.89 - lr: 0.000002 - momentum: 0.000000 +2024-10-01 22:55:02,448 epoch 6 - iter 856/2142 - loss 0.20046421 - time (sec): 75.06 - samples/sec: 2715.58 - lr: 0.000002 - momentum: 0.000000 +2024-10-01 22:55:21,286 epoch 6 - iter 1070/2142 - loss 0.19467497 - time (sec): 93.89 - samples/sec: 2708.44 - lr: 0.000002 - momentum: 0.000000 +2024-10-01 22:55:40,011 epoch 6 - iter 1284/2142 - loss 0.19412621 - time (sec): 112.62 - samples/sec: 2710.17 - lr: 0.000002 - momentum: 0.000000 +2024-10-01 22:55:58,435 epoch 6 - iter 1498/2142 - loss 0.19291556 - time (sec): 131.04 - samples/sec: 2708.55 - lr: 0.000002 - momentum: 0.000000 +2024-10-01 22:56:17,310 epoch 6 - iter 1712/2142 - loss 0.19201011 - time (sec): 149.92 - samples/sec: 2698.66 - lr: 0.000002 - momentum: 0.000000 +2024-10-01 22:56:36,109 epoch 6 - iter 1926/2142 - loss 0.18957828 - time (sec): 168.72 - samples/sec: 2697.73 - lr: 0.000002 - momentum: 0.000000 +2024-10-01 22:56:54,758 epoch 6 - iter 2140/2142 - loss 0.18700822 - time (sec): 187.37 - samples/sec: 2698.02 - lr: 0.000002 - momentum: 0.000000 +2024-10-01 22:56:54,915 ---------------------------------------------------------------------------------------------------- +2024-10-01 22:56:54,915 EPOCH 6 done: loss 0.1869 - lr: 0.000002 +2024-10-01 22:57:04,863 DEV : loss 0.13921551406383514 - f1-score (micro avg) 0.8415 +2024-10-01 22:57:04,898 saving best model +2024-10-01 22:57:08,397 ---------------------------------------------------------------------------------------------------- +2024-10-01 22:57:26,745 epoch 7 - iter 214/2142 - loss 0.15984148 - time (sec): 18.35 - samples/sec: 2723.87 - lr: 0.000002 - momentum: 0.000000 +2024-10-01 22:57:45,662 epoch 7 - iter 428/2142 - loss 0.15923463 - time (sec): 37.26 - samples/sec: 2722.90 - lr: 0.000002 - momentum: 0.000000 +2024-10-01 22:58:05,108 epoch 7 - iter 642/2142 - loss 0.15843862 - time (sec): 56.71 - samples/sec: 2685.81 - lr: 0.000002 - momentum: 0.000000 +2024-10-01 22:58:23,667 epoch 7 - iter 856/2142 - loss 0.15589286 - time (sec): 75.27 - samples/sec: 2703.93 - lr: 0.000002 - momentum: 0.000000 +2024-10-01 22:58:42,245 epoch 7 - iter 1070/2142 - loss 0.15496434 - time (sec): 93.85 - samples/sec: 2699.15 - lr: 0.000002 - momentum: 0.000000 +2024-10-01 22:59:01,310 epoch 7 - iter 1284/2142 - loss 0.15552155 - time (sec): 112.91 - samples/sec: 2691.16 - lr: 0.000002 - momentum: 0.000000 +2024-10-01 22:59:20,194 epoch 7 - iter 1498/2142 - loss 0.15404262 - time (sec): 131.79 - samples/sec: 2690.60 - lr: 0.000002 - momentum: 0.000000 +2024-10-01 22:59:38,667 epoch 7 - iter 1712/2142 - loss 0.15236201 - time (sec): 150.27 - samples/sec: 2689.79 - lr: 0.000002 - momentum: 0.000000 +2024-10-01 22:59:57,183 epoch 7 - iter 1926/2142 - loss 0.15127731 - time (sec): 168.78 - samples/sec: 2694.40 - lr: 0.000002 - momentum: 0.000000 +2024-10-01 23:00:15,838 epoch 7 - iter 2140/2142 - loss 0.14958576 - time (sec): 187.44 - samples/sec: 2696.73 - lr: 0.000002 - momentum: 0.000000 +2024-10-01 23:00:16,007 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:00:16,008 EPOCH 7 done: loss 0.1496 - lr: 0.000002 +2024-10-01 23:00:25,200 DEV : loss 0.1172066256403923 - f1-score (micro avg) 0.8696 +2024-10-01 23:00:25,228 saving best model +2024-10-01 23:00:28,472 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:00:46,763 epoch 8 - iter 214/2142 - loss 0.12863988 - time (sec): 18.29 - samples/sec: 2771.06 - lr: 0.000002 - momentum: 0.000000 +2024-10-01 23:01:05,536 epoch 8 - iter 428/2142 - loss 0.12879750 - time (sec): 37.06 - samples/sec: 2731.39 - lr: 0.000002 - momentum: 0.000000 +2024-10-01 23:01:24,494 epoch 8 - iter 642/2142 - loss 0.12808302 - time (sec): 56.02 - samples/sec: 2730.34 - lr: 0.000002 - momentum: 0.000000 +2024-10-01 23:01:43,352 epoch 8 - iter 856/2142 - loss 0.12547450 - time (sec): 74.88 - samples/sec: 2732.45 - lr: 0.000002 - momentum: 0.000000 +2024-10-01 23:02:02,051 epoch 8 - iter 1070/2142 - loss 0.12557645 - time (sec): 93.58 - samples/sec: 2714.14 - lr: 0.000002 - momentum: 0.000000 +2024-10-01 23:02:20,882 epoch 8 - iter 1284/2142 - loss 0.12488670 - time (sec): 112.41 - samples/sec: 2711.27 - lr: 0.000003 - momentum: 0.000000 +2024-10-01 23:02:39,572 epoch 8 - iter 1498/2142 - loss 0.12521028 - time (sec): 131.10 - samples/sec: 2712.74 - lr: 0.000003 - momentum: 0.000000 +2024-10-01 23:02:58,053 epoch 8 - iter 1712/2142 - loss 0.12444534 - time (sec): 149.58 - samples/sec: 2706.60 - lr: 0.000003 - momentum: 0.000000 +2024-10-01 23:03:17,047 epoch 8 - iter 1926/2142 - loss 0.12373155 - time (sec): 168.57 - samples/sec: 2702.11 - lr: 0.000003 - momentum: 0.000000 +2024-10-01 23:03:35,905 epoch 8 - iter 2140/2142 - loss 0.12340193 - time (sec): 187.43 - samples/sec: 2696.53 - lr: 0.000003 - momentum: 0.000000 +2024-10-01 23:03:36,058 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:03:36,059 EPOCH 8 done: loss 0.1234 - lr: 0.000003 +2024-10-01 23:03:46,384 DEV : loss 0.10745403170585632 - f1-score (micro avg) 0.8848 +2024-10-01 23:03:46,414 saving best model +2024-10-01 23:03:49,663 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:04:08,281 epoch 9 - iter 214/2142 - loss 0.11580444 - time (sec): 18.62 - samples/sec: 2713.80 - lr: 0.000003 - momentum: 0.000000 +2024-10-01 23:04:27,349 epoch 9 - iter 428/2142 - loss 0.10784012 - time (sec): 37.68 - samples/sec: 2689.12 - lr: 0.000003 - momentum: 0.000000 +2024-10-01 23:04:45,838 epoch 9 - iter 642/2142 - loss 0.10671023 - time (sec): 56.17 - samples/sec: 2701.71 - lr: 0.000003 - momentum: 0.000000 +2024-10-01 23:05:04,601 epoch 9 - iter 856/2142 - loss 0.10720330 - time (sec): 74.94 - samples/sec: 2692.66 - lr: 0.000003 - momentum: 0.000000 +2024-10-01 23:05:23,720 epoch 9 - iter 1070/2142 - loss 0.10714606 - time (sec): 94.06 - samples/sec: 2686.99 - lr: 0.000003 - momentum: 0.000000 +2024-10-01 23:05:42,277 epoch 9 - iter 1284/2142 - loss 0.10680298 - time (sec): 112.61 - samples/sec: 2687.74 - lr: 0.000003 - momentum: 0.000000 +2024-10-01 23:06:01,085 epoch 9 - iter 1498/2142 - loss 0.10607865 - time (sec): 131.42 - samples/sec: 2680.93 - lr: 0.000003 - momentum: 0.000000 +2024-10-01 23:06:19,816 epoch 9 - iter 1712/2142 - loss 0.10623012 - time (sec): 150.15 - samples/sec: 2682.61 - lr: 0.000003 - momentum: 0.000000 +2024-10-01 23:06:39,201 epoch 9 - iter 1926/2142 - loss 0.10574086 - time (sec): 169.54 - samples/sec: 2681.47 - lr: 0.000003 - momentum: 0.000000 +2024-10-01 23:06:57,973 epoch 9 - iter 2140/2142 - loss 0.10466757 - time (sec): 188.31 - samples/sec: 2684.71 - lr: 0.000003 - momentum: 0.000000 +2024-10-01 23:06:58,119 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:06:58,120 EPOCH 9 done: loss 0.1047 - lr: 0.000003 +2024-10-01 23:07:07,904 DEV : loss 0.10531777143478394 - f1-score (micro avg) 0.8878 +2024-10-01 23:07:07,932 saving best model +2024-10-01 23:07:11,035 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:07:29,388 epoch 10 - iter 214/2142 - loss 0.09029796 - time (sec): 18.35 - samples/sec: 2679.79 - lr: 0.000003 - momentum: 0.000000 +2024-10-01 23:07:48,063 epoch 10 - iter 428/2142 - loss 0.09311687 - time (sec): 37.03 - samples/sec: 2700.13 - lr: 0.000003 - momentum: 0.000000 +2024-10-01 23:08:07,002 epoch 10 - iter 642/2142 - loss 0.09231448 - time (sec): 55.97 - samples/sec: 2706.15 - lr: 0.000003 - momentum: 0.000000 +2024-10-01 23:08:25,689 epoch 10 - iter 856/2142 - loss 0.08996797 - time (sec): 74.65 - samples/sec: 2706.96 - lr: 0.000003 - momentum: 0.000000 +2024-10-01 23:08:44,053 epoch 10 - iter 1070/2142 - loss 0.08948200 - time (sec): 93.02 - samples/sec: 2723.49 - lr: 0.000003 - momentum: 0.000000 +2024-10-01 23:09:03,256 epoch 10 - iter 1284/2142 - loss 0.08953757 - time (sec): 112.22 - samples/sec: 2709.28 - lr: 0.000003 - momentum: 0.000000 +2024-10-01 23:09:22,159 epoch 10 - iter 1498/2142 - loss 0.08935969 - time (sec): 131.12 - samples/sec: 2710.69 - lr: 0.000003 - momentum: 0.000000 +2024-10-01 23:09:41,228 epoch 10 - iter 1712/2142 - loss 0.08899992 - time (sec): 150.19 - samples/sec: 2704.39 - lr: 0.000003 - momentum: 0.000000 +2024-10-01 23:10:00,248 epoch 10 - iter 1926/2142 - loss 0.08883150 - time (sec): 169.21 - samples/sec: 2694.57 - lr: 0.000003 - momentum: 0.000000 +2024-10-01 23:10:18,887 epoch 10 - iter 2140/2142 - loss 0.08867834 - time (sec): 187.85 - samples/sec: 2690.94 - lr: 0.000003 - momentum: 0.000000 +2024-10-01 23:10:19,045 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:10:19,045 EPOCH 10 done: loss 0.0886 - lr: 0.000003 +2024-10-01 23:10:28,493 DEV : loss 0.1022661104798317 - f1-score (micro avg) 0.8905 +2024-10-01 23:10:28,519 saving best model +2024-10-01 23:10:31,779 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:10:50,903 epoch 11 - iter 214/2142 - loss 0.07326429 - time (sec): 19.12 - samples/sec: 2695.13 - lr: 0.000003 - momentum: 0.000000 +2024-10-01 23:11:09,539 epoch 11 - iter 428/2142 - loss 0.07544389 - time (sec): 37.76 - samples/sec: 2680.57 - lr: 0.000003 - momentum: 0.000000 +2024-10-01 23:11:27,868 epoch 11 - iter 642/2142 - loss 0.07572450 - time (sec): 56.09 - samples/sec: 2688.33 - lr: 0.000003 - momentum: 0.000000 +2024-10-01 23:11:46,666 epoch 11 - iter 856/2142 - loss 0.07659482 - time (sec): 74.88 - samples/sec: 2695.05 - lr: 0.000003 - momentum: 0.000000 +2024-10-01 23:12:05,471 epoch 11 - iter 1070/2142 - loss 0.07732688 - time (sec): 93.69 - samples/sec: 2687.96 - lr: 0.000003 - momentum: 0.000000 +2024-10-01 23:12:24,197 epoch 11 - iter 1284/2142 - loss 0.07667926 - time (sec): 112.42 - samples/sec: 2689.97 - lr: 0.000004 - momentum: 0.000000 +2024-10-01 23:12:42,948 epoch 11 - iter 1498/2142 - loss 0.07747072 - time (sec): 131.17 - samples/sec: 2693.55 - lr: 0.000004 - momentum: 0.000000 +2024-10-01 23:13:01,629 epoch 11 - iter 1712/2142 - loss 0.07731806 - time (sec): 149.85 - samples/sec: 2691.73 - lr: 0.000004 - momentum: 0.000000 +2024-10-01 23:13:20,365 epoch 11 - iter 1926/2142 - loss 0.07741084 - time (sec): 168.58 - samples/sec: 2695.36 - lr: 0.000004 - momentum: 0.000000 +2024-10-01 23:13:39,383 epoch 11 - iter 2140/2142 - loss 0.07656632 - time (sec): 187.60 - samples/sec: 2694.72 - lr: 0.000004 - momentum: 0.000000 +2024-10-01 23:13:39,547 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:13:39,547 EPOCH 11 done: loss 0.0766 - lr: 0.000004 +2024-10-01 23:13:49,871 DEV : loss 0.10101401805877686 - f1-score (micro avg) 0.898 +2024-10-01 23:13:49,898 saving best model +2024-10-01 23:13:53,068 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:14:11,524 epoch 12 - iter 214/2142 - loss 0.06439849 - time (sec): 18.45 - samples/sec: 2678.84 - lr: 0.000004 - momentum: 0.000000 +2024-10-01 23:14:30,560 epoch 12 - iter 428/2142 - loss 0.06488239 - time (sec): 37.49 - samples/sec: 2683.50 - lr: 0.000004 - momentum: 0.000000 +2024-10-01 23:14:49,127 epoch 12 - iter 642/2142 - loss 0.06489805 - time (sec): 56.06 - samples/sec: 2694.40 - lr: 0.000004 - momentum: 0.000000 +2024-10-01 23:15:08,149 epoch 12 - iter 856/2142 - loss 0.06442857 - time (sec): 75.08 - samples/sec: 2704.41 - lr: 0.000004 - momentum: 0.000000 +2024-10-01 23:15:26,804 epoch 12 - iter 1070/2142 - loss 0.06549589 - time (sec): 93.73 - samples/sec: 2701.63 - lr: 0.000004 - momentum: 0.000000 +2024-10-01 23:15:45,649 epoch 12 - iter 1284/2142 - loss 0.06602695 - time (sec): 112.58 - samples/sec: 2697.88 - lr: 0.000004 - momentum: 0.000000 +2024-10-01 23:16:04,413 epoch 12 - iter 1498/2142 - loss 0.06612008 - time (sec): 131.34 - samples/sec: 2695.31 - lr: 0.000004 - momentum: 0.000000 +2024-10-01 23:16:23,207 epoch 12 - iter 1712/2142 - loss 0.06633529 - time (sec): 150.14 - samples/sec: 2698.64 - lr: 0.000004 - momentum: 0.000000 +2024-10-01 23:16:42,264 epoch 12 - iter 1926/2142 - loss 0.06623859 - time (sec): 169.19 - samples/sec: 2696.34 - lr: 0.000004 - momentum: 0.000000 +2024-10-01 23:17:00,969 epoch 12 - iter 2140/2142 - loss 0.06604244 - time (sec): 187.90 - samples/sec: 2690.14 - lr: 0.000004 - momentum: 0.000000 +2024-10-01 23:17:01,190 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:17:01,190 EPOCH 12 done: loss 0.0661 - lr: 0.000004 +2024-10-01 23:17:10,489 DEV : loss 0.10987376421689987 - f1-score (micro avg) 0.8967 +2024-10-01 23:17:10,518 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:17:29,774 epoch 13 - iter 214/2142 - loss 0.05475158 - time (sec): 19.25 - samples/sec: 2662.46 - lr: 0.000004 - momentum: 0.000000 +2024-10-01 23:17:48,540 epoch 13 - iter 428/2142 - loss 0.05492115 - time (sec): 38.02 - samples/sec: 2690.30 - lr: 0.000004 - momentum: 0.000000 +2024-10-01 23:18:07,211 epoch 13 - iter 642/2142 - loss 0.05478909 - time (sec): 56.69 - samples/sec: 2712.50 - lr: 0.000004 - momentum: 0.000000 +2024-10-01 23:18:26,430 epoch 13 - iter 856/2142 - loss 0.05609340 - time (sec): 75.91 - samples/sec: 2695.70 - lr: 0.000004 - momentum: 0.000000 +2024-10-01 23:18:45,007 epoch 13 - iter 1070/2142 - loss 0.05558601 - time (sec): 94.49 - samples/sec: 2686.29 - lr: 0.000004 - momentum: 0.000000 +2024-10-01 23:19:03,987 epoch 13 - iter 1284/2142 - loss 0.05722867 - time (sec): 113.47 - samples/sec: 2686.88 - lr: 0.000004 - momentum: 0.000000 +2024-10-01 23:19:22,495 epoch 13 - iter 1498/2142 - loss 0.05741053 - time (sec): 131.97 - samples/sec: 2681.66 - lr: 0.000004 - momentum: 0.000000 +2024-10-01 23:19:41,117 epoch 13 - iter 1712/2142 - loss 0.05690920 - time (sec): 150.60 - samples/sec: 2683.98 - lr: 0.000004 - momentum: 0.000000 +2024-10-01 23:19:59,931 epoch 13 - iter 1926/2142 - loss 0.05672375 - time (sec): 169.41 - samples/sec: 2686.77 - lr: 0.000004 - momentum: 0.000000 +2024-10-01 23:20:18,684 epoch 13 - iter 2140/2142 - loss 0.05648740 - time (sec): 188.16 - samples/sec: 2686.65 - lr: 0.000004 - momentum: 0.000000 +2024-10-01 23:20:18,833 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:20:18,833 EPOCH 13 done: loss 0.0565 - lr: 0.000004 +2024-10-01 23:20:29,561 DEV : loss 0.10422074794769287 - f1-score (micro avg) 0.9066 +2024-10-01 23:20:29,634 saving best model +2024-10-01 23:20:33,527 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:20:52,664 epoch 14 - iter 214/2142 - loss 0.04546238 - time (sec): 19.14 - samples/sec: 2694.03 - lr: 0.000004 - momentum: 0.000000 +2024-10-01 23:21:11,599 epoch 14 - iter 428/2142 - loss 0.04633746 - time (sec): 38.07 - samples/sec: 2694.85 - lr: 0.000004 - momentum: 0.000000 +2024-10-01 23:21:30,544 epoch 14 - iter 642/2142 - loss 0.04906738 - time (sec): 57.02 - samples/sec: 2679.94 - lr: 0.000004 - momentum: 0.000000 +2024-10-01 23:21:49,459 epoch 14 - iter 856/2142 - loss 0.04831412 - time (sec): 75.93 - samples/sec: 2681.88 - lr: 0.000004 - momentum: 0.000000 +2024-10-01 23:22:08,168 epoch 14 - iter 1070/2142 - loss 0.04960761 - time (sec): 94.64 - samples/sec: 2680.67 - lr: 0.000004 - momentum: 0.000000 +2024-10-01 23:22:26,842 epoch 14 - iter 1284/2142 - loss 0.04957222 - time (sec): 113.31 - samples/sec: 2676.64 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:22:46,024 epoch 14 - iter 1498/2142 - loss 0.05033836 - time (sec): 132.50 - samples/sec: 2673.92 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:23:05,002 epoch 14 - iter 1712/2142 - loss 0.04995704 - time (sec): 151.47 - samples/sec: 2674.81 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:23:24,502 epoch 14 - iter 1926/2142 - loss 0.04946453 - time (sec): 170.97 - samples/sec: 2665.95 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:23:43,173 epoch 14 - iter 2140/2142 - loss 0.04944830 - time (sec): 189.64 - samples/sec: 2665.52 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:23:43,321 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:23:43,322 EPOCH 14 done: loss 0.0494 - lr: 0.000005 +2024-10-01 23:23:54,385 DEV : loss 0.11036770045757294 - f1-score (micro avg) 0.9084 +2024-10-01 23:23:54,432 saving best model +2024-10-01 23:23:58,022 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:24:16,965 epoch 15 - iter 214/2142 - loss 0.03886788 - time (sec): 18.94 - samples/sec: 2714.71 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:24:35,840 epoch 15 - iter 428/2142 - loss 0.03878245 - time (sec): 37.82 - samples/sec: 2697.07 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:24:55,012 epoch 15 - iter 642/2142 - loss 0.04092066 - time (sec): 56.99 - samples/sec: 2667.88 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:25:14,225 epoch 15 - iter 856/2142 - loss 0.04237978 - time (sec): 76.20 - samples/sec: 2653.85 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:25:32,825 epoch 15 - iter 1070/2142 - loss 0.04335266 - time (sec): 94.80 - samples/sec: 2651.32 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:25:51,637 epoch 15 - iter 1284/2142 - loss 0.04295783 - time (sec): 113.61 - samples/sec: 2652.81 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:26:10,600 epoch 15 - iter 1498/2142 - loss 0.04348287 - time (sec): 132.58 - samples/sec: 2653.26 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:26:29,365 epoch 15 - iter 1712/2142 - loss 0.04311866 - time (sec): 151.34 - samples/sec: 2659.33 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:26:48,575 epoch 15 - iter 1926/2142 - loss 0.04300018 - time (sec): 170.55 - samples/sec: 2664.31 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:27:07,896 epoch 15 - iter 2140/2142 - loss 0.04276072 - time (sec): 189.87 - samples/sec: 2662.48 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:27:08,039 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:27:08,040 EPOCH 15 done: loss 0.0428 - lr: 0.000005 +2024-10-01 23:27:17,717 DEV : loss 0.12032926827669144 - f1-score (micro avg) 0.9029 +2024-10-01 23:27:17,751 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:27:36,455 epoch 16 - iter 214/2142 - loss 0.03889917 - time (sec): 18.70 - samples/sec: 2654.63 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:27:56,061 epoch 16 - iter 428/2142 - loss 0.03834689 - time (sec): 38.31 - samples/sec: 2666.80 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:28:15,410 epoch 16 - iter 642/2142 - loss 0.03681841 - time (sec): 57.66 - samples/sec: 2659.49 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:28:34,223 epoch 16 - iter 856/2142 - loss 0.03647143 - time (sec): 76.47 - samples/sec: 2646.64 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:28:53,262 epoch 16 - iter 1070/2142 - loss 0.03600140 - time (sec): 95.51 - samples/sec: 2648.93 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:29:12,386 epoch 16 - iter 1284/2142 - loss 0.03640236 - time (sec): 114.63 - samples/sec: 2642.54 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:29:31,518 epoch 16 - iter 1498/2142 - loss 0.03650184 - time (sec): 133.77 - samples/sec: 2644.78 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:29:50,287 epoch 16 - iter 1712/2142 - loss 0.03616281 - time (sec): 152.53 - samples/sec: 2648.52 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:30:09,553 epoch 16 - iter 1926/2142 - loss 0.03626548 - time (sec): 171.80 - samples/sec: 2646.13 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:30:28,611 epoch 16 - iter 2140/2142 - loss 0.03605380 - time (sec): 190.86 - samples/sec: 2648.86 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:30:28,766 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:30:28,767 EPOCH 16 done: loss 0.0360 - lr: 0.000005 +2024-10-01 23:30:39,806 DEV : loss 0.13169404864311218 - f1-score (micro avg) 0.9105 +2024-10-01 23:30:39,837 saving best model +2024-10-01 23:30:43,320 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:31:02,713 epoch 17 - iter 214/2142 - loss 0.03366172 - time (sec): 19.39 - samples/sec: 2627.49 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:31:21,519 epoch 17 - iter 428/2142 - loss 0.03085555 - time (sec): 38.20 - samples/sec: 2634.49 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:31:40,494 epoch 17 - iter 642/2142 - loss 0.03203921 - time (sec): 57.17 - samples/sec: 2668.08 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:31:59,603 epoch 17 - iter 856/2142 - loss 0.03136402 - time (sec): 76.28 - samples/sec: 2655.45 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:32:19,035 epoch 17 - iter 1070/2142 - loss 0.03114437 - time (sec): 95.71 - samples/sec: 2642.93 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:32:38,313 epoch 17 - iter 1284/2142 - loss 0.03143270 - time (sec): 114.99 - samples/sec: 2646.30 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:32:57,129 epoch 17 - iter 1498/2142 - loss 0.03171105 - time (sec): 133.81 - samples/sec: 2649.82 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:33:15,656 epoch 17 - iter 1712/2142 - loss 0.03182284 - time (sec): 152.33 - samples/sec: 2659.36 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:33:34,422 epoch 17 - iter 1926/2142 - loss 0.03172928 - time (sec): 171.10 - samples/sec: 2662.52 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:33:53,436 epoch 17 - iter 2140/2142 - loss 0.03213572 - time (sec): 190.11 - samples/sec: 2659.08 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:33:53,591 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:33:53,591 EPOCH 17 done: loss 0.0322 - lr: 0.000005 +2024-10-01 23:34:04,580 DEV : loss 0.13709977269172668 - f1-score (micro avg) 0.9124 +2024-10-01 23:34:04,613 saving best model +2024-10-01 23:34:07,935 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:34:26,863 epoch 18 - iter 214/2142 - loss 0.02793448 - time (sec): 18.93 - samples/sec: 2668.62 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:34:45,737 epoch 18 - iter 428/2142 - loss 0.02763264 - time (sec): 37.80 - samples/sec: 2653.52 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:35:04,705 epoch 18 - iter 642/2142 - loss 0.02585077 - time (sec): 56.77 - samples/sec: 2672.53 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:35:23,796 epoch 18 - iter 856/2142 - loss 0.02679249 - time (sec): 75.86 - samples/sec: 2667.27 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:35:42,789 epoch 18 - iter 1070/2142 - loss 0.02706965 - time (sec): 94.85 - samples/sec: 2662.17 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:36:01,874 epoch 18 - iter 1284/2142 - loss 0.02764910 - time (sec): 113.94 - samples/sec: 2664.02 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:36:20,871 epoch 18 - iter 1498/2142 - loss 0.02767502 - time (sec): 132.93 - samples/sec: 2657.80 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:36:39,661 epoch 18 - iter 1712/2142 - loss 0.02770831 - time (sec): 151.72 - samples/sec: 2664.60 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:36:58,825 epoch 18 - iter 1926/2142 - loss 0.02809857 - time (sec): 170.89 - samples/sec: 2658.73 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:37:17,634 epoch 18 - iter 2140/2142 - loss 0.02858359 - time (sec): 189.70 - samples/sec: 2664.90 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:37:17,778 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:37:17,778 EPOCH 18 done: loss 0.0286 - lr: 0.000005 +2024-10-01 23:37:27,653 DEV : loss 0.150344580411911 - f1-score (micro avg) 0.9085 +2024-10-01 23:37:27,693 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:37:46,903 epoch 19 - iter 214/2142 - loss 0.02316424 - time (sec): 19.21 - samples/sec: 2657.05 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:38:05,657 epoch 19 - iter 428/2142 - loss 0.02320508 - time (sec): 37.96 - samples/sec: 2663.41 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:38:25,083 epoch 19 - iter 642/2142 - loss 0.02407436 - time (sec): 57.39 - samples/sec: 2656.51 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:38:44,548 epoch 19 - iter 856/2142 - loss 0.02527761 - time (sec): 76.85 - samples/sec: 2650.77 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:39:03,251 epoch 19 - iter 1070/2142 - loss 0.02510368 - time (sec): 95.56 - samples/sec: 2654.46 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:39:22,450 epoch 19 - iter 1284/2142 - loss 0.02558588 - time (sec): 114.76 - samples/sec: 2644.28 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:39:41,029 epoch 19 - iter 1498/2142 - loss 0.02598586 - time (sec): 133.33 - samples/sec: 2643.32 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:40:00,139 epoch 19 - iter 1712/2142 - loss 0.02625987 - time (sec): 152.44 - samples/sec: 2645.56 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:40:19,363 epoch 19 - iter 1926/2142 - loss 0.02578546 - time (sec): 171.67 - samples/sec: 2649.45 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:40:38,268 epoch 19 - iter 2140/2142 - loss 0.02566516 - time (sec): 190.57 - samples/sec: 2652.57 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:40:38,424 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:40:38,425 EPOCH 19 done: loss 0.0257 - lr: 0.000005 +2024-10-01 23:40:48,839 DEV : loss 0.16350413858890533 - f1-score (micro avg) 0.9093 +2024-10-01 23:40:48,874 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:41:08,319 epoch 20 - iter 214/2142 - loss 0.02003721 - time (sec): 19.44 - samples/sec: 2622.46 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:41:27,413 epoch 20 - iter 428/2142 - loss 0.02083737 - time (sec): 38.54 - samples/sec: 2649.65 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:41:46,375 epoch 20 - iter 642/2142 - loss 0.02199312 - time (sec): 57.50 - samples/sec: 2648.83 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:42:05,176 epoch 20 - iter 856/2142 - loss 0.02193759 - time (sec): 76.30 - samples/sec: 2641.07 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:42:24,590 epoch 20 - iter 1070/2142 - loss 0.02247482 - time (sec): 95.71 - samples/sec: 2647.14 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:42:43,263 epoch 20 - iter 1284/2142 - loss 0.02285395 - time (sec): 114.39 - samples/sec: 2656.10 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:43:02,160 epoch 20 - iter 1498/2142 - loss 0.02297484 - time (sec): 133.28 - samples/sec: 2655.70 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:43:21,020 epoch 20 - iter 1712/2142 - loss 0.02326937 - time (sec): 152.14 - samples/sec: 2658.50 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:43:40,136 epoch 20 - iter 1926/2142 - loss 0.02318065 - time (sec): 171.26 - samples/sec: 2661.95 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:43:59,219 epoch 20 - iter 2140/2142 - loss 0.02302771 - time (sec): 190.34 - samples/sec: 2655.77 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:43:59,370 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:43:59,371 EPOCH 20 done: loss 0.0230 - lr: 0.000005 +2024-10-01 23:44:09,945 DEV : loss 0.17162354290485382 - f1-score (micro avg) 0.9106 +2024-10-01 23:44:09,972 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:44:29,531 epoch 21 - iter 214/2142 - loss 0.02030349 - time (sec): 19.56 - samples/sec: 2679.59 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:44:47,900 epoch 21 - iter 428/2142 - loss 0.02104000 - time (sec): 37.93 - samples/sec: 2698.06 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:45:06,729 epoch 21 - iter 642/2142 - loss 0.02176651 - time (sec): 56.76 - samples/sec: 2691.96 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:45:25,981 epoch 21 - iter 856/2142 - loss 0.02117020 - time (sec): 76.01 - samples/sec: 2680.66 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:45:45,207 epoch 21 - iter 1070/2142 - loss 0.02184369 - time (sec): 95.23 - samples/sec: 2668.87 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:46:03,952 epoch 21 - iter 1284/2142 - loss 0.02118668 - time (sec): 113.98 - samples/sec: 2668.96 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:46:22,424 epoch 21 - iter 1498/2142 - loss 0.02134688 - time (sec): 132.45 - samples/sec: 2672.57 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:46:41,603 epoch 21 - iter 1712/2142 - loss 0.02159550 - time (sec): 151.63 - samples/sec: 2668.71 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:47:00,687 epoch 21 - iter 1926/2142 - loss 0.02161313 - time (sec): 170.71 - samples/sec: 2668.34 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:47:19,426 epoch 21 - iter 2140/2142 - loss 0.02139433 - time (sec): 189.45 - samples/sec: 2668.31 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:47:19,579 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:47:19,580 EPOCH 21 done: loss 0.0214 - lr: 0.000005 +2024-10-01 23:47:29,006 DEV : loss 0.19522204995155334 - f1-score (micro avg) 0.9091 +2024-10-01 23:47:29,033 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:47:47,994 epoch 22 - iter 214/2142 - loss 0.02074850 - time (sec): 18.96 - samples/sec: 2684.85 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:48:06,677 epoch 22 - iter 428/2142 - loss 0.02006290 - time (sec): 37.64 - samples/sec: 2710.87 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:48:25,072 epoch 22 - iter 642/2142 - loss 0.02066122 - time (sec): 56.04 - samples/sec: 2720.87 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:48:43,696 epoch 22 - iter 856/2142 - loss 0.02025823 - time (sec): 74.66 - samples/sec: 2716.09 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:49:02,406 epoch 22 - iter 1070/2142 - loss 0.01961850 - time (sec): 93.37 - samples/sec: 2701.09 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:49:21,401 epoch 22 - iter 1284/2142 - loss 0.02003157 - time (sec): 112.37 - samples/sec: 2695.51 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:49:39,967 epoch 22 - iter 1498/2142 - loss 0.01955630 - time (sec): 130.93 - samples/sec: 2695.23 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:49:58,707 epoch 22 - iter 1712/2142 - loss 0.01920635 - time (sec): 149.67 - samples/sec: 2700.54 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:50:17,156 epoch 22 - iter 1926/2142 - loss 0.01944757 - time (sec): 168.12 - samples/sec: 2709.15 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:50:35,305 epoch 22 - iter 2140/2142 - loss 0.01959164 - time (sec): 186.27 - samples/sec: 2713.68 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:50:35,468 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:50:35,468 EPOCH 22 done: loss 0.0196 - lr: 0.000005 +2024-10-01 23:50:45,280 DEV : loss 0.18889281153678894 - f1-score (micro avg) 0.9104 +2024-10-01 23:50:45,312 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:51:04,114 epoch 23 - iter 214/2142 - loss 0.01196885 - time (sec): 18.80 - samples/sec: 2674.96 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:51:23,192 epoch 23 - iter 428/2142 - loss 0.01267112 - time (sec): 37.88 - samples/sec: 2700.71 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:51:41,181 epoch 23 - iter 642/2142 - loss 0.01485965 - time (sec): 55.87 - samples/sec: 2720.64 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:51:59,537 epoch 23 - iter 856/2142 - loss 0.01558384 - time (sec): 74.22 - samples/sec: 2729.32 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:52:17,976 epoch 23 - iter 1070/2142 - loss 0.01645496 - time (sec): 92.66 - samples/sec: 2723.89 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:52:36,793 epoch 23 - iter 1284/2142 - loss 0.01704762 - time (sec): 111.48 - samples/sec: 2718.23 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:52:55,385 epoch 23 - iter 1498/2142 - loss 0.01716325 - time (sec): 130.07 - samples/sec: 2717.69 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:53:13,792 epoch 23 - iter 1712/2142 - loss 0.01763997 - time (sec): 148.48 - samples/sec: 2716.04 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:53:32,439 epoch 23 - iter 1926/2142 - loss 0.01741144 - time (sec): 167.13 - samples/sec: 2720.20 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:53:50,827 epoch 23 - iter 2140/2142 - loss 0.01746344 - time (sec): 185.51 - samples/sec: 2724.95 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:53:50,997 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:53:50,998 EPOCH 23 done: loss 0.0175 - lr: 0.000005 +2024-10-01 23:54:00,559 DEV : loss 0.20625196397304535 - f1-score (micro avg) 0.9101 +2024-10-01 23:54:00,588 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:54:19,204 epoch 24 - iter 214/2142 - loss 0.01263909 - time (sec): 18.61 - samples/sec: 2748.40 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:54:37,451 epoch 24 - iter 428/2142 - loss 0.01398118 - time (sec): 36.86 - samples/sec: 2717.67 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:54:56,511 epoch 24 - iter 642/2142 - loss 0.01603485 - time (sec): 55.92 - samples/sec: 2715.88 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:55:14,796 epoch 24 - iter 856/2142 - loss 0.01644696 - time (sec): 74.21 - samples/sec: 2726.75 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:55:32,941 epoch 24 - iter 1070/2142 - loss 0.01688422 - time (sec): 92.35 - samples/sec: 2738.58 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:55:51,454 epoch 24 - iter 1284/2142 - loss 0.01643507 - time (sec): 110.86 - samples/sec: 2725.78 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:56:09,501 epoch 24 - iter 1498/2142 - loss 0.01680441 - time (sec): 128.91 - samples/sec: 2728.47 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:56:28,200 epoch 24 - iter 1712/2142 - loss 0.01717148 - time (sec): 147.61 - samples/sec: 2730.89 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:56:46,784 epoch 24 - iter 1926/2142 - loss 0.01699205 - time (sec): 166.19 - samples/sec: 2735.03 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:57:05,754 epoch 24 - iter 2140/2142 - loss 0.01720490 - time (sec): 185.16 - samples/sec: 2729.69 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:57:05,913 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:57:05,913 EPOCH 24 done: loss 0.0172 - lr: 0.000005 +2024-10-01 23:57:15,075 DEV : loss 0.20583805441856384 - f1-score (micro avg) 0.9122 +2024-10-01 23:57:15,102 ---------------------------------------------------------------------------------------------------- +2024-10-01 23:57:33,767 epoch 25 - iter 214/2142 - loss 0.01605558 - time (sec): 18.66 - samples/sec: 2735.93 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:57:52,395 epoch 25 - iter 428/2142 - loss 0.01452321 - time (sec): 37.29 - samples/sec: 2727.14 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:58:11,371 epoch 25 - iter 642/2142 - loss 0.01624131 - time (sec): 56.27 - samples/sec: 2709.60 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:58:29,617 epoch 25 - iter 856/2142 - loss 0.01638754 - time (sec): 74.51 - samples/sec: 2719.53 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:58:47,805 epoch 25 - iter 1070/2142 - loss 0.01681312 - time (sec): 92.70 - samples/sec: 2715.04 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:59:06,073 epoch 25 - iter 1284/2142 - loss 0.01677582 - time (sec): 110.97 - samples/sec: 2713.24 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:59:24,819 epoch 25 - iter 1498/2142 - loss 0.01671292 - time (sec): 129.72 - samples/sec: 2725.09 - lr: 0.000005 - momentum: 0.000000 +2024-10-01 23:59:43,346 epoch 25 - iter 1712/2142 - loss 0.01729621 - time (sec): 148.24 - samples/sec: 2723.52 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:00:02,091 epoch 25 - iter 1926/2142 - loss 0.01666171 - time (sec): 166.99 - samples/sec: 2724.39 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:00:20,771 epoch 25 - iter 2140/2142 - loss 0.01648001 - time (sec): 185.67 - samples/sec: 2722.21 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:00:20,956 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:00:20,957 EPOCH 25 done: loss 0.0165 - lr: 0.000005 +2024-10-02 00:00:30,856 DEV : loss 0.2190740555524826 - f1-score (micro avg) 0.9138 +2024-10-02 00:00:30,884 saving best model +2024-10-02 00:00:34,565 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:00:52,646 epoch 26 - iter 214/2142 - loss 0.01138792 - time (sec): 18.08 - samples/sec: 2708.68 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:01:10,851 epoch 26 - iter 428/2142 - loss 0.01267164 - time (sec): 36.28 - samples/sec: 2725.57 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:01:29,488 epoch 26 - iter 642/2142 - loss 0.01292301 - time (sec): 54.92 - samples/sec: 2741.05 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:01:48,273 epoch 26 - iter 856/2142 - loss 0.01283553 - time (sec): 73.71 - samples/sec: 2739.13 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:02:06,831 epoch 26 - iter 1070/2142 - loss 0.01332320 - time (sec): 92.26 - samples/sec: 2736.81 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:02:25,349 epoch 26 - iter 1284/2142 - loss 0.01328849 - time (sec): 110.78 - samples/sec: 2732.62 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:02:43,945 epoch 26 - iter 1498/2142 - loss 0.01320433 - time (sec): 129.38 - samples/sec: 2736.48 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:03:02,296 epoch 26 - iter 1712/2142 - loss 0.01313338 - time (sec): 147.73 - samples/sec: 2727.79 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:03:20,806 epoch 26 - iter 1926/2142 - loss 0.01365576 - time (sec): 166.24 - samples/sec: 2733.05 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:03:39,506 epoch 26 - iter 2140/2142 - loss 0.01374340 - time (sec): 184.94 - samples/sec: 2733.29 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:03:39,658 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:03:39,658 EPOCH 26 done: loss 0.0137 - lr: 0.000005 +2024-10-02 00:03:49,506 DEV : loss 0.22980715334415436 - f1-score (micro avg) 0.9144 +2024-10-02 00:03:49,534 saving best model +2024-10-02 00:03:52,806 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:04:11,372 epoch 27 - iter 214/2142 - loss 0.01089633 - time (sec): 18.56 - samples/sec: 2783.95 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:04:30,212 epoch 27 - iter 428/2142 - loss 0.01391137 - time (sec): 37.40 - samples/sec: 2722.13 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:04:48,801 epoch 27 - iter 642/2142 - loss 0.01225850 - time (sec): 55.99 - samples/sec: 2724.95 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:05:07,566 epoch 27 - iter 856/2142 - loss 0.01202856 - time (sec): 74.76 - samples/sec: 2732.34 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:05:25,870 epoch 27 - iter 1070/2142 - loss 0.01289505 - time (sec): 93.06 - samples/sec: 2740.14 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:05:44,019 epoch 27 - iter 1284/2142 - loss 0.01297610 - time (sec): 111.21 - samples/sec: 2739.80 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:06:02,779 epoch 27 - iter 1498/2142 - loss 0.01299541 - time (sec): 129.97 - samples/sec: 2736.79 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:06:21,500 epoch 27 - iter 1712/2142 - loss 0.01242972 - time (sec): 148.69 - samples/sec: 2733.81 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:06:39,812 epoch 27 - iter 1926/2142 - loss 0.01248215 - time (sec): 167.00 - samples/sec: 2735.47 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:06:58,395 epoch 27 - iter 2140/2142 - loss 0.01290737 - time (sec): 185.59 - samples/sec: 2723.91 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:06:58,560 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:06:58,561 EPOCH 27 done: loss 0.0129 - lr: 0.000005 +2024-10-02 00:07:07,506 DEV : loss 0.2295040339231491 - f1-score (micro avg) 0.907 +2024-10-02 00:07:07,532 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:07:25,698 epoch 28 - iter 214/2142 - loss 0.01004126 - time (sec): 18.16 - samples/sec: 2771.93 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:07:44,293 epoch 28 - iter 428/2142 - loss 0.01100848 - time (sec): 36.76 - samples/sec: 2742.46 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:08:03,180 epoch 28 - iter 642/2142 - loss 0.01169613 - time (sec): 55.65 - samples/sec: 2703.52 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:08:21,895 epoch 28 - iter 856/2142 - loss 0.01140129 - time (sec): 74.36 - samples/sec: 2715.03 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:08:40,295 epoch 28 - iter 1070/2142 - loss 0.01195760 - time (sec): 92.76 - samples/sec: 2712.73 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:08:59,079 epoch 28 - iter 1284/2142 - loss 0.01191201 - time (sec): 111.55 - samples/sec: 2711.74 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:09:17,820 epoch 28 - iter 1498/2142 - loss 0.01189915 - time (sec): 130.29 - samples/sec: 2713.19 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:09:36,373 epoch 28 - iter 1712/2142 - loss 0.01146034 - time (sec): 148.84 - samples/sec: 2710.93 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:09:54,991 epoch 28 - iter 1926/2142 - loss 0.01127094 - time (sec): 167.46 - samples/sec: 2718.15 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:10:13,403 epoch 28 - iter 2140/2142 - loss 0.01132752 - time (sec): 185.87 - samples/sec: 2719.62 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:10:13,549 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:10:13,549 EPOCH 28 done: loss 0.0113 - lr: 0.000005 +2024-10-02 00:10:23,262 DEV : loss 0.2260848730802536 - f1-score (micro avg) 0.9092 +2024-10-02 00:10:23,293 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:10:42,069 epoch 29 - iter 214/2142 - loss 0.00982167 - time (sec): 18.78 - samples/sec: 2745.98 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:11:00,712 epoch 29 - iter 428/2142 - loss 0.00944199 - time (sec): 37.42 - samples/sec: 2731.33 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:11:19,644 epoch 29 - iter 642/2142 - loss 0.00950900 - time (sec): 56.35 - samples/sec: 2710.46 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:11:38,650 epoch 29 - iter 856/2142 - loss 0.01072979 - time (sec): 75.36 - samples/sec: 2715.18 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:11:57,090 epoch 29 - iter 1070/2142 - loss 0.01031082 - time (sec): 93.80 - samples/sec: 2714.72 - lr: 0.000005 - momentum: 0.000000 +2024-10-02 00:12:16,056 epoch 29 - iter 1284/2142 - loss 0.01020073 - time (sec): 112.76 - samples/sec: 2710.43 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:12:34,416 epoch 29 - iter 1498/2142 - loss 0.01004988 - time (sec): 131.12 - samples/sec: 2711.02 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:12:52,669 epoch 29 - iter 1712/2142 - loss 0.01035371 - time (sec): 149.37 - samples/sec: 2719.61 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:13:10,795 epoch 29 - iter 1926/2142 - loss 0.01077245 - time (sec): 167.50 - samples/sec: 2723.11 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:13:28,881 epoch 29 - iter 2140/2142 - loss 0.01103769 - time (sec): 185.59 - samples/sec: 2723.90 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:13:29,043 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:13:29,044 EPOCH 29 done: loss 0.0110 - lr: 0.000004 +2024-10-02 00:13:38,800 DEV : loss 0.23393714427947998 - f1-score (micro avg) 0.9143 +2024-10-02 00:13:38,829 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:13:57,543 epoch 30 - iter 214/2142 - loss 0.00649527 - time (sec): 18.71 - samples/sec: 2697.19 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:14:15,814 epoch 30 - iter 428/2142 - loss 0.00852823 - time (sec): 36.98 - samples/sec: 2724.87 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:14:34,503 epoch 30 - iter 642/2142 - loss 0.00903096 - time (sec): 55.67 - samples/sec: 2719.99 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:14:53,383 epoch 30 - iter 856/2142 - loss 0.00867947 - time (sec): 74.55 - samples/sec: 2707.40 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:15:11,631 epoch 30 - iter 1070/2142 - loss 0.00887245 - time (sec): 92.80 - samples/sec: 2717.13 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:15:30,015 epoch 30 - iter 1284/2142 - loss 0.00864551 - time (sec): 111.18 - samples/sec: 2719.57 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:15:48,585 epoch 30 - iter 1498/2142 - loss 0.00914949 - time (sec): 129.75 - samples/sec: 2720.97 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:16:07,166 epoch 30 - iter 1712/2142 - loss 0.00970244 - time (sec): 148.34 - samples/sec: 2717.17 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:16:25,744 epoch 30 - iter 1926/2142 - loss 0.00989054 - time (sec): 166.91 - samples/sec: 2715.51 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:16:44,902 epoch 30 - iter 2140/2142 - loss 0.01004097 - time (sec): 186.07 - samples/sec: 2716.35 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:16:45,069 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:16:45,070 EPOCH 30 done: loss 0.0100 - lr: 0.000004 +2024-10-02 00:16:54,511 DEV : loss 0.23161225020885468 - f1-score (micro avg) 0.9154 +2024-10-02 00:16:54,540 saving best model +2024-10-02 00:16:58,367 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:17:17,092 epoch 31 - iter 214/2142 - loss 0.00946394 - time (sec): 18.72 - samples/sec: 2721.32 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:17:35,586 epoch 31 - iter 428/2142 - loss 0.01129475 - time (sec): 37.21 - samples/sec: 2714.39 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:17:54,148 epoch 31 - iter 642/2142 - loss 0.01196756 - time (sec): 55.77 - samples/sec: 2719.91 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:18:12,597 epoch 31 - iter 856/2142 - loss 0.01194497 - time (sec): 74.22 - samples/sec: 2718.53 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:18:31,057 epoch 31 - iter 1070/2142 - loss 0.01138620 - time (sec): 92.68 - samples/sec: 2719.34 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:18:49,502 epoch 31 - iter 1284/2142 - loss 0.01086054 - time (sec): 111.13 - samples/sec: 2719.83 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:19:08,360 epoch 31 - iter 1498/2142 - loss 0.01086464 - time (sec): 129.99 - samples/sec: 2715.23 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:19:26,796 epoch 31 - iter 1712/2142 - loss 0.01030726 - time (sec): 148.42 - samples/sec: 2717.35 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:19:45,221 epoch 31 - iter 1926/2142 - loss 0.01023873 - time (sec): 166.85 - samples/sec: 2727.21 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:20:04,094 epoch 31 - iter 2140/2142 - loss 0.01014967 - time (sec): 185.72 - samples/sec: 2721.45 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:20:04,249 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:20:04,250 EPOCH 31 done: loss 0.0101 - lr: 0.000004 +2024-10-02 00:20:13,942 DEV : loss 0.24021068215370178 - f1-score (micro avg) 0.9123 +2024-10-02 00:20:13,973 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:20:32,211 epoch 32 - iter 214/2142 - loss 0.00568941 - time (sec): 18.24 - samples/sec: 2728.13 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:20:51,171 epoch 32 - iter 428/2142 - loss 0.00555791 - time (sec): 37.20 - samples/sec: 2709.54 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:21:09,473 epoch 32 - iter 642/2142 - loss 0.00695211 - time (sec): 55.50 - samples/sec: 2725.53 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:21:28,402 epoch 32 - iter 856/2142 - loss 0.00756493 - time (sec): 74.43 - samples/sec: 2723.27 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:21:46,981 epoch 32 - iter 1070/2142 - loss 0.00752011 - time (sec): 93.01 - samples/sec: 2721.70 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:22:05,703 epoch 32 - iter 1284/2142 - loss 0.00768770 - time (sec): 111.73 - samples/sec: 2725.53 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:22:24,247 epoch 32 - iter 1498/2142 - loss 0.00802788 - time (sec): 130.27 - samples/sec: 2723.29 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:22:42,645 epoch 32 - iter 1712/2142 - loss 0.00832811 - time (sec): 148.67 - samples/sec: 2723.49 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:23:01,189 epoch 32 - iter 1926/2142 - loss 0.00835340 - time (sec): 167.21 - samples/sec: 2724.02 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:23:19,202 epoch 32 - iter 2140/2142 - loss 0.00839130 - time (sec): 185.23 - samples/sec: 2728.60 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:23:19,363 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:23:19,364 EPOCH 32 done: loss 0.0084 - lr: 0.000004 +2024-10-02 00:23:29,251 DEV : loss 0.2395702302455902 - f1-score (micro avg) 0.9111 +2024-10-02 00:23:29,284 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:23:47,608 epoch 33 - iter 214/2142 - loss 0.00614974 - time (sec): 18.32 - samples/sec: 2690.96 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:24:05,924 epoch 33 - iter 428/2142 - loss 0.00561933 - time (sec): 36.64 - samples/sec: 2729.19 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:24:24,268 epoch 33 - iter 642/2142 - loss 0.00628721 - time (sec): 54.98 - samples/sec: 2734.71 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:24:42,592 epoch 33 - iter 856/2142 - loss 0.00647317 - time (sec): 73.31 - samples/sec: 2735.31 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:25:01,351 epoch 33 - iter 1070/2142 - loss 0.00680311 - time (sec): 92.06 - samples/sec: 2729.29 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:25:19,731 epoch 33 - iter 1284/2142 - loss 0.00729190 - time (sec): 110.45 - samples/sec: 2725.33 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:25:38,738 epoch 33 - iter 1498/2142 - loss 0.00764838 - time (sec): 129.45 - samples/sec: 2725.80 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:25:57,619 epoch 33 - iter 1712/2142 - loss 0.00806488 - time (sec): 148.33 - samples/sec: 2721.57 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:26:16,403 epoch 33 - iter 1926/2142 - loss 0.00831405 - time (sec): 167.12 - samples/sec: 2722.23 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:26:34,838 epoch 33 - iter 2140/2142 - loss 0.00809460 - time (sec): 185.55 - samples/sec: 2724.55 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:26:34,981 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:26:34,982 EPOCH 33 done: loss 0.0081 - lr: 0.000004 +2024-10-02 00:26:43,951 DEV : loss 0.2492491602897644 - f1-score (micro avg) 0.9111 +2024-10-02 00:26:43,987 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:27:02,302 epoch 34 - iter 214/2142 - loss 0.00657427 - time (sec): 18.31 - samples/sec: 2785.69 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:27:20,944 epoch 34 - iter 428/2142 - loss 0.00752495 - time (sec): 36.96 - samples/sec: 2736.25 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:27:39,145 epoch 34 - iter 642/2142 - loss 0.00733752 - time (sec): 55.16 - samples/sec: 2725.32 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:27:58,323 epoch 34 - iter 856/2142 - loss 0.00760837 - time (sec): 74.33 - samples/sec: 2718.18 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:28:16,509 epoch 34 - iter 1070/2142 - loss 0.00746837 - time (sec): 92.52 - samples/sec: 2714.68 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:28:34,866 epoch 34 - iter 1284/2142 - loss 0.00703369 - time (sec): 110.88 - samples/sec: 2719.51 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:28:53,548 epoch 34 - iter 1498/2142 - loss 0.00708467 - time (sec): 129.56 - samples/sec: 2720.84 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:29:12,443 epoch 34 - iter 1712/2142 - loss 0.00726671 - time (sec): 148.45 - samples/sec: 2713.80 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:29:31,181 epoch 34 - iter 1926/2142 - loss 0.00717094 - time (sec): 167.19 - samples/sec: 2711.27 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:29:49,928 epoch 34 - iter 2140/2142 - loss 0.00722846 - time (sec): 185.94 - samples/sec: 2718.47 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:29:50,100 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:29:50,100 EPOCH 34 done: loss 0.0072 - lr: 0.000004 +2024-10-02 00:29:59,909 DEV : loss 0.251314252614975 - f1-score (micro avg) 0.911 +2024-10-02 00:29:59,936 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:30:18,525 epoch 35 - iter 214/2142 - loss 0.00484391 - time (sec): 18.59 - samples/sec: 2693.66 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:30:37,492 epoch 35 - iter 428/2142 - loss 0.00461059 - time (sec): 37.55 - samples/sec: 2683.25 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:30:55,981 epoch 35 - iter 642/2142 - loss 0.00511094 - time (sec): 56.04 - samples/sec: 2702.28 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:31:14,781 epoch 35 - iter 856/2142 - loss 0.00602892 - time (sec): 74.84 - samples/sec: 2710.48 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:31:33,097 epoch 35 - iter 1070/2142 - loss 0.00612479 - time (sec): 93.16 - samples/sec: 2718.48 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:31:51,590 epoch 35 - iter 1284/2142 - loss 0.00628143 - time (sec): 111.65 - samples/sec: 2714.53 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:32:10,194 epoch 35 - iter 1498/2142 - loss 0.00666093 - time (sec): 130.26 - samples/sec: 2715.89 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:32:28,731 epoch 35 - iter 1712/2142 - loss 0.00686666 - time (sec): 148.79 - samples/sec: 2710.20 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:32:47,666 epoch 35 - iter 1926/2142 - loss 0.00684655 - time (sec): 167.73 - samples/sec: 2712.12 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:33:06,094 epoch 35 - iter 2140/2142 - loss 0.00670604 - time (sec): 186.16 - samples/sec: 2714.70 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:33:06,264 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:33:06,265 EPOCH 35 done: loss 0.0067 - lr: 0.000004 +2024-10-02 00:33:15,934 DEV : loss 0.2599010169506073 - f1-score (micro avg) 0.9134 +2024-10-02 00:33:15,964 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:33:34,455 epoch 36 - iter 214/2142 - loss 0.00436349 - time (sec): 18.49 - samples/sec: 2694.63 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:33:52,988 epoch 36 - iter 428/2142 - loss 0.00494851 - time (sec): 37.02 - samples/sec: 2718.20 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:34:11,526 epoch 36 - iter 642/2142 - loss 0.00465747 - time (sec): 55.56 - samples/sec: 2720.14 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:34:30,121 epoch 36 - iter 856/2142 - loss 0.00459506 - time (sec): 74.16 - samples/sec: 2711.52 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:34:48,715 epoch 36 - iter 1070/2142 - loss 0.00532350 - time (sec): 92.75 - samples/sec: 2723.04 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:35:07,302 epoch 36 - iter 1284/2142 - loss 0.00602818 - time (sec): 111.34 - samples/sec: 2718.92 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:35:26,052 epoch 36 - iter 1498/2142 - loss 0.00583924 - time (sec): 130.09 - samples/sec: 2721.50 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:35:44,271 epoch 36 - iter 1712/2142 - loss 0.00595087 - time (sec): 148.31 - samples/sec: 2721.90 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:36:03,463 epoch 36 - iter 1926/2142 - loss 0.00617752 - time (sec): 167.50 - samples/sec: 2718.17 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:36:21,905 epoch 36 - iter 2140/2142 - loss 0.00621862 - time (sec): 185.94 - samples/sec: 2718.71 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:36:22,048 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:36:22,049 EPOCH 36 done: loss 0.0062 - lr: 0.000004 +2024-10-02 00:36:30,902 DEV : loss 0.25161662697792053 - f1-score (micro avg) 0.9162 +2024-10-02 00:36:30,934 saving best model +2024-10-02 00:36:34,405 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:36:53,051 epoch 37 - iter 214/2142 - loss 0.00365850 - time (sec): 18.64 - samples/sec: 2713.99 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:37:12,169 epoch 37 - iter 428/2142 - loss 0.00503114 - time (sec): 37.76 - samples/sec: 2718.68 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:37:29,985 epoch 37 - iter 642/2142 - loss 0.00651455 - time (sec): 55.57 - samples/sec: 2740.69 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:37:48,594 epoch 37 - iter 856/2142 - loss 0.00659711 - time (sec): 74.18 - samples/sec: 2742.02 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:38:07,237 epoch 37 - iter 1070/2142 - loss 0.00657920 - time (sec): 92.83 - samples/sec: 2733.91 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:38:25,878 epoch 37 - iter 1284/2142 - loss 0.00686503 - time (sec): 111.47 - samples/sec: 2728.43 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:38:44,182 epoch 37 - iter 1498/2142 - loss 0.00664046 - time (sec): 129.77 - samples/sec: 2733.04 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:39:02,315 epoch 37 - iter 1712/2142 - loss 0.00665286 - time (sec): 147.90 - samples/sec: 2735.22 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:39:20,890 epoch 37 - iter 1926/2142 - loss 0.00630726 - time (sec): 166.48 - samples/sec: 2737.29 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:39:39,311 epoch 37 - iter 2140/2142 - loss 0.00622773 - time (sec): 184.90 - samples/sec: 2733.79 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:39:39,453 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:39:39,454 EPOCH 37 done: loss 0.0062 - lr: 0.000004 +2024-10-02 00:39:49,212 DEV : loss 0.2608526349067688 - f1-score (micro avg) 0.9153 +2024-10-02 00:39:49,239 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:40:07,847 epoch 38 - iter 214/2142 - loss 0.00563491 - time (sec): 18.61 - samples/sec: 2703.20 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:40:26,698 epoch 38 - iter 428/2142 - loss 0.00518829 - time (sec): 37.46 - samples/sec: 2688.70 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:40:44,999 epoch 38 - iter 642/2142 - loss 0.00497558 - time (sec): 55.76 - samples/sec: 2710.29 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:41:03,626 epoch 38 - iter 856/2142 - loss 0.00598859 - time (sec): 74.39 - samples/sec: 2705.99 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:41:22,375 epoch 38 - iter 1070/2142 - loss 0.00578180 - time (sec): 93.13 - samples/sec: 2712.53 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:41:40,841 epoch 38 - iter 1284/2142 - loss 0.00542980 - time (sec): 111.60 - samples/sec: 2718.54 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:41:59,046 epoch 38 - iter 1498/2142 - loss 0.00568022 - time (sec): 129.80 - samples/sec: 2718.80 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:42:17,366 epoch 38 - iter 1712/2142 - loss 0.00566022 - time (sec): 148.13 - samples/sec: 2724.11 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:42:36,475 epoch 38 - iter 1926/2142 - loss 0.00569368 - time (sec): 167.23 - samples/sec: 2720.54 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:42:54,757 epoch 38 - iter 2140/2142 - loss 0.00574377 - time (sec): 185.52 - samples/sec: 2725.02 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:42:54,906 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:42:54,907 EPOCH 38 done: loss 0.0057 - lr: 0.000004 +2024-10-02 00:43:03,860 DEV : loss 0.2593044340610504 - f1-score (micro avg) 0.915 +2024-10-02 00:43:03,890 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:43:22,780 epoch 39 - iter 214/2142 - loss 0.00766924 - time (sec): 18.89 - samples/sec: 2701.49 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:43:41,529 epoch 39 - iter 428/2142 - loss 0.00577691 - time (sec): 37.64 - samples/sec: 2740.48 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:43:59,980 epoch 39 - iter 642/2142 - loss 0.00600236 - time (sec): 56.09 - samples/sec: 2737.27 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:44:18,637 epoch 39 - iter 856/2142 - loss 0.00528176 - time (sec): 74.75 - samples/sec: 2745.66 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:44:36,950 epoch 39 - iter 1070/2142 - loss 0.00507351 - time (sec): 93.06 - samples/sec: 2737.89 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:44:55,548 epoch 39 - iter 1284/2142 - loss 0.00490464 - time (sec): 111.66 - samples/sec: 2725.35 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:45:13,962 epoch 39 - iter 1498/2142 - loss 0.00539817 - time (sec): 130.07 - samples/sec: 2728.47 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:45:32,716 epoch 39 - iter 1712/2142 - loss 0.00534104 - time (sec): 148.82 - samples/sec: 2723.01 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:45:50,815 epoch 39 - iter 1926/2142 - loss 0.00555702 - time (sec): 166.92 - samples/sec: 2722.05 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:46:09,063 epoch 39 - iter 2140/2142 - loss 0.00535206 - time (sec): 185.17 - samples/sec: 2729.84 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:46:09,206 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:46:09,207 EPOCH 39 done: loss 0.0053 - lr: 0.000004 +2024-10-02 00:46:19,073 DEV : loss 0.26448681950569153 - f1-score (micro avg) 0.9151 +2024-10-02 00:46:19,100 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:46:37,416 epoch 40 - iter 214/2142 - loss 0.00313355 - time (sec): 18.32 - samples/sec: 2736.14 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:46:55,990 epoch 40 - iter 428/2142 - loss 0.00349773 - time (sec): 36.89 - samples/sec: 2729.84 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:47:14,518 epoch 40 - iter 642/2142 - loss 0.00425413 - time (sec): 55.42 - samples/sec: 2728.84 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:47:33,261 epoch 40 - iter 856/2142 - loss 0.00446481 - time (sec): 74.16 - samples/sec: 2735.26 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:47:51,600 epoch 40 - iter 1070/2142 - loss 0.00440899 - time (sec): 92.50 - samples/sec: 2734.96 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:48:10,350 epoch 40 - iter 1284/2142 - loss 0.00496908 - time (sec): 111.25 - samples/sec: 2735.08 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:48:29,363 epoch 40 - iter 1498/2142 - loss 0.00517989 - time (sec): 130.26 - samples/sec: 2725.64 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:48:47,809 epoch 40 - iter 1712/2142 - loss 0.00475453 - time (sec): 148.71 - samples/sec: 2721.80 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:49:06,734 epoch 40 - iter 1926/2142 - loss 0.00506308 - time (sec): 167.63 - samples/sec: 2718.78 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:49:25,039 epoch 40 - iter 2140/2142 - loss 0.00511467 - time (sec): 185.94 - samples/sec: 2718.24 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:49:25,201 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:49:25,202 EPOCH 40 done: loss 0.0051 - lr: 0.000004 +2024-10-02 00:49:34,830 DEV : loss 0.26380565762519836 - f1-score (micro avg) 0.9134 +2024-10-02 00:49:34,857 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:49:53,375 epoch 41 - iter 214/2142 - loss 0.00525231 - time (sec): 18.52 - samples/sec: 2759.25 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:50:11,599 epoch 41 - iter 428/2142 - loss 0.00513528 - time (sec): 36.74 - samples/sec: 2772.54 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:50:30,950 epoch 41 - iter 642/2142 - loss 0.00609033 - time (sec): 56.09 - samples/sec: 2759.24 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:50:49,097 epoch 41 - iter 856/2142 - loss 0.00544809 - time (sec): 74.24 - samples/sec: 2765.94 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:51:07,205 epoch 41 - iter 1070/2142 - loss 0.00516215 - time (sec): 92.35 - samples/sec: 2753.39 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:51:25,934 epoch 41 - iter 1284/2142 - loss 0.00516067 - time (sec): 111.08 - samples/sec: 2734.42 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:51:44,641 epoch 41 - iter 1498/2142 - loss 0.00493974 - time (sec): 129.78 - samples/sec: 2730.06 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:52:03,095 epoch 41 - iter 1712/2142 - loss 0.00512001 - time (sec): 148.24 - samples/sec: 2727.90 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:52:21,514 epoch 41 - iter 1926/2142 - loss 0.00501216 - time (sec): 166.66 - samples/sec: 2726.30 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:52:40,025 epoch 41 - iter 2140/2142 - loss 0.00469304 - time (sec): 185.17 - samples/sec: 2729.57 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:52:40,191 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:52:40,192 EPOCH 41 done: loss 0.0047 - lr: 0.000004 +2024-10-02 00:52:49,240 DEV : loss 0.27181363105773926 - f1-score (micro avg) 0.9163 +2024-10-02 00:52:49,267 saving best model +2024-10-02 00:52:52,793 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:53:11,703 epoch 42 - iter 214/2142 - loss 0.00440418 - time (sec): 18.91 - samples/sec: 2685.04 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:53:30,536 epoch 42 - iter 428/2142 - loss 0.00339264 - time (sec): 37.74 - samples/sec: 2664.10 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:53:48,917 epoch 42 - iter 642/2142 - loss 0.00376640 - time (sec): 56.12 - samples/sec: 2706.46 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:54:07,823 epoch 42 - iter 856/2142 - loss 0.00410837 - time (sec): 75.02 - samples/sec: 2710.17 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:54:26,007 epoch 42 - iter 1070/2142 - loss 0.00426283 - time (sec): 93.21 - samples/sec: 2716.99 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:54:44,333 epoch 42 - iter 1284/2142 - loss 0.00390786 - time (sec): 111.54 - samples/sec: 2716.50 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:55:02,342 epoch 42 - iter 1498/2142 - loss 0.00394963 - time (sec): 129.54 - samples/sec: 2722.21 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:55:20,844 epoch 42 - iter 1712/2142 - loss 0.00411100 - time (sec): 148.05 - samples/sec: 2728.16 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:55:39,804 epoch 42 - iter 1926/2142 - loss 0.00425759 - time (sec): 167.01 - samples/sec: 2721.34 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:55:58,459 epoch 42 - iter 2140/2142 - loss 0.00430478 - time (sec): 185.66 - samples/sec: 2723.00 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:55:58,607 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:55:58,607 EPOCH 42 done: loss 0.0043 - lr: 0.000004 +2024-10-02 00:56:08,513 DEV : loss 0.2770300805568695 - f1-score (micro avg) 0.9126 +2024-10-02 00:56:08,546 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:56:27,056 epoch 43 - iter 214/2142 - loss 0.00313049 - time (sec): 18.51 - samples/sec: 2722.88 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:56:45,450 epoch 43 - iter 428/2142 - loss 0.00313643 - time (sec): 36.90 - samples/sec: 2736.28 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:57:04,296 epoch 43 - iter 642/2142 - loss 0.00409971 - time (sec): 55.75 - samples/sec: 2717.60 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:57:22,785 epoch 43 - iter 856/2142 - loss 0.00357718 - time (sec): 74.24 - samples/sec: 2722.15 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:57:41,170 epoch 43 - iter 1070/2142 - loss 0.00379453 - time (sec): 92.62 - samples/sec: 2727.55 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:57:59,571 epoch 43 - iter 1284/2142 - loss 0.00403731 - time (sec): 111.02 - samples/sec: 2719.18 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:58:18,317 epoch 43 - iter 1498/2142 - loss 0.00413408 - time (sec): 129.77 - samples/sec: 2722.34 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:58:36,531 epoch 43 - iter 1712/2142 - loss 0.00429504 - time (sec): 147.98 - samples/sec: 2726.75 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:58:55,494 epoch 43 - iter 1926/2142 - loss 0.00435844 - time (sec): 166.95 - samples/sec: 2721.09 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:59:14,084 epoch 43 - iter 2140/2142 - loss 0.00457223 - time (sec): 185.54 - samples/sec: 2724.81 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 00:59:14,262 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:59:14,263 EPOCH 43 done: loss 0.0046 - lr: 0.000004 +2024-10-02 00:59:24,229 DEV : loss 0.2622102200984955 - f1-score (micro avg) 0.9152 +2024-10-02 00:59:24,262 ---------------------------------------------------------------------------------------------------- +2024-10-02 00:59:43,182 epoch 44 - iter 214/2142 - loss 0.00343479 - time (sec): 18.92 - samples/sec: 2714.85 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:00:01,849 epoch 44 - iter 428/2142 - loss 0.00328884 - time (sec): 37.59 - samples/sec: 2734.50 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:00:20,612 epoch 44 - iter 642/2142 - loss 0.00392026 - time (sec): 56.35 - samples/sec: 2715.59 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:00:38,873 epoch 44 - iter 856/2142 - loss 0.00416088 - time (sec): 74.61 - samples/sec: 2724.54 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:00:57,261 epoch 44 - iter 1070/2142 - loss 0.00380693 - time (sec): 93.00 - samples/sec: 2733.03 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:01:15,926 epoch 44 - iter 1284/2142 - loss 0.00359612 - time (sec): 111.66 - samples/sec: 2726.10 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:01:34,185 epoch 44 - iter 1498/2142 - loss 0.00362413 - time (sec): 129.92 - samples/sec: 2729.50 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:01:52,756 epoch 44 - iter 1712/2142 - loss 0.00388504 - time (sec): 148.49 - samples/sec: 2732.06 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:02:11,253 epoch 44 - iter 1926/2142 - loss 0.00408633 - time (sec): 166.99 - samples/sec: 2729.94 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:02:29,944 epoch 44 - iter 2140/2142 - loss 0.00438990 - time (sec): 185.68 - samples/sec: 2722.29 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:02:30,089 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:02:30,090 EPOCH 44 done: loss 0.0044 - lr: 0.000004 +2024-10-02 01:02:38,921 DEV : loss 0.27540066838264465 - f1-score (micro avg) 0.9144 +2024-10-02 01:02:38,953 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:02:57,473 epoch 45 - iter 214/2142 - loss 0.00349859 - time (sec): 18.52 - samples/sec: 2757.52 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:03:16,109 epoch 45 - iter 428/2142 - loss 0.00442377 - time (sec): 37.15 - samples/sec: 2732.99 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:03:34,667 epoch 45 - iter 642/2142 - loss 0.00423346 - time (sec): 55.71 - samples/sec: 2719.33 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:03:53,390 epoch 45 - iter 856/2142 - loss 0.00420943 - time (sec): 74.44 - samples/sec: 2723.60 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:04:11,844 epoch 45 - iter 1070/2142 - loss 0.00385504 - time (sec): 92.89 - samples/sec: 2721.02 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:04:30,425 epoch 45 - iter 1284/2142 - loss 0.00413673 - time (sec): 111.47 - samples/sec: 2720.78 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:04:49,169 epoch 45 - iter 1498/2142 - loss 0.00401127 - time (sec): 130.21 - samples/sec: 2716.98 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:05:07,735 epoch 45 - iter 1712/2142 - loss 0.00383169 - time (sec): 148.78 - samples/sec: 2723.60 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:05:26,422 epoch 45 - iter 1926/2142 - loss 0.00407317 - time (sec): 167.47 - samples/sec: 2721.40 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:05:44,708 epoch 45 - iter 2140/2142 - loss 0.00426792 - time (sec): 185.75 - samples/sec: 2721.58 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:05:44,855 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:05:44,856 EPOCH 45 done: loss 0.0043 - lr: 0.000004 +2024-10-02 01:05:54,453 DEV : loss 0.2830964922904968 - f1-score (micro avg) 0.9146 +2024-10-02 01:05:54,482 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:06:13,092 epoch 46 - iter 214/2142 - loss 0.00359222 - time (sec): 18.61 - samples/sec: 2719.94 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:06:31,695 epoch 46 - iter 428/2142 - loss 0.00264389 - time (sec): 37.21 - samples/sec: 2697.58 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:06:50,876 epoch 46 - iter 642/2142 - loss 0.00218045 - time (sec): 56.39 - samples/sec: 2684.62 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:07:09,366 epoch 46 - iter 856/2142 - loss 0.00248274 - time (sec): 74.88 - samples/sec: 2694.97 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:07:28,103 epoch 46 - iter 1070/2142 - loss 0.00278056 - time (sec): 93.62 - samples/sec: 2692.52 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:07:46,674 epoch 46 - iter 1284/2142 - loss 0.00297291 - time (sec): 112.19 - samples/sec: 2690.13 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:08:04,743 epoch 46 - iter 1498/2142 - loss 0.00303597 - time (sec): 130.26 - samples/sec: 2701.09 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:08:23,392 epoch 46 - iter 1712/2142 - loss 0.00330873 - time (sec): 148.91 - samples/sec: 2711.39 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:08:41,815 epoch 46 - iter 1926/2142 - loss 0.00355004 - time (sec): 167.33 - samples/sec: 2719.43 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:09:00,087 epoch 46 - iter 2140/2142 - loss 0.00354646 - time (sec): 185.60 - samples/sec: 2723.76 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:09:00,239 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:09:00,240 EPOCH 46 done: loss 0.0035 - lr: 0.000004 +2024-10-02 01:09:09,892 DEV : loss 0.28005966544151306 - f1-score (micro avg) 0.9123 +2024-10-02 01:09:09,923 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:09:28,546 epoch 47 - iter 214/2142 - loss 0.00366590 - time (sec): 18.62 - samples/sec: 2673.20 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:09:46,704 epoch 47 - iter 428/2142 - loss 0.00339615 - time (sec): 36.78 - samples/sec: 2734.96 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:10:05,830 epoch 47 - iter 642/2142 - loss 0.00304565 - time (sec): 55.91 - samples/sec: 2711.88 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:10:24,266 epoch 47 - iter 856/2142 - loss 0.00290872 - time (sec): 74.34 - samples/sec: 2723.70 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:10:42,858 epoch 47 - iter 1070/2142 - loss 0.00266562 - time (sec): 92.93 - samples/sec: 2721.62 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:11:01,614 epoch 47 - iter 1284/2142 - loss 0.00267079 - time (sec): 111.69 - samples/sec: 2715.03 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:11:20,697 epoch 47 - iter 1498/2142 - loss 0.00275219 - time (sec): 130.77 - samples/sec: 2720.75 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:11:39,424 epoch 47 - iter 1712/2142 - loss 0.00293029 - time (sec): 149.50 - samples/sec: 2719.66 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:11:57,539 epoch 47 - iter 1926/2142 - loss 0.00309721 - time (sec): 167.61 - samples/sec: 2721.93 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:12:15,955 epoch 47 - iter 2140/2142 - loss 0.00313697 - time (sec): 186.03 - samples/sec: 2717.62 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:12:16,097 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:12:16,097 EPOCH 47 done: loss 0.0031 - lr: 0.000004 +2024-10-02 01:12:25,898 DEV : loss 0.27503782510757446 - f1-score (micro avg) 0.916 +2024-10-02 01:12:25,925 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:12:44,954 epoch 48 - iter 214/2142 - loss 0.00166290 - time (sec): 19.03 - samples/sec: 2689.23 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:13:03,514 epoch 48 - iter 428/2142 - loss 0.00182893 - time (sec): 37.59 - samples/sec: 2725.86 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:13:22,106 epoch 48 - iter 642/2142 - loss 0.00227661 - time (sec): 56.18 - samples/sec: 2740.28 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:13:40,261 epoch 48 - iter 856/2142 - loss 0.00279999 - time (sec): 74.33 - samples/sec: 2737.14 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:13:58,766 epoch 48 - iter 1070/2142 - loss 0.00308609 - time (sec): 92.84 - samples/sec: 2727.11 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:14:17,203 epoch 48 - iter 1284/2142 - loss 0.00307349 - time (sec): 111.28 - samples/sec: 2725.39 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:14:35,591 epoch 48 - iter 1498/2142 - loss 0.00337137 - time (sec): 129.67 - samples/sec: 2726.95 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:14:54,051 epoch 48 - iter 1712/2142 - loss 0.00330664 - time (sec): 148.12 - samples/sec: 2730.46 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:15:12,297 epoch 48 - iter 1926/2142 - loss 0.00331701 - time (sec): 166.37 - samples/sec: 2732.35 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:15:30,886 epoch 48 - iter 2140/2142 - loss 0.00328103 - time (sec): 184.96 - samples/sec: 2733.17 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:15:31,033 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:15:31,033 EPOCH 48 done: loss 0.0033 - lr: 0.000004 +2024-10-02 01:15:39,898 DEV : loss 0.2749992907047272 - f1-score (micro avg) 0.9162 +2024-10-02 01:15:39,927 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:15:58,046 epoch 49 - iter 214/2142 - loss 0.00232474 - time (sec): 18.12 - samples/sec: 2753.51 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:16:16,640 epoch 49 - iter 428/2142 - loss 0.00281156 - time (sec): 36.71 - samples/sec: 2722.96 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:16:35,467 epoch 49 - iter 642/2142 - loss 0.00276529 - time (sec): 55.54 - samples/sec: 2724.55 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:16:54,535 epoch 49 - iter 856/2142 - loss 0.00319045 - time (sec): 74.61 - samples/sec: 2707.32 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:17:12,821 epoch 49 - iter 1070/2142 - loss 0.00313723 - time (sec): 92.89 - samples/sec: 2713.02 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:17:31,302 epoch 49 - iter 1284/2142 - loss 0.00312289 - time (sec): 111.37 - samples/sec: 2712.71 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:17:50,022 epoch 49 - iter 1498/2142 - loss 0.00326008 - time (sec): 130.09 - samples/sec: 2712.15 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:18:08,560 epoch 49 - iter 1712/2142 - loss 0.00336056 - time (sec): 148.63 - samples/sec: 2717.45 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:18:26,786 epoch 49 - iter 1926/2142 - loss 0.00316341 - time (sec): 166.86 - samples/sec: 2722.89 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:18:45,633 epoch 49 - iter 2140/2142 - loss 0.00320290 - time (sec): 185.70 - samples/sec: 2721.91 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:18:45,800 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:18:45,801 EPOCH 49 done: loss 0.0032 - lr: 0.000004 +2024-10-02 01:18:55,669 DEV : loss 0.2895350754261017 - f1-score (micro avg) 0.9165 +2024-10-02 01:18:55,695 saving best model +2024-10-02 01:18:59,314 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:19:18,161 epoch 50 - iter 214/2142 - loss 0.00521048 - time (sec): 18.85 - samples/sec: 2675.03 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:19:36,935 epoch 50 - iter 428/2142 - loss 0.00483621 - time (sec): 37.62 - samples/sec: 2692.06 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:19:55,549 epoch 50 - iter 642/2142 - loss 0.00448985 - time (sec): 56.23 - samples/sec: 2685.13 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:20:14,515 epoch 50 - iter 856/2142 - loss 0.00386134 - time (sec): 75.20 - samples/sec: 2692.36 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:20:33,316 epoch 50 - iter 1070/2142 - loss 0.00350725 - time (sec): 94.00 - samples/sec: 2694.10 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:20:51,751 epoch 50 - iter 1284/2142 - loss 0.00348183 - time (sec): 112.44 - samples/sec: 2694.90 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:21:09,922 epoch 50 - iter 1498/2142 - loss 0.00326681 - time (sec): 130.61 - samples/sec: 2699.26 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:21:28,296 epoch 50 - iter 1712/2142 - loss 0.00342514 - time (sec): 148.98 - samples/sec: 2709.76 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:21:46,589 epoch 50 - iter 1926/2142 - loss 0.00344486 - time (sec): 167.27 - samples/sec: 2711.63 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:22:05,324 epoch 50 - iter 2140/2142 - loss 0.00331479 - time (sec): 186.01 - samples/sec: 2717.17 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:22:05,486 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:22:05,487 EPOCH 50 done: loss 0.0033 - lr: 0.000004 +2024-10-02 01:22:14,583 DEV : loss 0.28629782795906067 - f1-score (micro avg) 0.9148 +2024-10-02 01:22:14,610 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:22:33,025 epoch 51 - iter 214/2142 - loss 0.00245481 - time (sec): 18.41 - samples/sec: 2662.02 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:22:51,561 epoch 51 - iter 428/2142 - loss 0.00289524 - time (sec): 36.95 - samples/sec: 2712.53 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:23:10,235 epoch 51 - iter 642/2142 - loss 0.00328541 - time (sec): 55.62 - samples/sec: 2732.83 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:23:28,762 epoch 51 - iter 856/2142 - loss 0.00306258 - time (sec): 74.15 - samples/sec: 2725.85 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:23:46,726 epoch 51 - iter 1070/2142 - loss 0.00279716 - time (sec): 92.12 - samples/sec: 2735.35 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:24:05,186 epoch 51 - iter 1284/2142 - loss 0.00290995 - time (sec): 110.57 - samples/sec: 2740.83 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:24:23,917 epoch 51 - iter 1498/2142 - loss 0.00294597 - time (sec): 129.31 - samples/sec: 2726.10 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:24:42,702 epoch 51 - iter 1712/2142 - loss 0.00323367 - time (sec): 148.09 - samples/sec: 2722.67 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:25:01,170 epoch 51 - iter 1926/2142 - loss 0.00302863 - time (sec): 166.56 - samples/sec: 2725.74 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:25:21,536 epoch 51 - iter 2140/2142 - loss 0.00307555 - time (sec): 186.93 - samples/sec: 2704.15 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:25:21,708 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:25:21,709 EPOCH 51 done: loss 0.0031 - lr: 0.000004 +2024-10-02 01:25:31,261 DEV : loss 0.28480154275894165 - f1-score (micro avg) 0.914 +2024-10-02 01:25:31,374 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:25:50,333 epoch 52 - iter 214/2142 - loss 0.00233802 - time (sec): 18.95 - samples/sec: 2719.68 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:26:08,945 epoch 52 - iter 428/2142 - loss 0.00221342 - time (sec): 37.56 - samples/sec: 2691.50 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:26:27,462 epoch 52 - iter 642/2142 - loss 0.00242784 - time (sec): 56.08 - samples/sec: 2695.54 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:26:46,482 epoch 52 - iter 856/2142 - loss 0.00250525 - time (sec): 75.10 - samples/sec: 2699.64 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:27:04,859 epoch 52 - iter 1070/2142 - loss 0.00245698 - time (sec): 93.48 - samples/sec: 2691.30 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:27:23,321 epoch 52 - iter 1284/2142 - loss 0.00242085 - time (sec): 111.94 - samples/sec: 2694.41 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:27:41,923 epoch 52 - iter 1498/2142 - loss 0.00243147 - time (sec): 130.54 - samples/sec: 2702.31 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:28:00,643 epoch 52 - iter 1712/2142 - loss 0.00255700 - time (sec): 149.26 - samples/sec: 2709.08 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:28:19,547 epoch 52 - iter 1926/2142 - loss 0.00282010 - time (sec): 168.17 - samples/sec: 2711.70 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:28:37,544 epoch 52 - iter 2140/2142 - loss 0.00291575 - time (sec): 186.16 - samples/sec: 2715.73 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:28:37,693 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:28:37,694 EPOCH 52 done: loss 0.0029 - lr: 0.000004 +2024-10-02 01:28:47,726 DEV : loss 0.2900029718875885 - f1-score (micro avg) 0.9152 +2024-10-02 01:28:47,753 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:29:06,156 epoch 53 - iter 214/2142 - loss 0.00315186 - time (sec): 18.40 - samples/sec: 2716.37 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:29:24,491 epoch 53 - iter 428/2142 - loss 0.00390034 - time (sec): 36.74 - samples/sec: 2725.78 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:29:43,223 epoch 53 - iter 642/2142 - loss 0.00386562 - time (sec): 55.47 - samples/sec: 2730.39 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:30:01,611 epoch 53 - iter 856/2142 - loss 0.00349001 - time (sec): 73.86 - samples/sec: 2727.70 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:30:20,085 epoch 53 - iter 1070/2142 - loss 0.00331928 - time (sec): 92.33 - samples/sec: 2727.67 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:30:38,994 epoch 53 - iter 1284/2142 - loss 0.00329969 - time (sec): 111.24 - samples/sec: 2723.87 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:30:57,512 epoch 53 - iter 1498/2142 - loss 0.00306450 - time (sec): 129.76 - samples/sec: 2723.69 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:31:16,487 epoch 53 - iter 1712/2142 - loss 0.00319161 - time (sec): 148.73 - samples/sec: 2719.61 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:31:35,213 epoch 53 - iter 1926/2142 - loss 0.00314349 - time (sec): 167.46 - samples/sec: 2720.45 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:31:53,437 epoch 53 - iter 2140/2142 - loss 0.00314593 - time (sec): 185.68 - samples/sec: 2722.49 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:31:53,589 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:31:53,589 EPOCH 53 done: loss 0.0031 - lr: 0.000004 +2024-10-02 01:32:03,242 DEV : loss 0.27878618240356445 - f1-score (micro avg) 0.9153 +2024-10-02 01:32:03,269 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:32:21,482 epoch 54 - iter 214/2142 - loss 0.00261236 - time (sec): 18.21 - samples/sec: 2821.35 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:32:39,956 epoch 54 - iter 428/2142 - loss 0.00247213 - time (sec): 36.69 - samples/sec: 2743.80 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:32:58,539 epoch 54 - iter 642/2142 - loss 0.00249814 - time (sec): 55.27 - samples/sec: 2730.26 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:33:17,644 epoch 54 - iter 856/2142 - loss 0.00265566 - time (sec): 74.37 - samples/sec: 2723.37 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:33:36,178 epoch 54 - iter 1070/2142 - loss 0.00229228 - time (sec): 92.91 - samples/sec: 2713.13 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:33:54,971 epoch 54 - iter 1284/2142 - loss 0.00238454 - time (sec): 111.70 - samples/sec: 2712.95 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:34:13,156 epoch 54 - iter 1498/2142 - loss 0.00213597 - time (sec): 129.89 - samples/sec: 2716.44 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:34:31,682 epoch 54 - iter 1712/2142 - loss 0.00219946 - time (sec): 148.41 - samples/sec: 2719.18 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:34:50,153 epoch 54 - iter 1926/2142 - loss 0.00221120 - time (sec): 166.88 - samples/sec: 2718.16 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:35:09,066 epoch 54 - iter 2140/2142 - loss 0.00230849 - time (sec): 185.80 - samples/sec: 2720.55 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:35:09,253 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:35:09,254 EPOCH 54 done: loss 0.0023 - lr: 0.000004 +2024-10-02 01:35:18,413 DEV : loss 0.2866401672363281 - f1-score (micro avg) 0.9161 +2024-10-02 01:35:18,441 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:35:36,683 epoch 55 - iter 214/2142 - loss 0.00117141 - time (sec): 18.24 - samples/sec: 2724.89 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:35:55,161 epoch 55 - iter 428/2142 - loss 0.00131042 - time (sec): 36.72 - samples/sec: 2731.97 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:36:14,171 epoch 55 - iter 642/2142 - loss 0.00230903 - time (sec): 55.73 - samples/sec: 2715.47 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:36:32,679 epoch 55 - iter 856/2142 - loss 0.00208613 - time (sec): 74.24 - samples/sec: 2733.19 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:36:51,345 epoch 55 - iter 1070/2142 - loss 0.00185828 - time (sec): 92.90 - samples/sec: 2726.13 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:37:10,068 epoch 55 - iter 1284/2142 - loss 0.00181809 - time (sec): 111.63 - samples/sec: 2724.96 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:37:28,708 epoch 55 - iter 1498/2142 - loss 0.00206086 - time (sec): 130.26 - samples/sec: 2726.00 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:37:46,723 epoch 55 - iter 1712/2142 - loss 0.00220196 - time (sec): 148.28 - samples/sec: 2727.18 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:38:05,929 epoch 55 - iter 1926/2142 - loss 0.00219954 - time (sec): 167.49 - samples/sec: 2717.47 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:38:24,548 epoch 55 - iter 2140/2142 - loss 0.00227325 - time (sec): 186.11 - samples/sec: 2715.84 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:38:24,699 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:38:24,700 EPOCH 55 done: loss 0.0023 - lr: 0.000004 +2024-10-02 01:38:34,376 DEV : loss 0.2876834273338318 - f1-score (micro avg) 0.9163 +2024-10-02 01:38:34,406 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:38:52,977 epoch 56 - iter 214/2142 - loss 0.00153047 - time (sec): 18.57 - samples/sec: 2724.97 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:39:11,149 epoch 56 - iter 428/2142 - loss 0.00180770 - time (sec): 36.74 - samples/sec: 2717.85 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:39:30,117 epoch 56 - iter 642/2142 - loss 0.00229214 - time (sec): 55.71 - samples/sec: 2720.55 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:39:48,401 epoch 56 - iter 856/2142 - loss 0.00207621 - time (sec): 73.99 - samples/sec: 2720.14 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:40:06,804 epoch 56 - iter 1070/2142 - loss 0.00207145 - time (sec): 92.40 - samples/sec: 2727.42 - lr: 0.000004 - momentum: 0.000000 +2024-10-02 01:40:25,125 epoch 56 - iter 1284/2142 - loss 0.00223871 - time (sec): 110.72 - samples/sec: 2729.36 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:40:43,681 epoch 56 - iter 1498/2142 - loss 0.00228773 - time (sec): 129.27 - samples/sec: 2731.08 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:41:02,331 epoch 56 - iter 1712/2142 - loss 0.00262369 - time (sec): 147.92 - samples/sec: 2729.81 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:41:21,293 epoch 56 - iter 1926/2142 - loss 0.00248775 - time (sec): 166.89 - samples/sec: 2727.63 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:41:39,720 epoch 56 - iter 2140/2142 - loss 0.00242569 - time (sec): 185.31 - samples/sec: 2727.50 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:41:39,919 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:41:39,919 EPOCH 56 done: loss 0.0024 - lr: 0.000003 +2024-10-02 01:41:49,593 DEV : loss 0.29505908489227295 - f1-score (micro avg) 0.9185 +2024-10-02 01:41:49,620 saving best model +2024-10-02 01:41:53,328 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:42:11,699 epoch 57 - iter 214/2142 - loss 0.00157779 - time (sec): 18.37 - samples/sec: 2770.45 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:42:29,886 epoch 57 - iter 428/2142 - loss 0.00183465 - time (sec): 36.55 - samples/sec: 2735.78 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:42:48,135 epoch 57 - iter 642/2142 - loss 0.00166368 - time (sec): 54.80 - samples/sec: 2742.26 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:43:07,065 epoch 57 - iter 856/2142 - loss 0.00250743 - time (sec): 73.73 - samples/sec: 2742.58 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:43:25,496 epoch 57 - iter 1070/2142 - loss 0.00269749 - time (sec): 92.16 - samples/sec: 2738.94 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:43:43,998 epoch 57 - iter 1284/2142 - loss 0.00249140 - time (sec): 110.66 - samples/sec: 2732.33 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:44:02,707 epoch 57 - iter 1498/2142 - loss 0.00248819 - time (sec): 129.37 - samples/sec: 2730.36 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:44:21,037 epoch 57 - iter 1712/2142 - loss 0.00242684 - time (sec): 147.70 - samples/sec: 2732.75 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:44:39,622 epoch 57 - iter 1926/2142 - loss 0.00244136 - time (sec): 166.29 - samples/sec: 2729.84 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:44:58,819 epoch 57 - iter 2140/2142 - loss 0.00234417 - time (sec): 185.49 - samples/sec: 2725.40 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:44:58,983 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:44:58,983 EPOCH 57 done: loss 0.0023 - lr: 0.000003 +2024-10-02 01:45:08,138 DEV : loss 0.293289452791214 - f1-score (micro avg) 0.9161 +2024-10-02 01:45:08,180 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:45:26,612 epoch 58 - iter 214/2142 - loss 0.00229000 - time (sec): 18.43 - samples/sec: 2773.19 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:45:45,271 epoch 58 - iter 428/2142 - loss 0.00205979 - time (sec): 37.09 - samples/sec: 2767.64 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:46:03,431 epoch 58 - iter 642/2142 - loss 0.00214679 - time (sec): 55.25 - samples/sec: 2767.57 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:46:22,087 epoch 58 - iter 856/2142 - loss 0.00190226 - time (sec): 73.91 - samples/sec: 2751.49 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:46:40,735 epoch 58 - iter 1070/2142 - loss 0.00201715 - time (sec): 92.55 - samples/sec: 2742.00 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:46:59,389 epoch 58 - iter 1284/2142 - loss 0.00225144 - time (sec): 111.21 - samples/sec: 2729.89 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:47:18,206 epoch 58 - iter 1498/2142 - loss 0.00219616 - time (sec): 130.02 - samples/sec: 2723.17 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:47:37,138 epoch 58 - iter 1712/2142 - loss 0.00215304 - time (sec): 148.96 - samples/sec: 2715.84 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:47:55,735 epoch 58 - iter 1926/2142 - loss 0.00215772 - time (sec): 167.55 - samples/sec: 2717.61 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:48:14,056 epoch 58 - iter 2140/2142 - loss 0.00219014 - time (sec): 185.87 - samples/sec: 2719.94 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:48:14,231 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:48:14,232 EPOCH 58 done: loss 0.0022 - lr: 0.000003 +2024-10-02 01:48:23,968 DEV : loss 0.2928188443183899 - f1-score (micro avg) 0.9186 +2024-10-02 01:48:23,995 saving best model +2024-10-02 01:48:27,630 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:48:46,130 epoch 59 - iter 214/2142 - loss 0.00236179 - time (sec): 18.50 - samples/sec: 2732.96 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:49:04,917 epoch 59 - iter 428/2142 - loss 0.00265681 - time (sec): 37.29 - samples/sec: 2736.25 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:49:23,260 epoch 59 - iter 642/2142 - loss 0.00280495 - time (sec): 55.63 - samples/sec: 2729.76 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:49:41,992 epoch 59 - iter 856/2142 - loss 0.00245477 - time (sec): 74.36 - samples/sec: 2724.03 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:50:00,318 epoch 59 - iter 1070/2142 - loss 0.00232793 - time (sec): 92.69 - samples/sec: 2718.81 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:50:18,515 epoch 59 - iter 1284/2142 - loss 0.00220348 - time (sec): 110.88 - samples/sec: 2719.63 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:50:37,171 epoch 59 - iter 1498/2142 - loss 0.00219650 - time (sec): 129.54 - samples/sec: 2718.28 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:50:56,026 epoch 59 - iter 1712/2142 - loss 0.00220152 - time (sec): 148.39 - samples/sec: 2713.19 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:51:15,239 epoch 59 - iter 1926/2142 - loss 0.00255499 - time (sec): 167.61 - samples/sec: 2714.98 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:51:33,652 epoch 59 - iter 2140/2142 - loss 0.00262364 - time (sec): 186.02 - samples/sec: 2717.83 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:51:33,796 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:51:33,797 EPOCH 59 done: loss 0.0026 - lr: 0.000003 +2024-10-02 01:51:43,495 DEV : loss 0.2922593057155609 - f1-score (micro avg) 0.9169 +2024-10-02 01:51:43,523 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:52:02,042 epoch 60 - iter 214/2142 - loss 0.00193658 - time (sec): 18.52 - samples/sec: 2721.59 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:52:20,894 epoch 60 - iter 428/2142 - loss 0.00223613 - time (sec): 37.37 - samples/sec: 2694.46 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:52:39,405 epoch 60 - iter 642/2142 - loss 0.00214542 - time (sec): 55.88 - samples/sec: 2713.46 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:52:57,988 epoch 60 - iter 856/2142 - loss 0.00240153 - time (sec): 74.46 - samples/sec: 2709.70 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:53:16,483 epoch 60 - iter 1070/2142 - loss 0.00238082 - time (sec): 92.96 - samples/sec: 2716.11 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:53:34,968 epoch 60 - iter 1284/2142 - loss 0.00241807 - time (sec): 111.44 - samples/sec: 2717.31 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:53:53,640 epoch 60 - iter 1498/2142 - loss 0.00217614 - time (sec): 130.12 - samples/sec: 2718.14 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:54:11,935 epoch 60 - iter 1712/2142 - loss 0.00216507 - time (sec): 148.41 - samples/sec: 2722.58 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:54:30,844 epoch 60 - iter 1926/2142 - loss 0.00198830 - time (sec): 167.32 - samples/sec: 2720.12 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:54:49,649 epoch 60 - iter 2140/2142 - loss 0.00201110 - time (sec): 186.12 - samples/sec: 2716.18 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:54:49,801 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:54:49,802 EPOCH 60 done: loss 0.0020 - lr: 0.000003 +2024-10-02 01:54:58,816 DEV : loss 0.3045124113559723 - f1-score (micro avg) 0.9147 +2024-10-02 01:54:58,844 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:55:17,395 epoch 61 - iter 214/2142 - loss 0.00281090 - time (sec): 18.55 - samples/sec: 2686.65 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:55:36,349 epoch 61 - iter 428/2142 - loss 0.00238753 - time (sec): 37.50 - samples/sec: 2702.89 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:55:54,647 epoch 61 - iter 642/2142 - loss 0.00255059 - time (sec): 55.80 - samples/sec: 2704.81 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:56:13,805 epoch 61 - iter 856/2142 - loss 0.00226998 - time (sec): 74.96 - samples/sec: 2709.76 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:56:32,217 epoch 61 - iter 1070/2142 - loss 0.00207614 - time (sec): 93.37 - samples/sec: 2718.61 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:56:50,599 epoch 61 - iter 1284/2142 - loss 0.00221288 - time (sec): 111.75 - samples/sec: 2720.83 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:57:08,984 epoch 61 - iter 1498/2142 - loss 0.00215915 - time (sec): 130.14 - samples/sec: 2714.87 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:57:27,565 epoch 61 - iter 1712/2142 - loss 0.00218587 - time (sec): 148.72 - samples/sec: 2714.36 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:57:46,260 epoch 61 - iter 1926/2142 - loss 0.00211392 - time (sec): 167.42 - samples/sec: 2714.47 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:58:04,930 epoch 61 - iter 2140/2142 - loss 0.00210578 - time (sec): 186.09 - samples/sec: 2716.49 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:58:05,100 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:58:05,101 EPOCH 61 done: loss 0.0021 - lr: 0.000003 +2024-10-02 01:58:15,037 DEV : loss 0.2957592010498047 - f1-score (micro avg) 0.9176 +2024-10-02 01:58:15,066 ---------------------------------------------------------------------------------------------------- +2024-10-02 01:58:33,617 epoch 62 - iter 214/2142 - loss 0.00132175 - time (sec): 18.55 - samples/sec: 2713.95 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:58:51,963 epoch 62 - iter 428/2142 - loss 0.00222927 - time (sec): 36.90 - samples/sec: 2710.10 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:59:10,685 epoch 62 - iter 642/2142 - loss 0.00195846 - time (sec): 55.62 - samples/sec: 2711.21 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:59:29,625 epoch 62 - iter 856/2142 - loss 0.00244110 - time (sec): 74.56 - samples/sec: 2696.25 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 01:59:48,225 epoch 62 - iter 1070/2142 - loss 0.00240111 - time (sec): 93.16 - samples/sec: 2703.93 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:00:06,913 epoch 62 - iter 1284/2142 - loss 0.00261754 - time (sec): 111.85 - samples/sec: 2707.34 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:00:25,187 epoch 62 - iter 1498/2142 - loss 0.00236306 - time (sec): 130.12 - samples/sec: 2715.18 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:00:43,849 epoch 62 - iter 1712/2142 - loss 0.00233329 - time (sec): 148.78 - samples/sec: 2716.34 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:01:02,123 epoch 62 - iter 1926/2142 - loss 0.00233828 - time (sec): 167.06 - samples/sec: 2711.18 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:01:21,461 epoch 62 - iter 2140/2142 - loss 0.00226555 - time (sec): 186.39 - samples/sec: 2712.02 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:01:21,628 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:01:21,628 EPOCH 62 done: loss 0.0023 - lr: 0.000003 +2024-10-02 02:01:31,548 DEV : loss 0.3035697937011719 - f1-score (micro avg) 0.9115 +2024-10-02 02:01:31,575 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:01:50,375 epoch 63 - iter 214/2142 - loss 0.00146167 - time (sec): 18.80 - samples/sec: 2721.64 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:02:08,990 epoch 63 - iter 428/2142 - loss 0.00095749 - time (sec): 37.41 - samples/sec: 2738.06 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:02:27,648 epoch 63 - iter 642/2142 - loss 0.00117360 - time (sec): 56.07 - samples/sec: 2735.43 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:02:46,134 epoch 63 - iter 856/2142 - loss 0.00116085 - time (sec): 74.56 - samples/sec: 2736.84 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:03:04,401 epoch 63 - iter 1070/2142 - loss 0.00146877 - time (sec): 92.82 - samples/sec: 2728.06 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:03:23,136 epoch 63 - iter 1284/2142 - loss 0.00162369 - time (sec): 111.56 - samples/sec: 2733.25 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:03:41,506 epoch 63 - iter 1498/2142 - loss 0.00173334 - time (sec): 129.93 - samples/sec: 2731.04 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:04:00,132 epoch 63 - iter 1712/2142 - loss 0.00169366 - time (sec): 148.56 - samples/sec: 2726.63 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:04:18,153 epoch 63 - iter 1926/2142 - loss 0.00171279 - time (sec): 166.58 - samples/sec: 2731.70 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:04:37,044 epoch 63 - iter 2140/2142 - loss 0.00167236 - time (sec): 185.47 - samples/sec: 2725.15 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:04:37,212 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:04:37,212 EPOCH 63 done: loss 0.0017 - lr: 0.000003 +2024-10-02 02:04:46,237 DEV : loss 0.29815372824668884 - f1-score (micro avg) 0.9167 +2024-10-02 02:04:46,268 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:05:04,953 epoch 64 - iter 214/2142 - loss 0.00196362 - time (sec): 18.68 - samples/sec: 2691.55 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:05:23,528 epoch 64 - iter 428/2142 - loss 0.00213079 - time (sec): 37.26 - samples/sec: 2718.61 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:05:41,471 epoch 64 - iter 642/2142 - loss 0.00182857 - time (sec): 55.20 - samples/sec: 2720.15 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:06:00,493 epoch 64 - iter 856/2142 - loss 0.00156826 - time (sec): 74.22 - samples/sec: 2715.95 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:06:18,715 epoch 64 - iter 1070/2142 - loss 0.00151752 - time (sec): 92.44 - samples/sec: 2717.19 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:06:37,615 epoch 64 - iter 1284/2142 - loss 0.00170438 - time (sec): 111.34 - samples/sec: 2716.06 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:06:56,113 epoch 64 - iter 1498/2142 - loss 0.00188020 - time (sec): 129.84 - samples/sec: 2718.60 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:07:14,830 epoch 64 - iter 1712/2142 - loss 0.00178058 - time (sec): 148.56 - samples/sec: 2712.32 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:07:34,160 epoch 64 - iter 1926/2142 - loss 0.00164354 - time (sec): 167.89 - samples/sec: 2710.93 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:07:52,486 epoch 64 - iter 2140/2142 - loss 0.00169589 - time (sec): 186.22 - samples/sec: 2713.85 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:07:52,663 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:07:52,663 EPOCH 64 done: loss 0.0017 - lr: 0.000003 +2024-10-02 02:08:02,738 DEV : loss 0.29712846875190735 - f1-score (micro avg) 0.9152 +2024-10-02 02:08:02,772 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:08:21,317 epoch 65 - iter 214/2142 - loss 0.00155402 - time (sec): 18.54 - samples/sec: 2780.93 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:08:40,003 epoch 65 - iter 428/2142 - loss 0.00228056 - time (sec): 37.23 - samples/sec: 2744.65 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:08:58,185 epoch 65 - iter 642/2142 - loss 0.00188606 - time (sec): 55.41 - samples/sec: 2738.36 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:09:16,840 epoch 65 - iter 856/2142 - loss 0.00183116 - time (sec): 74.07 - samples/sec: 2742.66 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:09:35,121 epoch 65 - iter 1070/2142 - loss 0.00186446 - time (sec): 92.35 - samples/sec: 2736.44 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:09:53,916 epoch 65 - iter 1284/2142 - loss 0.00207201 - time (sec): 111.14 - samples/sec: 2735.92 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:10:13,082 epoch 65 - iter 1498/2142 - loss 0.00211172 - time (sec): 130.31 - samples/sec: 2724.32 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:10:31,465 epoch 65 - iter 1712/2142 - loss 0.00214126 - time (sec): 148.69 - samples/sec: 2725.10 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:10:50,026 epoch 65 - iter 1926/2142 - loss 0.00216968 - time (sec): 167.25 - samples/sec: 2727.37 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:11:08,298 epoch 65 - iter 2140/2142 - loss 0.00212097 - time (sec): 185.52 - samples/sec: 2725.00 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:11:08,517 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:11:08,518 EPOCH 65 done: loss 0.0021 - lr: 0.000003 +2024-10-02 02:11:18,519 DEV : loss 0.29073023796081543 - f1-score (micro avg) 0.9158 +2024-10-02 02:11:18,547 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:11:37,057 epoch 66 - iter 214/2142 - loss 0.00067438 - time (sec): 18.51 - samples/sec: 2727.98 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:11:55,863 epoch 66 - iter 428/2142 - loss 0.00069807 - time (sec): 37.31 - samples/sec: 2706.20 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:12:15,024 epoch 66 - iter 642/2142 - loss 0.00108815 - time (sec): 56.47 - samples/sec: 2693.74 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:12:34,160 epoch 66 - iter 856/2142 - loss 0.00114086 - time (sec): 75.61 - samples/sec: 2688.13 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:12:52,280 epoch 66 - iter 1070/2142 - loss 0.00114319 - time (sec): 93.73 - samples/sec: 2698.57 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:13:10,647 epoch 66 - iter 1284/2142 - loss 0.00123563 - time (sec): 112.10 - samples/sec: 2704.05 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:13:28,923 epoch 66 - iter 1498/2142 - loss 0.00138519 - time (sec): 130.37 - samples/sec: 2710.98 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:13:47,359 epoch 66 - iter 1712/2142 - loss 0.00151477 - time (sec): 148.81 - samples/sec: 2717.88 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:14:06,210 epoch 66 - iter 1926/2142 - loss 0.00166134 - time (sec): 167.66 - samples/sec: 2715.47 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:14:25,021 epoch 66 - iter 2140/2142 - loss 0.00168675 - time (sec): 186.47 - samples/sec: 2710.65 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:14:25,173 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:14:25,173 EPOCH 66 done: loss 0.0017 - lr: 0.000003 +2024-10-02 02:14:34,182 DEV : loss 0.305977463722229 - f1-score (micro avg) 0.9162 +2024-10-02 02:14:34,209 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:14:52,851 epoch 67 - iter 214/2142 - loss 0.00087528 - time (sec): 18.64 - samples/sec: 2705.88 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:15:11,486 epoch 67 - iter 428/2142 - loss 0.00070361 - time (sec): 37.28 - samples/sec: 2708.78 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:15:29,994 epoch 67 - iter 642/2142 - loss 0.00080505 - time (sec): 55.78 - samples/sec: 2704.54 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:15:49,166 epoch 67 - iter 856/2142 - loss 0.00134895 - time (sec): 74.96 - samples/sec: 2711.70 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:16:07,337 epoch 67 - iter 1070/2142 - loss 0.00146055 - time (sec): 93.13 - samples/sec: 2709.95 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:16:26,181 epoch 67 - iter 1284/2142 - loss 0.00144427 - time (sec): 111.97 - samples/sec: 2711.68 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:16:44,351 epoch 67 - iter 1498/2142 - loss 0.00144669 - time (sec): 130.14 - samples/sec: 2717.20 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:17:02,765 epoch 67 - iter 1712/2142 - loss 0.00159112 - time (sec): 148.55 - samples/sec: 2717.76 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:17:21,717 epoch 67 - iter 1926/2142 - loss 0.00161751 - time (sec): 167.51 - samples/sec: 2712.85 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:17:40,039 epoch 67 - iter 2140/2142 - loss 0.00163256 - time (sec): 185.83 - samples/sec: 2720.21 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:17:40,191 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:17:40,192 EPOCH 67 done: loss 0.0016 - lr: 0.000003 +2024-10-02 02:17:49,978 DEV : loss 0.3058227598667145 - f1-score (micro avg) 0.9155 +2024-10-02 02:17:50,008 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:18:09,070 epoch 68 - iter 214/2142 - loss 0.00111550 - time (sec): 19.06 - samples/sec: 2683.67 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:18:27,751 epoch 68 - iter 428/2142 - loss 0.00143603 - time (sec): 37.74 - samples/sec: 2693.50 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:18:46,103 epoch 68 - iter 642/2142 - loss 0.00134681 - time (sec): 56.09 - samples/sec: 2719.37 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:19:04,554 epoch 68 - iter 856/2142 - loss 0.00136284 - time (sec): 74.54 - samples/sec: 2703.89 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:19:23,278 epoch 68 - iter 1070/2142 - loss 0.00146718 - time (sec): 93.27 - samples/sec: 2698.63 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:19:41,682 epoch 68 - iter 1284/2142 - loss 0.00141685 - time (sec): 111.67 - samples/sec: 2703.94 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:20:00,360 epoch 68 - iter 1498/2142 - loss 0.00139622 - time (sec): 130.35 - samples/sec: 2703.50 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:20:19,001 epoch 68 - iter 1712/2142 - loss 0.00165966 - time (sec): 148.99 - samples/sec: 2708.34 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:20:37,665 epoch 68 - iter 1926/2142 - loss 0.00155137 - time (sec): 167.66 - samples/sec: 2711.54 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:20:56,113 epoch 68 - iter 2140/2142 - loss 0.00153378 - time (sec): 186.10 - samples/sec: 2716.37 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:20:56,280 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:20:56,280 EPOCH 68 done: loss 0.0015 - lr: 0.000003 +2024-10-02 02:21:06,429 DEV : loss 0.30025333166122437 - f1-score (micro avg) 0.9159 +2024-10-02 02:21:06,461 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:21:25,477 epoch 69 - iter 214/2142 - loss 0.00180021 - time (sec): 19.01 - samples/sec: 2728.14 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:21:43,773 epoch 69 - iter 428/2142 - loss 0.00175775 - time (sec): 37.31 - samples/sec: 2728.27 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:22:01,882 epoch 69 - iter 642/2142 - loss 0.00167115 - time (sec): 55.42 - samples/sec: 2736.84 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:22:20,503 epoch 69 - iter 856/2142 - loss 0.00145822 - time (sec): 74.04 - samples/sec: 2725.05 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:22:39,715 epoch 69 - iter 1070/2142 - loss 0.00148383 - time (sec): 93.25 - samples/sec: 2723.89 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:22:58,094 epoch 69 - iter 1284/2142 - loss 0.00162397 - time (sec): 111.63 - samples/sec: 2724.00 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:23:16,804 epoch 69 - iter 1498/2142 - loss 0.00149025 - time (sec): 130.34 - samples/sec: 2715.66 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:23:35,503 epoch 69 - iter 1712/2142 - loss 0.00148027 - time (sec): 149.04 - samples/sec: 2718.50 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:23:53,890 epoch 69 - iter 1926/2142 - loss 0.00143818 - time (sec): 167.43 - samples/sec: 2717.87 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:24:12,254 epoch 69 - iter 2140/2142 - loss 0.00142491 - time (sec): 185.79 - samples/sec: 2720.53 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:24:12,421 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:24:12,422 EPOCH 69 done: loss 0.0014 - lr: 0.000003 +2024-10-02 02:24:21,612 DEV : loss 0.2949461042881012 - f1-score (micro avg) 0.9174 +2024-10-02 02:24:21,640 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:24:39,969 epoch 70 - iter 214/2142 - loss 0.00098300 - time (sec): 18.33 - samples/sec: 2738.67 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:24:58,843 epoch 70 - iter 428/2142 - loss 0.00154203 - time (sec): 37.20 - samples/sec: 2702.23 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:25:17,226 epoch 70 - iter 642/2142 - loss 0.00201999 - time (sec): 55.58 - samples/sec: 2721.29 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:25:36,148 epoch 70 - iter 856/2142 - loss 0.00204759 - time (sec): 74.51 - samples/sec: 2720.88 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:25:54,555 epoch 70 - iter 1070/2142 - loss 0.00210475 - time (sec): 92.91 - samples/sec: 2718.20 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:26:12,743 epoch 70 - iter 1284/2142 - loss 0.00185744 - time (sec): 111.10 - samples/sec: 2728.45 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:26:31,226 epoch 70 - iter 1498/2142 - loss 0.00184326 - time (sec): 129.58 - samples/sec: 2723.62 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:26:50,136 epoch 70 - iter 1712/2142 - loss 0.00178269 - time (sec): 148.49 - samples/sec: 2721.00 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:27:08,966 epoch 70 - iter 1926/2142 - loss 0.00170039 - time (sec): 167.32 - samples/sec: 2718.53 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:27:27,242 epoch 70 - iter 2140/2142 - loss 0.00171937 - time (sec): 185.60 - samples/sec: 2723.47 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:27:27,387 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:27:27,388 EPOCH 70 done: loss 0.0017 - lr: 0.000003 +2024-10-02 02:27:37,126 DEV : loss 0.29188787937164307 - f1-score (micro avg) 0.9179 +2024-10-02 02:27:37,157 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:27:55,813 epoch 71 - iter 214/2142 - loss 0.00090835 - time (sec): 18.66 - samples/sec: 2688.17 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:28:14,366 epoch 71 - iter 428/2142 - loss 0.00103202 - time (sec): 37.21 - samples/sec: 2697.63 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:28:32,616 epoch 71 - iter 642/2142 - loss 0.00109179 - time (sec): 55.46 - samples/sec: 2702.72 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:28:51,214 epoch 71 - iter 856/2142 - loss 0.00109258 - time (sec): 74.06 - samples/sec: 2720.07 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:29:09,972 epoch 71 - iter 1070/2142 - loss 0.00108154 - time (sec): 92.81 - samples/sec: 2721.01 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:29:28,540 epoch 71 - iter 1284/2142 - loss 0.00106774 - time (sec): 111.38 - samples/sec: 2716.62 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:29:47,411 epoch 71 - iter 1498/2142 - loss 0.00109326 - time (sec): 130.25 - samples/sec: 2716.66 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:30:05,938 epoch 71 - iter 1712/2142 - loss 0.00120602 - time (sec): 148.78 - samples/sec: 2714.64 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:30:24,146 epoch 71 - iter 1926/2142 - loss 0.00137547 - time (sec): 166.99 - samples/sec: 2723.52 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:30:43,188 epoch 71 - iter 2140/2142 - loss 0.00133149 - time (sec): 186.03 - samples/sec: 2717.39 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:30:43,339 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:30:43,339 EPOCH 71 done: loss 0.0013 - lr: 0.000003 +2024-10-02 02:30:52,252 DEV : loss 0.29462534189224243 - f1-score (micro avg) 0.917 +2024-10-02 02:30:52,280 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:31:10,793 epoch 72 - iter 214/2142 - loss 0.00082099 - time (sec): 18.51 - samples/sec: 2756.89 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:31:29,121 epoch 72 - iter 428/2142 - loss 0.00147379 - time (sec): 36.84 - samples/sec: 2736.57 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:31:47,595 epoch 72 - iter 642/2142 - loss 0.00167531 - time (sec): 55.31 - samples/sec: 2733.86 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:32:06,021 epoch 72 - iter 856/2142 - loss 0.00160577 - time (sec): 73.74 - samples/sec: 2731.94 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:32:24,901 epoch 72 - iter 1070/2142 - loss 0.00163990 - time (sec): 92.62 - samples/sec: 2730.03 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:32:43,482 epoch 72 - iter 1284/2142 - loss 0.00180695 - time (sec): 111.20 - samples/sec: 2730.32 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:33:01,819 epoch 72 - iter 1498/2142 - loss 0.00185837 - time (sec): 129.54 - samples/sec: 2730.41 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:33:20,666 epoch 72 - iter 1712/2142 - loss 0.00187267 - time (sec): 148.39 - samples/sec: 2725.52 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:33:39,506 epoch 72 - iter 1926/2142 - loss 0.00172925 - time (sec): 167.23 - samples/sec: 2721.34 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:33:58,073 epoch 72 - iter 2140/2142 - loss 0.00185991 - time (sec): 185.79 - samples/sec: 2721.33 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:33:58,206 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:33:58,207 EPOCH 72 done: loss 0.0019 - lr: 0.000003 +2024-10-02 02:34:07,937 DEV : loss 0.2982197701931 - f1-score (micro avg) 0.9167 +2024-10-02 02:34:07,968 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:34:26,687 epoch 73 - iter 214/2142 - loss 0.00065945 - time (sec): 18.72 - samples/sec: 2762.98 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:34:45,297 epoch 73 - iter 428/2142 - loss 0.00049743 - time (sec): 37.33 - samples/sec: 2737.05 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:35:03,564 epoch 73 - iter 642/2142 - loss 0.00091628 - time (sec): 55.59 - samples/sec: 2737.67 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:35:22,016 epoch 73 - iter 856/2142 - loss 0.00079794 - time (sec): 74.05 - samples/sec: 2732.15 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:35:40,744 epoch 73 - iter 1070/2142 - loss 0.00086699 - time (sec): 92.77 - samples/sec: 2723.48 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:35:59,221 epoch 73 - iter 1284/2142 - loss 0.00100856 - time (sec): 111.25 - samples/sec: 2717.79 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:36:17,818 epoch 73 - iter 1498/2142 - loss 0.00111634 - time (sec): 129.85 - samples/sec: 2724.32 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:36:36,788 epoch 73 - iter 1712/2142 - loss 0.00123434 - time (sec): 148.82 - samples/sec: 2722.31 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:36:55,137 epoch 73 - iter 1926/2142 - loss 0.00116049 - time (sec): 167.17 - samples/sec: 2724.75 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:37:13,709 epoch 73 - iter 2140/2142 - loss 0.00121241 - time (sec): 185.74 - samples/sec: 2721.84 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:37:13,854 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:37:13,854 EPOCH 73 done: loss 0.0012 - lr: 0.000003 +2024-10-02 02:37:23,736 DEV : loss 0.29604572057724 - f1-score (micro avg) 0.9184 +2024-10-02 02:37:23,769 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:37:42,590 epoch 74 - iter 214/2142 - loss 0.00079744 - time (sec): 18.82 - samples/sec: 2689.06 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:38:01,697 epoch 74 - iter 428/2142 - loss 0.00112438 - time (sec): 37.93 - samples/sec: 2660.78 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:38:20,243 epoch 74 - iter 642/2142 - loss 0.00107074 - time (sec): 56.47 - samples/sec: 2690.69 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:38:38,667 epoch 74 - iter 856/2142 - loss 0.00114223 - time (sec): 74.90 - samples/sec: 2689.75 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:38:57,336 epoch 74 - iter 1070/2142 - loss 0.00125208 - time (sec): 93.57 - samples/sec: 2688.05 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:39:15,993 epoch 74 - iter 1284/2142 - loss 0.00151226 - time (sec): 112.22 - samples/sec: 2695.62 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:39:35,141 epoch 74 - iter 1498/2142 - loss 0.00142163 - time (sec): 131.37 - samples/sec: 2692.88 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:39:54,049 epoch 74 - iter 1712/2142 - loss 0.00151776 - time (sec): 150.28 - samples/sec: 2690.23 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:40:12,432 epoch 74 - iter 1926/2142 - loss 0.00139229 - time (sec): 168.66 - samples/sec: 2698.98 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:40:31,211 epoch 74 - iter 2140/2142 - loss 0.00143928 - time (sec): 187.44 - samples/sec: 2697.04 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:40:31,375 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:40:31,376 EPOCH 74 done: loss 0.0014 - lr: 0.000003 +2024-10-02 02:40:40,439 DEV : loss 0.29686811566352844 - f1-score (micro avg) 0.9177 +2024-10-02 02:40:40,469 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:40:58,956 epoch 75 - iter 214/2142 - loss 0.00159680 - time (sec): 18.49 - samples/sec: 2716.84 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:41:17,739 epoch 75 - iter 428/2142 - loss 0.00144256 - time (sec): 37.27 - samples/sec: 2718.41 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:41:36,704 epoch 75 - iter 642/2142 - loss 0.00131936 - time (sec): 56.23 - samples/sec: 2696.58 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:41:55,491 epoch 75 - iter 856/2142 - loss 0.00155415 - time (sec): 75.02 - samples/sec: 2700.27 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:42:14,114 epoch 75 - iter 1070/2142 - loss 0.00170290 - time (sec): 93.64 - samples/sec: 2698.84 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:42:32,347 epoch 75 - iter 1284/2142 - loss 0.00151437 - time (sec): 111.88 - samples/sec: 2706.64 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:42:51,013 epoch 75 - iter 1498/2142 - loss 0.00161868 - time (sec): 130.54 - samples/sec: 2702.57 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:43:09,174 epoch 75 - iter 1712/2142 - loss 0.00152428 - time (sec): 148.70 - samples/sec: 2710.78 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:43:28,005 epoch 75 - iter 1926/2142 - loss 0.00151896 - time (sec): 167.54 - samples/sec: 2710.94 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:43:46,878 epoch 75 - iter 2140/2142 - loss 0.00139314 - time (sec): 186.41 - samples/sec: 2711.81 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:43:47,035 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:43:47,036 EPOCH 75 done: loss 0.0014 - lr: 0.000003 +2024-10-02 02:43:56,835 DEV : loss 0.3093326687812805 - f1-score (micro avg) 0.9173 +2024-10-02 02:43:56,869 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:44:15,342 epoch 76 - iter 214/2142 - loss 0.00051305 - time (sec): 18.47 - samples/sec: 2700.86 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:44:34,243 epoch 76 - iter 428/2142 - loss 0.00106166 - time (sec): 37.37 - samples/sec: 2734.28 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:44:52,521 epoch 76 - iter 642/2142 - loss 0.00119239 - time (sec): 55.65 - samples/sec: 2735.30 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:45:11,285 epoch 76 - iter 856/2142 - loss 0.00108876 - time (sec): 74.41 - samples/sec: 2726.14 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:45:29,960 epoch 76 - iter 1070/2142 - loss 0.00101294 - time (sec): 93.09 - samples/sec: 2715.30 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:45:48,887 epoch 76 - iter 1284/2142 - loss 0.00109284 - time (sec): 112.02 - samples/sec: 2710.93 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:46:07,914 epoch 76 - iter 1498/2142 - loss 0.00113621 - time (sec): 131.04 - samples/sec: 2701.20 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:46:26,201 epoch 76 - iter 1712/2142 - loss 0.00115068 - time (sec): 149.33 - samples/sec: 2709.38 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:46:44,680 epoch 76 - iter 1926/2142 - loss 0.00124958 - time (sec): 167.81 - samples/sec: 2706.19 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:47:03,401 epoch 76 - iter 2140/2142 - loss 0.00129041 - time (sec): 186.53 - samples/sec: 2709.49 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:47:03,570 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:47:03,571 EPOCH 76 done: loss 0.0013 - lr: 0.000003 +2024-10-02 02:47:13,342 DEV : loss 0.29795926809310913 - f1-score (micro avg) 0.9181 +2024-10-02 02:47:13,370 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:47:31,826 epoch 77 - iter 214/2142 - loss 0.00100779 - time (sec): 18.45 - samples/sec: 2745.53 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:47:50,546 epoch 77 - iter 428/2142 - loss 0.00103994 - time (sec): 37.17 - samples/sec: 2712.85 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:48:09,483 epoch 77 - iter 642/2142 - loss 0.00102042 - time (sec): 56.11 - samples/sec: 2708.01 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:48:28,561 epoch 77 - iter 856/2142 - loss 0.00086281 - time (sec): 75.19 - samples/sec: 2697.82 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:48:47,210 epoch 77 - iter 1070/2142 - loss 0.00081062 - time (sec): 93.84 - samples/sec: 2711.02 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:49:05,997 epoch 77 - iter 1284/2142 - loss 0.00109294 - time (sec): 112.63 - samples/sec: 2696.48 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:49:24,601 epoch 77 - iter 1498/2142 - loss 0.00110030 - time (sec): 131.23 - samples/sec: 2705.17 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:49:43,004 epoch 77 - iter 1712/2142 - loss 0.00105966 - time (sec): 149.63 - samples/sec: 2708.40 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:50:01,582 epoch 77 - iter 1926/2142 - loss 0.00112424 - time (sec): 168.21 - samples/sec: 2705.86 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:50:20,146 epoch 77 - iter 2140/2142 - loss 0.00113932 - time (sec): 186.77 - samples/sec: 2706.89 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:50:20,281 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:50:20,281 EPOCH 77 done: loss 0.0011 - lr: 0.000003 +2024-10-02 02:50:29,620 DEV : loss 0.3033064305782318 - f1-score (micro avg) 0.9175 +2024-10-02 02:50:29,647 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:50:48,353 epoch 78 - iter 214/2142 - loss 0.00096424 - time (sec): 18.70 - samples/sec: 2719.10 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:51:07,039 epoch 78 - iter 428/2142 - loss 0.00126551 - time (sec): 37.39 - samples/sec: 2702.77 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:51:25,603 epoch 78 - iter 642/2142 - loss 0.00129328 - time (sec): 55.95 - samples/sec: 2715.10 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:51:44,217 epoch 78 - iter 856/2142 - loss 0.00127191 - time (sec): 74.57 - samples/sec: 2719.04 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:52:02,686 epoch 78 - iter 1070/2142 - loss 0.00130601 - time (sec): 93.04 - samples/sec: 2725.40 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:52:21,975 epoch 78 - iter 1284/2142 - loss 0.00166946 - time (sec): 112.33 - samples/sec: 2716.24 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:52:40,227 epoch 78 - iter 1498/2142 - loss 0.00163360 - time (sec): 130.58 - samples/sec: 2717.15 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:52:58,508 epoch 78 - iter 1712/2142 - loss 0.00158127 - time (sec): 148.86 - samples/sec: 2712.61 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:53:17,537 epoch 78 - iter 1926/2142 - loss 0.00160172 - time (sec): 167.89 - samples/sec: 2710.50 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:53:35,849 epoch 78 - iter 2140/2142 - loss 0.00154085 - time (sec): 186.20 - samples/sec: 2714.85 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:53:35,995 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:53:35,996 EPOCH 78 done: loss 0.0015 - lr: 0.000003 +2024-10-02 02:53:46,028 DEV : loss 0.29921475052833557 - f1-score (micro avg) 0.9177 +2024-10-02 02:53:46,060 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:54:05,221 epoch 79 - iter 214/2142 - loss 0.00088812 - time (sec): 19.16 - samples/sec: 2701.92 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:54:23,979 epoch 79 - iter 428/2142 - loss 0.00170806 - time (sec): 37.92 - samples/sec: 2709.03 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:54:42,784 epoch 79 - iter 642/2142 - loss 0.00151885 - time (sec): 56.72 - samples/sec: 2700.91 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:55:01,531 epoch 79 - iter 856/2142 - loss 0.00134622 - time (sec): 75.47 - samples/sec: 2699.04 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:55:19,686 epoch 79 - iter 1070/2142 - loss 0.00129678 - time (sec): 93.63 - samples/sec: 2702.48 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:55:38,214 epoch 79 - iter 1284/2142 - loss 0.00132715 - time (sec): 112.15 - samples/sec: 2707.34 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:55:57,186 epoch 79 - iter 1498/2142 - loss 0.00130640 - time (sec): 131.12 - samples/sec: 2706.14 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:56:16,012 epoch 79 - iter 1712/2142 - loss 0.00149815 - time (sec): 149.95 - samples/sec: 2702.07 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:56:34,660 epoch 79 - iter 1926/2142 - loss 0.00141080 - time (sec): 168.60 - samples/sec: 2705.25 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:56:52,896 epoch 79 - iter 2140/2142 - loss 0.00130673 - time (sec): 186.84 - samples/sec: 2705.38 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:56:53,043 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:56:53,044 EPOCH 79 done: loss 0.0013 - lr: 0.000003 +2024-10-02 02:57:03,141 DEV : loss 0.3049955666065216 - f1-score (micro avg) 0.9158 +2024-10-02 02:57:03,169 ---------------------------------------------------------------------------------------------------- +2024-10-02 02:57:21,472 epoch 80 - iter 214/2142 - loss 0.00152040 - time (sec): 18.30 - samples/sec: 2758.59 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:57:40,341 epoch 80 - iter 428/2142 - loss 0.00147782 - time (sec): 37.17 - samples/sec: 2709.36 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:57:58,792 epoch 80 - iter 642/2142 - loss 0.00145496 - time (sec): 55.62 - samples/sec: 2711.95 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:58:17,497 epoch 80 - iter 856/2142 - loss 0.00140371 - time (sec): 74.33 - samples/sec: 2727.92 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:58:36,949 epoch 80 - iter 1070/2142 - loss 0.00154277 - time (sec): 93.78 - samples/sec: 2711.03 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:58:55,407 epoch 80 - iter 1284/2142 - loss 0.00150641 - time (sec): 112.24 - samples/sec: 2716.94 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:59:13,798 epoch 80 - iter 1498/2142 - loss 0.00133800 - time (sec): 130.63 - samples/sec: 2721.34 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:59:32,675 epoch 80 - iter 1712/2142 - loss 0.00136545 - time (sec): 149.51 - samples/sec: 2717.34 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 02:59:51,090 epoch 80 - iter 1926/2142 - loss 0.00132324 - time (sec): 167.92 - samples/sec: 2711.79 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 03:00:09,996 epoch 80 - iter 2140/2142 - loss 0.00127200 - time (sec): 186.83 - samples/sec: 2705.92 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 03:00:10,144 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:00:10,144 EPOCH 80 done: loss 0.0013 - lr: 0.000003 +2024-10-02 03:00:19,170 DEV : loss 0.3060019612312317 - f1-score (micro avg) 0.9169 +2024-10-02 03:00:19,201 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:00:38,305 epoch 81 - iter 214/2142 - loss 0.00088538 - time (sec): 19.10 - samples/sec: 2705.45 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 03:00:56,677 epoch 81 - iter 428/2142 - loss 0.00129640 - time (sec): 37.48 - samples/sec: 2717.21 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 03:01:15,113 epoch 81 - iter 642/2142 - loss 0.00124201 - time (sec): 55.91 - samples/sec: 2718.09 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 03:01:33,407 epoch 81 - iter 856/2142 - loss 0.00149827 - time (sec): 74.20 - samples/sec: 2721.78 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 03:01:52,080 epoch 81 - iter 1070/2142 - loss 0.00155096 - time (sec): 92.88 - samples/sec: 2722.16 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 03:02:11,005 epoch 81 - iter 1284/2142 - loss 0.00150596 - time (sec): 111.80 - samples/sec: 2719.68 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 03:02:29,539 epoch 81 - iter 1498/2142 - loss 0.00150223 - time (sec): 130.34 - samples/sec: 2716.56 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 03:02:47,773 epoch 81 - iter 1712/2142 - loss 0.00148319 - time (sec): 148.57 - samples/sec: 2718.66 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 03:03:06,775 epoch 81 - iter 1926/2142 - loss 0.00142970 - time (sec): 167.57 - samples/sec: 2717.80 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 03:03:25,337 epoch 81 - iter 2140/2142 - loss 0.00137073 - time (sec): 186.14 - samples/sec: 2716.39 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 03:03:25,479 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:03:25,479 EPOCH 81 done: loss 0.0014 - lr: 0.000003 +2024-10-02 03:03:35,561 DEV : loss 0.30465465784072876 - f1-score (micro avg) 0.9177 +2024-10-02 03:03:35,588 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:03:54,057 epoch 82 - iter 214/2142 - loss 0.00088211 - time (sec): 18.47 - samples/sec: 2713.40 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 03:04:13,021 epoch 82 - iter 428/2142 - loss 0.00067239 - time (sec): 37.43 - samples/sec: 2696.24 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 03:04:32,246 epoch 82 - iter 642/2142 - loss 0.00114635 - time (sec): 56.66 - samples/sec: 2694.53 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 03:04:50,853 epoch 82 - iter 856/2142 - loss 0.00158997 - time (sec): 75.26 - samples/sec: 2711.58 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 03:05:09,565 epoch 82 - iter 1070/2142 - loss 0.00152881 - time (sec): 93.98 - samples/sec: 2698.54 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 03:05:27,655 epoch 82 - iter 1284/2142 - loss 0.00150146 - time (sec): 112.07 - samples/sec: 2703.90 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 03:05:46,555 epoch 82 - iter 1498/2142 - loss 0.00138361 - time (sec): 130.97 - samples/sec: 2706.90 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 03:06:05,041 epoch 82 - iter 1712/2142 - loss 0.00129435 - time (sec): 149.45 - samples/sec: 2711.18 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 03:06:23,935 epoch 82 - iter 1926/2142 - loss 0.00124188 - time (sec): 168.35 - samples/sec: 2706.85 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 03:06:42,280 epoch 82 - iter 2140/2142 - loss 0.00123936 - time (sec): 186.69 - samples/sec: 2708.06 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 03:06:42,434 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:06:42,435 EPOCH 82 done: loss 0.0012 - lr: 0.000003 +2024-10-02 03:06:52,522 DEV : loss 0.31024670600891113 - f1-score (micro avg) 0.9165 +2024-10-02 03:06:52,550 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:07:11,080 epoch 83 - iter 214/2142 - loss 0.00123781 - time (sec): 18.53 - samples/sec: 2724.89 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 03:07:29,594 epoch 83 - iter 428/2142 - loss 0.00120200 - time (sec): 37.04 - samples/sec: 2709.00 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 03:07:48,458 epoch 83 - iter 642/2142 - loss 0.00097597 - time (sec): 55.91 - samples/sec: 2715.22 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 03:08:07,287 epoch 83 - iter 856/2142 - loss 0.00077944 - time (sec): 74.74 - samples/sec: 2707.76 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 03:08:26,507 epoch 83 - iter 1070/2142 - loss 0.00064655 - time (sec): 93.96 - samples/sec: 2711.11 - lr: 0.000003 - momentum: 0.000000 +2024-10-02 03:08:44,996 epoch 83 - iter 1284/2142 - loss 0.00059573 - time (sec): 112.44 - samples/sec: 2703.90 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:09:03,567 epoch 83 - iter 1498/2142 - loss 0.00057446 - time (sec): 131.02 - samples/sec: 2703.83 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:09:22,095 epoch 83 - iter 1712/2142 - loss 0.00069235 - time (sec): 149.54 - samples/sec: 2707.93 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:09:40,682 epoch 83 - iter 1926/2142 - loss 0.00073000 - time (sec): 168.13 - samples/sec: 2707.77 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:09:59,070 epoch 83 - iter 2140/2142 - loss 0.00076040 - time (sec): 186.52 - samples/sec: 2709.51 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:09:59,250 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:09:59,250 EPOCH 83 done: loss 0.0008 - lr: 0.000002 +2024-10-02 03:10:08,411 DEV : loss 0.30651748180389404 - f1-score (micro avg) 0.9179 +2024-10-02 03:10:08,439 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:10:26,935 epoch 84 - iter 214/2142 - loss 0.00042099 - time (sec): 18.49 - samples/sec: 2734.27 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:10:46,096 epoch 84 - iter 428/2142 - loss 0.00076270 - time (sec): 37.66 - samples/sec: 2698.25 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:11:04,482 epoch 84 - iter 642/2142 - loss 0.00095528 - time (sec): 56.04 - samples/sec: 2698.82 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:11:23,333 epoch 84 - iter 856/2142 - loss 0.00095609 - time (sec): 74.89 - samples/sec: 2714.38 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:11:42,190 epoch 84 - iter 1070/2142 - loss 0.00086670 - time (sec): 93.75 - samples/sec: 2710.58 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:12:00,655 epoch 84 - iter 1284/2142 - loss 0.00081460 - time (sec): 112.21 - samples/sec: 2709.54 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:12:19,254 epoch 84 - iter 1498/2142 - loss 0.00088933 - time (sec): 130.81 - samples/sec: 2713.49 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:12:37,925 epoch 84 - iter 1712/2142 - loss 0.00098483 - time (sec): 149.48 - samples/sec: 2713.22 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:12:56,500 epoch 84 - iter 1926/2142 - loss 0.00098772 - time (sec): 168.06 - samples/sec: 2709.09 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:13:15,068 epoch 84 - iter 2140/2142 - loss 0.00099073 - time (sec): 186.63 - samples/sec: 2708.48 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:13:15,216 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:13:15,217 EPOCH 84 done: loss 0.0010 - lr: 0.000002 +2024-10-02 03:13:25,213 DEV : loss 0.3030647933483124 - f1-score (micro avg) 0.9189 +2024-10-02 03:13:25,240 saving best model +2024-10-02 03:13:30,151 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:13:48,870 epoch 85 - iter 214/2142 - loss 0.00163271 - time (sec): 18.72 - samples/sec: 2701.80 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:14:07,884 epoch 85 - iter 428/2142 - loss 0.00121434 - time (sec): 37.73 - samples/sec: 2704.00 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:14:26,044 epoch 85 - iter 642/2142 - loss 0.00098848 - time (sec): 55.89 - samples/sec: 2699.88 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:14:44,666 epoch 85 - iter 856/2142 - loss 0.00129812 - time (sec): 74.51 - samples/sec: 2711.32 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:15:03,750 epoch 85 - iter 1070/2142 - loss 0.00113465 - time (sec): 93.60 - samples/sec: 2699.52 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:15:22,211 epoch 85 - iter 1284/2142 - loss 0.00119332 - time (sec): 112.06 - samples/sec: 2704.16 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:15:40,845 epoch 85 - iter 1498/2142 - loss 0.00121986 - time (sec): 130.69 - samples/sec: 2703.06 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:15:59,188 epoch 85 - iter 1712/2142 - loss 0.00115403 - time (sec): 149.04 - samples/sec: 2709.94 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:16:17,574 epoch 85 - iter 1926/2142 - loss 0.00112412 - time (sec): 167.42 - samples/sec: 2712.24 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:16:36,366 epoch 85 - iter 2140/2142 - loss 0.00109263 - time (sec): 186.21 - samples/sec: 2714.62 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:16:36,521 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:16:36,522 EPOCH 85 done: loss 0.0011 - lr: 0.000002 +2024-10-02 03:16:46,510 DEV : loss 0.3119923174381256 - f1-score (micro avg) 0.918 +2024-10-02 03:16:46,539 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:17:04,834 epoch 86 - iter 214/2142 - loss 0.00106720 - time (sec): 18.29 - samples/sec: 2743.69 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:17:23,507 epoch 86 - iter 428/2142 - loss 0.00096195 - time (sec): 36.97 - samples/sec: 2702.62 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:17:41,527 epoch 86 - iter 642/2142 - loss 0.00079331 - time (sec): 54.99 - samples/sec: 2725.10 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:18:00,102 epoch 86 - iter 856/2142 - loss 0.00073315 - time (sec): 73.56 - samples/sec: 2722.83 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:18:19,214 epoch 86 - iter 1070/2142 - loss 0.00080125 - time (sec): 92.67 - samples/sec: 2703.27 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:18:38,070 epoch 86 - iter 1284/2142 - loss 0.00103373 - time (sec): 111.53 - samples/sec: 2704.72 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:18:56,812 epoch 86 - iter 1498/2142 - loss 0.00103750 - time (sec): 130.27 - samples/sec: 2708.10 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:19:15,407 epoch 86 - iter 1712/2142 - loss 0.00105356 - time (sec): 148.87 - samples/sec: 2716.52 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:19:34,065 epoch 86 - iter 1926/2142 - loss 0.00106684 - time (sec): 167.52 - samples/sec: 2713.99 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:19:52,766 epoch 86 - iter 2140/2142 - loss 0.00105373 - time (sec): 186.23 - samples/sec: 2714.45 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:19:52,925 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:19:52,925 EPOCH 86 done: loss 0.0011 - lr: 0.000002 +2024-10-02 03:20:02,192 DEV : loss 0.31240612268447876 - f1-score (micro avg) 0.9156 +2024-10-02 03:20:02,219 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:20:20,607 epoch 87 - iter 214/2142 - loss 0.00031199 - time (sec): 18.39 - samples/sec: 2704.82 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:20:39,422 epoch 87 - iter 428/2142 - loss 0.00084356 - time (sec): 37.20 - samples/sec: 2714.17 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:20:57,936 epoch 87 - iter 642/2142 - loss 0.00079837 - time (sec): 55.72 - samples/sec: 2705.98 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:21:16,577 epoch 87 - iter 856/2142 - loss 0.00095047 - time (sec): 74.36 - samples/sec: 2719.61 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:21:34,929 epoch 87 - iter 1070/2142 - loss 0.00092023 - time (sec): 92.71 - samples/sec: 2726.02 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:21:53,540 epoch 87 - iter 1284/2142 - loss 0.00116761 - time (sec): 111.32 - samples/sec: 2728.60 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:22:12,023 epoch 87 - iter 1498/2142 - loss 0.00106075 - time (sec): 129.80 - samples/sec: 2724.88 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:22:30,686 epoch 87 - iter 1712/2142 - loss 0.00110242 - time (sec): 148.47 - samples/sec: 2729.30 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:22:49,149 epoch 87 - iter 1926/2142 - loss 0.00118617 - time (sec): 166.93 - samples/sec: 2726.36 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:23:08,227 epoch 87 - iter 2140/2142 - loss 0.00116176 - time (sec): 186.01 - samples/sec: 2717.25 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:23:08,400 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:23:08,401 EPOCH 87 done: loss 0.0012 - lr: 0.000002 +2024-10-02 03:23:18,187 DEV : loss 0.29618072509765625 - f1-score (micro avg) 0.918 +2024-10-02 03:23:18,218 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:23:36,902 epoch 88 - iter 214/2142 - loss 0.00105804 - time (sec): 18.68 - samples/sec: 2685.34 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:23:55,586 epoch 88 - iter 428/2142 - loss 0.00102599 - time (sec): 37.37 - samples/sec: 2712.97 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:24:14,309 epoch 88 - iter 642/2142 - loss 0.00086713 - time (sec): 56.09 - samples/sec: 2723.90 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:24:32,608 epoch 88 - iter 856/2142 - loss 0.00084466 - time (sec): 74.39 - samples/sec: 2701.31 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:24:50,723 epoch 88 - iter 1070/2142 - loss 0.00106415 - time (sec): 92.50 - samples/sec: 2705.10 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:25:09,740 epoch 88 - iter 1284/2142 - loss 0.00117088 - time (sec): 111.52 - samples/sec: 2716.00 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:25:27,871 epoch 88 - iter 1498/2142 - loss 0.00107082 - time (sec): 129.65 - samples/sec: 2721.25 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:25:46,984 epoch 88 - iter 1712/2142 - loss 0.00096582 - time (sec): 148.76 - samples/sec: 2716.37 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:26:05,616 epoch 88 - iter 1926/2142 - loss 0.00095845 - time (sec): 167.40 - samples/sec: 2717.70 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:26:24,470 epoch 88 - iter 2140/2142 - loss 0.00094622 - time (sec): 186.25 - samples/sec: 2714.53 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:26:24,633 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:26:24,634 EPOCH 88 done: loss 0.0009 - lr: 0.000002 +2024-10-02 03:26:34,511 DEV : loss 0.30072423815727234 - f1-score (micro avg) 0.9164 +2024-10-02 03:26:34,542 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:26:53,294 epoch 89 - iter 214/2142 - loss 0.00072380 - time (sec): 18.75 - samples/sec: 2736.16 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:27:12,242 epoch 89 - iter 428/2142 - loss 0.00111694 - time (sec): 37.70 - samples/sec: 2702.62 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:27:30,773 epoch 89 - iter 642/2142 - loss 0.00091791 - time (sec): 56.23 - samples/sec: 2708.48 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:27:49,666 epoch 89 - iter 856/2142 - loss 0.00087533 - time (sec): 75.12 - samples/sec: 2701.82 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:28:08,594 epoch 89 - iter 1070/2142 - loss 0.00119012 - time (sec): 94.05 - samples/sec: 2700.30 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:28:26,783 epoch 89 - iter 1284/2142 - loss 0.00104331 - time (sec): 112.24 - samples/sec: 2703.78 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:28:45,507 epoch 89 - iter 1498/2142 - loss 0.00104293 - time (sec): 130.96 - samples/sec: 2707.90 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:29:03,941 epoch 89 - iter 1712/2142 - loss 0.00105747 - time (sec): 149.40 - samples/sec: 2708.28 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:29:22,836 epoch 89 - iter 1926/2142 - loss 0.00097863 - time (sec): 168.29 - samples/sec: 2706.47 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:29:41,373 epoch 89 - iter 2140/2142 - loss 0.00095691 - time (sec): 186.83 - samples/sec: 2705.93 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:29:41,509 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:29:41,510 EPOCH 89 done: loss 0.0010 - lr: 0.000002 +2024-10-02 03:29:50,457 DEV : loss 0.29945579171180725 - f1-score (micro avg) 0.9168 +2024-10-02 03:29:50,485 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:30:09,191 epoch 90 - iter 214/2142 - loss 0.00074340 - time (sec): 18.70 - samples/sec: 2702.37 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:30:27,614 epoch 90 - iter 428/2142 - loss 0.00042710 - time (sec): 37.13 - samples/sec: 2704.32 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:30:46,350 epoch 90 - iter 642/2142 - loss 0.00089256 - time (sec): 55.86 - samples/sec: 2697.71 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:31:04,949 epoch 90 - iter 856/2142 - loss 0.00098680 - time (sec): 74.46 - samples/sec: 2706.81 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:31:23,790 epoch 90 - iter 1070/2142 - loss 0.00109672 - time (sec): 93.30 - samples/sec: 2713.96 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:31:42,644 epoch 90 - iter 1284/2142 - loss 0.00109355 - time (sec): 112.16 - samples/sec: 2705.84 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:32:01,105 epoch 90 - iter 1498/2142 - loss 0.00107575 - time (sec): 130.62 - samples/sec: 2713.94 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:32:19,454 epoch 90 - iter 1712/2142 - loss 0.00100424 - time (sec): 148.97 - samples/sec: 2714.21 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:32:38,549 epoch 90 - iter 1926/2142 - loss 0.00105526 - time (sec): 168.06 - samples/sec: 2708.35 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:32:57,244 epoch 90 - iter 2140/2142 - loss 0.00107923 - time (sec): 186.76 - samples/sec: 2706.13 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:32:57,407 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:32:57,407 EPOCH 90 done: loss 0.0011 - lr: 0.000002 +2024-10-02 03:33:07,508 DEV : loss 0.3083708584308624 - f1-score (micro avg) 0.9166 +2024-10-02 03:33:07,537 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:33:26,104 epoch 91 - iter 214/2142 - loss 0.00114947 - time (sec): 18.57 - samples/sec: 2719.93 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:33:44,667 epoch 91 - iter 428/2142 - loss 0.00119457 - time (sec): 37.13 - samples/sec: 2753.52 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:34:03,278 epoch 91 - iter 642/2142 - loss 0.00104040 - time (sec): 55.74 - samples/sec: 2735.80 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:34:22,191 epoch 91 - iter 856/2142 - loss 0.00115993 - time (sec): 74.65 - samples/sec: 2720.24 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:34:41,107 epoch 91 - iter 1070/2142 - loss 0.00119301 - time (sec): 93.57 - samples/sec: 2706.97 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:34:59,803 epoch 91 - iter 1284/2142 - loss 0.00115337 - time (sec): 112.26 - samples/sec: 2704.65 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:35:18,439 epoch 91 - iter 1498/2142 - loss 0.00118982 - time (sec): 130.90 - samples/sec: 2706.38 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:35:36,483 epoch 91 - iter 1712/2142 - loss 0.00121758 - time (sec): 148.94 - samples/sec: 2714.54 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:35:54,906 epoch 91 - iter 1926/2142 - loss 0.00123843 - time (sec): 167.37 - samples/sec: 2717.27 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:36:13,366 epoch 91 - iter 2140/2142 - loss 0.00117971 - time (sec): 185.83 - samples/sec: 2719.94 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:36:13,558 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:36:13,559 EPOCH 91 done: loss 0.0012 - lr: 0.000002 +2024-10-02 03:36:22,488 DEV : loss 0.29786217212677 - f1-score (micro avg) 0.9177 +2024-10-02 03:36:22,516 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:36:41,376 epoch 92 - iter 214/2142 - loss 0.00013579 - time (sec): 18.86 - samples/sec: 2713.16 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:36:59,993 epoch 92 - iter 428/2142 - loss 0.00051055 - time (sec): 37.48 - samples/sec: 2696.50 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:37:18,782 epoch 92 - iter 642/2142 - loss 0.00041540 - time (sec): 56.26 - samples/sec: 2708.35 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:37:37,029 epoch 92 - iter 856/2142 - loss 0.00042746 - time (sec): 74.51 - samples/sec: 2713.20 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:37:55,474 epoch 92 - iter 1070/2142 - loss 0.00059046 - time (sec): 92.96 - samples/sec: 2718.72 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:38:13,907 epoch 92 - iter 1284/2142 - loss 0.00072065 - time (sec): 111.39 - samples/sec: 2711.96 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:38:32,373 epoch 92 - iter 1498/2142 - loss 0.00070617 - time (sec): 129.86 - samples/sec: 2713.71 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:38:50,978 epoch 92 - iter 1712/2142 - loss 0.00074360 - time (sec): 148.46 - samples/sec: 2716.22 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:39:09,281 epoch 92 - iter 1926/2142 - loss 0.00073853 - time (sec): 166.76 - samples/sec: 2722.84 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:39:28,173 epoch 92 - iter 2140/2142 - loss 0.00078107 - time (sec): 185.66 - samples/sec: 2722.94 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:39:28,312 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:39:28,313 EPOCH 92 done: loss 0.0008 - lr: 0.000002 +2024-10-02 03:39:38,100 DEV : loss 0.31766098737716675 - f1-score (micro avg) 0.916 +2024-10-02 03:39:38,129 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:39:56,403 epoch 93 - iter 214/2142 - loss 0.00050213 - time (sec): 18.27 - samples/sec: 2771.28 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:40:14,952 epoch 93 - iter 428/2142 - loss 0.00032703 - time (sec): 36.82 - samples/sec: 2754.47 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:40:33,274 epoch 93 - iter 642/2142 - loss 0.00037916 - time (sec): 55.14 - samples/sec: 2739.44 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:40:52,279 epoch 93 - iter 856/2142 - loss 0.00058462 - time (sec): 74.15 - samples/sec: 2726.63 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:41:10,698 epoch 93 - iter 1070/2142 - loss 0.00060514 - time (sec): 92.57 - samples/sec: 2727.73 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:41:29,355 epoch 93 - iter 1284/2142 - loss 0.00057040 - time (sec): 111.22 - samples/sec: 2730.14 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:41:47,634 epoch 93 - iter 1498/2142 - loss 0.00064375 - time (sec): 129.50 - samples/sec: 2730.11 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:42:06,088 epoch 93 - iter 1712/2142 - loss 0.00070097 - time (sec): 147.96 - samples/sec: 2731.83 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:42:24,527 epoch 93 - iter 1926/2142 - loss 0.00071859 - time (sec): 166.40 - samples/sec: 2734.11 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:42:43,123 epoch 93 - iter 2140/2142 - loss 0.00071361 - time (sec): 184.99 - samples/sec: 2732.75 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:42:43,278 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:42:43,278 EPOCH 93 done: loss 0.0007 - lr: 0.000002 +2024-10-02 03:42:53,414 DEV : loss 0.3104941248893738 - f1-score (micro avg) 0.9173 +2024-10-02 03:42:53,447 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:43:11,717 epoch 94 - iter 214/2142 - loss 0.00133368 - time (sec): 18.27 - samples/sec: 2744.34 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:43:30,733 epoch 94 - iter 428/2142 - loss 0.00129525 - time (sec): 37.28 - samples/sec: 2706.68 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:43:49,795 epoch 94 - iter 642/2142 - loss 0.00099335 - time (sec): 56.35 - samples/sec: 2696.28 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:44:08,187 epoch 94 - iter 856/2142 - loss 0.00088806 - time (sec): 74.74 - samples/sec: 2699.95 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:44:26,970 epoch 94 - iter 1070/2142 - loss 0.00094313 - time (sec): 93.52 - samples/sec: 2701.74 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:44:45,531 epoch 94 - iter 1284/2142 - loss 0.00094888 - time (sec): 112.08 - samples/sec: 2709.77 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:45:04,024 epoch 94 - iter 1498/2142 - loss 0.00086327 - time (sec): 130.58 - samples/sec: 2712.66 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:45:22,325 epoch 94 - iter 1712/2142 - loss 0.00077233 - time (sec): 148.88 - samples/sec: 2719.83 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:45:41,083 epoch 94 - iter 1926/2142 - loss 0.00074494 - time (sec): 167.63 - samples/sec: 2713.93 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:45:59,316 epoch 94 - iter 2140/2142 - loss 0.00080927 - time (sec): 185.87 - samples/sec: 2719.46 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:45:59,462 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:45:59,463 EPOCH 94 done: loss 0.0008 - lr: 0.000002 +2024-10-02 03:46:08,503 DEV : loss 0.3086221516132355 - f1-score (micro avg) 0.9167 +2024-10-02 03:46:08,531 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:46:26,840 epoch 95 - iter 214/2142 - loss 0.00032554 - time (sec): 18.31 - samples/sec: 2721.80 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:46:44,995 epoch 95 - iter 428/2142 - loss 0.00039837 - time (sec): 36.46 - samples/sec: 2714.27 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:47:03,472 epoch 95 - iter 642/2142 - loss 0.00043610 - time (sec): 54.94 - samples/sec: 2729.07 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:47:22,309 epoch 95 - iter 856/2142 - loss 0.00068093 - time (sec): 73.78 - samples/sec: 2717.15 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:47:41,012 epoch 95 - iter 1070/2142 - loss 0.00057882 - time (sec): 92.48 - samples/sec: 2709.45 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:47:59,635 epoch 95 - iter 1284/2142 - loss 0.00063412 - time (sec): 111.10 - samples/sec: 2710.42 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:48:18,706 epoch 95 - iter 1498/2142 - loss 0.00084009 - time (sec): 130.17 - samples/sec: 2712.84 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:48:37,525 epoch 95 - iter 1712/2142 - loss 0.00086407 - time (sec): 148.99 - samples/sec: 2715.30 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:48:56,092 epoch 95 - iter 1926/2142 - loss 0.00097936 - time (sec): 167.56 - samples/sec: 2717.00 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:49:14,226 epoch 95 - iter 2140/2142 - loss 0.00101967 - time (sec): 185.69 - samples/sec: 2722.06 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:49:14,395 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:49:14,396 EPOCH 95 done: loss 0.0010 - lr: 0.000002 +2024-10-02 03:49:24,465 DEV : loss 0.31199052929878235 - f1-score (micro avg) 0.9166 +2024-10-02 03:49:24,493 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:49:42,910 epoch 96 - iter 214/2142 - loss 0.00057614 - time (sec): 18.42 - samples/sec: 2701.85 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:50:01,213 epoch 96 - iter 428/2142 - loss 0.00045003 - time (sec): 36.72 - samples/sec: 2726.42 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:50:19,747 epoch 96 - iter 642/2142 - loss 0.00083908 - time (sec): 55.25 - samples/sec: 2712.23 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:50:39,026 epoch 96 - iter 856/2142 - loss 0.00099517 - time (sec): 74.53 - samples/sec: 2714.69 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:50:57,534 epoch 96 - iter 1070/2142 - loss 0.00092379 - time (sec): 93.04 - samples/sec: 2717.55 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:51:16,010 epoch 96 - iter 1284/2142 - loss 0.00087178 - time (sec): 111.52 - samples/sec: 2721.01 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:51:34,407 epoch 96 - iter 1498/2142 - loss 0.00087558 - time (sec): 129.91 - samples/sec: 2721.28 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:51:52,826 epoch 96 - iter 1712/2142 - loss 0.00089952 - time (sec): 148.33 - samples/sec: 2725.98 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:52:11,802 epoch 96 - iter 1926/2142 - loss 0.00083395 - time (sec): 167.31 - samples/sec: 2722.16 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:52:30,028 epoch 96 - iter 2140/2142 - loss 0.00083432 - time (sec): 185.53 - samples/sec: 2724.29 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:52:30,193 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:52:30,194 EPOCH 96 done: loss 0.0008 - lr: 0.000002 +2024-10-02 03:52:40,192 DEV : loss 0.3132540285587311 - f1-score (micro avg) 0.917 +2024-10-02 03:52:40,221 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:52:58,744 epoch 97 - iter 214/2142 - loss 0.00080299 - time (sec): 18.52 - samples/sec: 2711.14 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:53:17,539 epoch 97 - iter 428/2142 - loss 0.00091519 - time (sec): 37.32 - samples/sec: 2709.28 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:53:36,309 epoch 97 - iter 642/2142 - loss 0.00105123 - time (sec): 56.09 - samples/sec: 2713.98 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:53:54,805 epoch 97 - iter 856/2142 - loss 0.00092027 - time (sec): 74.58 - samples/sec: 2708.63 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:54:13,415 epoch 97 - iter 1070/2142 - loss 0.00082154 - time (sec): 93.19 - samples/sec: 2705.50 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:54:32,270 epoch 97 - iter 1284/2142 - loss 0.00094348 - time (sec): 112.05 - samples/sec: 2706.32 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:54:50,480 epoch 97 - iter 1498/2142 - loss 0.00088345 - time (sec): 130.26 - samples/sec: 2717.15 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:55:08,910 epoch 97 - iter 1712/2142 - loss 0.00086394 - time (sec): 148.69 - samples/sec: 2723.04 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:55:27,348 epoch 97 - iter 1926/2142 - loss 0.00086460 - time (sec): 167.13 - samples/sec: 2725.40 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:55:45,893 epoch 97 - iter 2140/2142 - loss 0.00089739 - time (sec): 185.67 - samples/sec: 2722.65 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:55:46,047 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:55:46,048 EPOCH 97 done: loss 0.0009 - lr: 0.000002 +2024-10-02 03:55:55,198 DEV : loss 0.30876144766807556 - f1-score (micro avg) 0.917 +2024-10-02 03:55:55,232 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:56:14,032 epoch 98 - iter 214/2142 - loss 0.00097665 - time (sec): 18.80 - samples/sec: 2736.92 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:56:32,494 epoch 98 - iter 428/2142 - loss 0.00119927 - time (sec): 37.26 - samples/sec: 2711.48 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:56:51,176 epoch 98 - iter 642/2142 - loss 0.00096027 - time (sec): 55.94 - samples/sec: 2714.89 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:57:09,909 epoch 98 - iter 856/2142 - loss 0.00107607 - time (sec): 74.68 - samples/sec: 2711.73 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:57:28,903 epoch 98 - iter 1070/2142 - loss 0.00109084 - time (sec): 93.67 - samples/sec: 2719.22 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:57:47,264 epoch 98 - iter 1284/2142 - loss 0.00099281 - time (sec): 112.03 - samples/sec: 2722.84 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:58:05,967 epoch 98 - iter 1498/2142 - loss 0.00103840 - time (sec): 130.73 - samples/sec: 2721.65 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:58:24,728 epoch 98 - iter 1712/2142 - loss 0.00095733 - time (sec): 149.50 - samples/sec: 2711.87 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:58:42,901 epoch 98 - iter 1926/2142 - loss 0.00090971 - time (sec): 167.67 - samples/sec: 2716.04 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:59:01,461 epoch 98 - iter 2140/2142 - loss 0.00092160 - time (sec): 186.23 - samples/sec: 2714.16 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:59:01,635 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:59:01,635 EPOCH 98 done: loss 0.0009 - lr: 0.000002 +2024-10-02 03:59:11,510 DEV : loss 0.3124086260795593 - f1-score (micro avg) 0.9177 +2024-10-02 03:59:11,542 ---------------------------------------------------------------------------------------------------- +2024-10-02 03:59:30,351 epoch 99 - iter 214/2142 - loss 0.00113449 - time (sec): 18.81 - samples/sec: 2751.26 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 03:59:48,816 epoch 99 - iter 428/2142 - loss 0.00090802 - time (sec): 37.27 - samples/sec: 2713.43 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:00:07,458 epoch 99 - iter 642/2142 - loss 0.00086916 - time (sec): 55.91 - samples/sec: 2708.94 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:00:25,495 epoch 99 - iter 856/2142 - loss 0.00077221 - time (sec): 73.95 - samples/sec: 2732.79 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:00:44,330 epoch 99 - iter 1070/2142 - loss 0.00070201 - time (sec): 92.79 - samples/sec: 2729.94 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:01:02,960 epoch 99 - iter 1284/2142 - loss 0.00072076 - time (sec): 111.42 - samples/sec: 2724.29 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:01:21,687 epoch 99 - iter 1498/2142 - loss 0.00073639 - time (sec): 130.14 - samples/sec: 2721.71 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:01:40,318 epoch 99 - iter 1712/2142 - loss 0.00069855 - time (sec): 148.77 - samples/sec: 2721.21 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:01:58,759 epoch 99 - iter 1926/2142 - loss 0.00066577 - time (sec): 167.22 - samples/sec: 2724.30 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:02:17,162 epoch 99 - iter 2140/2142 - loss 0.00066411 - time (sec): 185.62 - samples/sec: 2723.15 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:02:17,360 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:02:17,361 EPOCH 99 done: loss 0.0007 - lr: 0.000002 +2024-10-02 04:02:27,343 DEV : loss 0.3166857957839966 - f1-score (micro avg) 0.9192 +2024-10-02 04:02:27,371 saving best model +2024-10-02 04:02:30,797 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:02:49,194 epoch 100 - iter 214/2142 - loss 0.00082052 - time (sec): 18.40 - samples/sec: 2737.06 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:03:07,668 epoch 100 - iter 428/2142 - loss 0.00093919 - time (sec): 36.87 - samples/sec: 2726.79 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:03:26,236 epoch 100 - iter 642/2142 - loss 0.00104534 - time (sec): 55.44 - samples/sec: 2724.36 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:03:45,288 epoch 100 - iter 856/2142 - loss 0.00097670 - time (sec): 74.49 - samples/sec: 2729.16 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:04:03,705 epoch 100 - iter 1070/2142 - loss 0.00084298 - time (sec): 92.91 - samples/sec: 2725.45 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:04:21,797 epoch 100 - iter 1284/2142 - loss 0.00076275 - time (sec): 111.00 - samples/sec: 2727.02 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:04:40,574 epoch 100 - iter 1498/2142 - loss 0.00089271 - time (sec): 129.78 - samples/sec: 2726.92 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:04:59,020 epoch 100 - iter 1712/2142 - loss 0.00093787 - time (sec): 148.22 - samples/sec: 2726.21 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:05:17,845 epoch 100 - iter 1926/2142 - loss 0.00091710 - time (sec): 167.05 - samples/sec: 2726.83 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:05:36,499 epoch 100 - iter 2140/2142 - loss 0.00093084 - time (sec): 185.70 - samples/sec: 2722.11 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:05:36,652 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:05:36,653 EPOCH 100 done: loss 0.0009 - lr: 0.000002 +2024-10-02 04:05:45,679 DEV : loss 0.3115612864494324 - f1-score (micro avg) 0.9199 +2024-10-02 04:05:45,707 saving best model +2024-10-02 04:05:49,480 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:06:07,988 epoch 101 - iter 214/2142 - loss 0.00104461 - time (sec): 18.50 - samples/sec: 2763.19 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:06:26,625 epoch 101 - iter 428/2142 - loss 0.00091675 - time (sec): 37.14 - samples/sec: 2729.40 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:06:45,473 epoch 101 - iter 642/2142 - loss 0.00096957 - time (sec): 55.99 - samples/sec: 2716.33 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:07:03,771 epoch 101 - iter 856/2142 - loss 0.00113574 - time (sec): 74.29 - samples/sec: 2717.31 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:07:22,306 epoch 101 - iter 1070/2142 - loss 0.00109477 - time (sec): 92.82 - samples/sec: 2710.00 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:07:40,954 epoch 101 - iter 1284/2142 - loss 0.00099194 - time (sec): 111.47 - samples/sec: 2711.11 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:07:59,715 epoch 101 - iter 1498/2142 - loss 0.00110908 - time (sec): 130.23 - samples/sec: 2708.21 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:08:18,228 epoch 101 - iter 1712/2142 - loss 0.00113048 - time (sec): 148.74 - samples/sec: 2708.62 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:08:36,992 epoch 101 - iter 1926/2142 - loss 0.00103088 - time (sec): 167.51 - samples/sec: 2708.20 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:08:56,130 epoch 101 - iter 2140/2142 - loss 0.00103684 - time (sec): 186.64 - samples/sec: 2708.70 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:08:56,276 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:08:56,276 EPOCH 101 done: loss 0.0010 - lr: 0.000002 +2024-10-02 04:09:06,242 DEV : loss 0.3091588020324707 - f1-score (micro avg) 0.9177 +2024-10-02 04:09:06,272 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:09:25,141 epoch 102 - iter 214/2142 - loss 0.00034440 - time (sec): 18.87 - samples/sec: 2699.01 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:09:43,642 epoch 102 - iter 428/2142 - loss 0.00028455 - time (sec): 37.37 - samples/sec: 2731.26 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:10:01,923 epoch 102 - iter 642/2142 - loss 0.00032031 - time (sec): 55.65 - samples/sec: 2738.21 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:10:20,916 epoch 102 - iter 856/2142 - loss 0.00039148 - time (sec): 74.64 - samples/sec: 2724.33 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:10:39,345 epoch 102 - iter 1070/2142 - loss 0.00035934 - time (sec): 93.07 - samples/sec: 2719.38 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:10:57,954 epoch 102 - iter 1284/2142 - loss 0.00046164 - time (sec): 111.68 - samples/sec: 2716.44 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:11:16,770 epoch 102 - iter 1498/2142 - loss 0.00051054 - time (sec): 130.50 - samples/sec: 2715.20 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:11:35,359 epoch 102 - iter 1712/2142 - loss 0.00061914 - time (sec): 149.09 - samples/sec: 2713.39 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:11:54,095 epoch 102 - iter 1926/2142 - loss 0.00064286 - time (sec): 167.82 - samples/sec: 2710.72 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:12:12,303 epoch 102 - iter 2140/2142 - loss 0.00067450 - time (sec): 186.03 - samples/sec: 2716.91 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:12:12,466 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:12:12,467 EPOCH 102 done: loss 0.0007 - lr: 0.000002 +2024-10-02 04:12:22,390 DEV : loss 0.3121803402900696 - f1-score (micro avg) 0.919 +2024-10-02 04:12:22,418 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:12:41,004 epoch 103 - iter 214/2142 - loss 0.00041426 - time (sec): 18.59 - samples/sec: 2742.99 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:12:59,514 epoch 103 - iter 428/2142 - loss 0.00121514 - time (sec): 37.09 - samples/sec: 2723.07 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:13:18,229 epoch 103 - iter 642/2142 - loss 0.00110597 - time (sec): 55.81 - samples/sec: 2710.29 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:13:36,680 epoch 103 - iter 856/2142 - loss 0.00106886 - time (sec): 74.26 - samples/sec: 2719.34 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:13:55,605 epoch 103 - iter 1070/2142 - loss 0.00090673 - time (sec): 93.19 - samples/sec: 2700.44 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:14:14,152 epoch 103 - iter 1284/2142 - loss 0.00084943 - time (sec): 111.73 - samples/sec: 2702.39 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:14:32,879 epoch 103 - iter 1498/2142 - loss 0.00098301 - time (sec): 130.46 - samples/sec: 2701.78 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:14:51,742 epoch 103 - iter 1712/2142 - loss 0.00097425 - time (sec): 149.32 - samples/sec: 2699.70 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:15:10,471 epoch 103 - iter 1926/2142 - loss 0.00097651 - time (sec): 168.05 - samples/sec: 2706.27 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:15:29,110 epoch 103 - iter 2140/2142 - loss 0.00095476 - time (sec): 186.69 - samples/sec: 2707.77 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:15:29,260 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:15:29,261 EPOCH 103 done: loss 0.0010 - lr: 0.000002 +2024-10-02 04:15:38,455 DEV : loss 0.31358224153518677 - f1-score (micro avg) 0.9169 +2024-10-02 04:15:38,485 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:15:57,365 epoch 104 - iter 214/2142 - loss 0.00084368 - time (sec): 18.88 - samples/sec: 2671.43 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:16:15,441 epoch 104 - iter 428/2142 - loss 0.00113763 - time (sec): 36.95 - samples/sec: 2693.99 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:16:33,789 epoch 104 - iter 642/2142 - loss 0.00083190 - time (sec): 55.30 - samples/sec: 2705.61 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:16:52,384 epoch 104 - iter 856/2142 - loss 0.00075271 - time (sec): 73.90 - samples/sec: 2701.99 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:17:11,461 epoch 104 - iter 1070/2142 - loss 0.00086585 - time (sec): 92.97 - samples/sec: 2697.50 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:17:30,250 epoch 104 - iter 1284/2142 - loss 0.00084343 - time (sec): 111.76 - samples/sec: 2701.28 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:17:49,185 epoch 104 - iter 1498/2142 - loss 0.00084176 - time (sec): 130.70 - samples/sec: 2703.83 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:18:07,658 epoch 104 - iter 1712/2142 - loss 0.00097995 - time (sec): 149.17 - samples/sec: 2705.20 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:18:26,247 epoch 104 - iter 1926/2142 - loss 0.00092006 - time (sec): 167.76 - samples/sec: 2710.60 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:18:44,582 epoch 104 - iter 2140/2142 - loss 0.00087887 - time (sec): 186.10 - samples/sec: 2716.76 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:18:44,725 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:18:44,726 EPOCH 104 done: loss 0.0009 - lr: 0.000002 +2024-10-02 04:18:54,474 DEV : loss 0.31673723459243774 - f1-score (micro avg) 0.9177 +2024-10-02 04:18:54,503 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:19:13,360 epoch 105 - iter 214/2142 - loss 0.00041225 - time (sec): 18.86 - samples/sec: 2665.02 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:19:32,066 epoch 105 - iter 428/2142 - loss 0.00060999 - time (sec): 37.56 - samples/sec: 2705.38 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:19:50,572 epoch 105 - iter 642/2142 - loss 0.00076982 - time (sec): 56.07 - samples/sec: 2713.13 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:20:08,914 epoch 105 - iter 856/2142 - loss 0.00072432 - time (sec): 74.41 - samples/sec: 2717.30 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:20:27,597 epoch 105 - iter 1070/2142 - loss 0.00104110 - time (sec): 93.09 - samples/sec: 2707.06 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:20:46,435 epoch 105 - iter 1284/2142 - loss 0.00106422 - time (sec): 111.93 - samples/sec: 2717.21 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:21:04,831 epoch 105 - iter 1498/2142 - loss 0.00095849 - time (sec): 130.33 - samples/sec: 2719.11 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:21:23,006 epoch 105 - iter 1712/2142 - loss 0.00087590 - time (sec): 148.50 - samples/sec: 2725.62 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:21:41,191 epoch 105 - iter 1926/2142 - loss 0.00084966 - time (sec): 166.69 - samples/sec: 2730.07 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:21:59,822 epoch 105 - iter 2140/2142 - loss 0.00082837 - time (sec): 185.32 - samples/sec: 2727.81 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:21:59,971 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:21:59,972 EPOCH 105 done: loss 0.0008 - lr: 0.000002 +2024-10-02 04:22:09,756 DEV : loss 0.3118349313735962 - f1-score (micro avg) 0.9173 +2024-10-02 04:22:09,789 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:22:28,203 epoch 106 - iter 214/2142 - loss 0.00066522 - time (sec): 18.41 - samples/sec: 2801.85 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:22:46,872 epoch 106 - iter 428/2142 - loss 0.00076250 - time (sec): 37.08 - samples/sec: 2740.93 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:23:05,352 epoch 106 - iter 642/2142 - loss 0.00054629 - time (sec): 55.56 - samples/sec: 2744.51 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:23:23,914 epoch 106 - iter 856/2142 - loss 0.00056756 - time (sec): 74.12 - samples/sec: 2736.39 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:23:42,513 epoch 106 - iter 1070/2142 - loss 0.00057539 - time (sec): 92.72 - samples/sec: 2739.35 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:24:01,051 epoch 106 - iter 1284/2142 - loss 0.00059208 - time (sec): 111.26 - samples/sec: 2737.04 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:24:19,728 epoch 106 - iter 1498/2142 - loss 0.00059603 - time (sec): 129.94 - samples/sec: 2730.84 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:24:38,278 epoch 106 - iter 1712/2142 - loss 0.00076161 - time (sec): 148.49 - samples/sec: 2722.99 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:24:56,411 epoch 106 - iter 1926/2142 - loss 0.00073742 - time (sec): 166.62 - samples/sec: 2728.81 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:25:15,858 epoch 106 - iter 2140/2142 - loss 0.00073262 - time (sec): 186.07 - samples/sec: 2716.50 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:25:16,029 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:25:16,030 EPOCH 106 done: loss 0.0007 - lr: 0.000002 +2024-10-02 04:25:25,918 DEV : loss 0.3067542314529419 - f1-score (micro avg) 0.9173 +2024-10-02 04:25:25,952 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:25:44,350 epoch 107 - iter 214/2142 - loss 0.00007640 - time (sec): 18.40 - samples/sec: 2756.27 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:26:02,979 epoch 107 - iter 428/2142 - loss 0.00029893 - time (sec): 37.03 - samples/sec: 2725.87 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:26:21,627 epoch 107 - iter 642/2142 - loss 0.00042354 - time (sec): 55.67 - samples/sec: 2724.91 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:26:40,396 epoch 107 - iter 856/2142 - loss 0.00044665 - time (sec): 74.44 - samples/sec: 2709.88 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:26:58,907 epoch 107 - iter 1070/2142 - loss 0.00045958 - time (sec): 92.95 - samples/sec: 2716.85 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:27:17,835 epoch 107 - iter 1284/2142 - loss 0.00047348 - time (sec): 111.88 - samples/sec: 2705.68 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:27:36,705 epoch 107 - iter 1498/2142 - loss 0.00047609 - time (sec): 130.75 - samples/sec: 2703.12 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:27:55,308 epoch 107 - iter 1712/2142 - loss 0.00048128 - time (sec): 149.35 - samples/sec: 2707.83 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:28:13,638 epoch 107 - iter 1926/2142 - loss 0.00055399 - time (sec): 167.68 - samples/sec: 2713.66 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:28:32,005 epoch 107 - iter 2140/2142 - loss 0.00068884 - time (sec): 186.05 - samples/sec: 2717.56 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:28:32,169 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:28:32,169 EPOCH 107 done: loss 0.0007 - lr: 0.000002 +2024-10-02 04:28:41,362 DEV : loss 0.31450173258781433 - f1-score (micro avg) 0.9187 +2024-10-02 04:28:41,393 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:28:59,914 epoch 108 - iter 214/2142 - loss 0.00034800 - time (sec): 18.52 - samples/sec: 2734.02 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:29:18,435 epoch 108 - iter 428/2142 - loss 0.00030205 - time (sec): 37.04 - samples/sec: 2729.03 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:29:36,753 epoch 108 - iter 642/2142 - loss 0.00038076 - time (sec): 55.36 - samples/sec: 2745.42 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:29:55,251 epoch 108 - iter 856/2142 - loss 0.00053002 - time (sec): 73.86 - samples/sec: 2731.79 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:30:13,699 epoch 108 - iter 1070/2142 - loss 0.00046738 - time (sec): 92.30 - samples/sec: 2740.52 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:30:32,566 epoch 108 - iter 1284/2142 - loss 0.00062396 - time (sec): 111.17 - samples/sec: 2739.62 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:30:51,262 epoch 108 - iter 1498/2142 - loss 0.00059158 - time (sec): 129.87 - samples/sec: 2736.01 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:31:10,266 epoch 108 - iter 1712/2142 - loss 0.00059138 - time (sec): 148.87 - samples/sec: 2728.50 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:31:28,812 epoch 108 - iter 1926/2142 - loss 0.00071584 - time (sec): 167.42 - samples/sec: 2727.04 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:31:46,961 epoch 108 - iter 2140/2142 - loss 0.00068302 - time (sec): 185.57 - samples/sec: 2724.62 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:31:47,105 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:31:47,105 EPOCH 108 done: loss 0.0007 - lr: 0.000002 +2024-10-02 04:31:57,012 DEV : loss 0.307809054851532 - f1-score (micro avg) 0.9173 +2024-10-02 04:31:57,040 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:32:16,230 epoch 109 - iter 214/2142 - loss 0.00009124 - time (sec): 19.19 - samples/sec: 2733.29 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:32:35,073 epoch 109 - iter 428/2142 - loss 0.00036010 - time (sec): 38.03 - samples/sec: 2684.71 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:32:53,247 epoch 109 - iter 642/2142 - loss 0.00034937 - time (sec): 56.21 - samples/sec: 2687.12 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:33:11,867 epoch 109 - iter 856/2142 - loss 0.00029298 - time (sec): 74.83 - samples/sec: 2694.17 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:33:30,453 epoch 109 - iter 1070/2142 - loss 0.00036642 - time (sec): 93.41 - samples/sec: 2705.74 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:33:49,362 epoch 109 - iter 1284/2142 - loss 0.00062484 - time (sec): 112.32 - samples/sec: 2710.31 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:34:07,564 epoch 109 - iter 1498/2142 - loss 0.00057412 - time (sec): 130.52 - samples/sec: 2716.62 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:34:26,146 epoch 109 - iter 1712/2142 - loss 0.00055631 - time (sec): 149.11 - samples/sec: 2720.99 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:34:44,283 epoch 109 - iter 1926/2142 - loss 0.00059859 - time (sec): 167.24 - samples/sec: 2723.40 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:35:02,878 epoch 109 - iter 2140/2142 - loss 0.00070015 - time (sec): 185.84 - samples/sec: 2720.47 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:35:03,021 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:35:03,021 EPOCH 109 done: loss 0.0007 - lr: 0.000002 +2024-10-02 04:35:11,895 DEV : loss 0.3060796558856964 - f1-score (micro avg) 0.9181 +2024-10-02 04:35:11,923 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:35:30,321 epoch 110 - iter 214/2142 - loss 0.00075252 - time (sec): 18.40 - samples/sec: 2742.84 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:35:49,108 epoch 110 - iter 428/2142 - loss 0.00066222 - time (sec): 37.18 - samples/sec: 2720.24 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:36:07,717 epoch 110 - iter 642/2142 - loss 0.00065382 - time (sec): 55.79 - samples/sec: 2713.33 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:36:26,460 epoch 110 - iter 856/2142 - loss 0.00061207 - time (sec): 74.54 - samples/sec: 2721.45 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:36:45,118 epoch 110 - iter 1070/2142 - loss 0.00064683 - time (sec): 93.19 - samples/sec: 2726.96 - lr: 0.000002 - momentum: 0.000000 +2024-10-02 04:37:03,672 epoch 110 - iter 1284/2142 - loss 0.00067157 - time (sec): 111.75 - samples/sec: 2728.34 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:37:22,002 epoch 110 - iter 1498/2142 - loss 0.00066793 - time (sec): 130.08 - samples/sec: 2725.54 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:37:40,299 epoch 110 - iter 1712/2142 - loss 0.00059654 - time (sec): 148.38 - samples/sec: 2728.49 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:37:59,213 epoch 110 - iter 1926/2142 - loss 0.00054220 - time (sec): 167.29 - samples/sec: 2726.87 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:38:17,639 epoch 110 - iter 2140/2142 - loss 0.00065412 - time (sec): 185.71 - samples/sec: 2722.47 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:38:17,768 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:38:17,769 EPOCH 110 done: loss 0.0007 - lr: 0.000001 +2024-10-02 04:38:28,305 DEV : loss 0.3101864159107208 - f1-score (micro avg) 0.9187 +2024-10-02 04:38:28,335 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:38:47,234 epoch 111 - iter 214/2142 - loss 0.00030017 - time (sec): 18.90 - samples/sec: 2651.18 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:39:05,829 epoch 111 - iter 428/2142 - loss 0.00035961 - time (sec): 37.49 - samples/sec: 2694.47 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:39:24,205 epoch 111 - iter 642/2142 - loss 0.00060190 - time (sec): 55.87 - samples/sec: 2703.11 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:39:43,092 epoch 111 - iter 856/2142 - loss 0.00054872 - time (sec): 74.76 - samples/sec: 2693.92 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:40:01,436 epoch 111 - iter 1070/2142 - loss 0.00067679 - time (sec): 93.10 - samples/sec: 2705.89 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:40:19,999 epoch 111 - iter 1284/2142 - loss 0.00082965 - time (sec): 111.66 - samples/sec: 2711.04 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:40:38,258 epoch 111 - iter 1498/2142 - loss 0.00072524 - time (sec): 129.92 - samples/sec: 2715.74 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:40:57,420 epoch 111 - iter 1712/2142 - loss 0.00072070 - time (sec): 149.08 - samples/sec: 2713.20 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:41:15,880 epoch 111 - iter 1926/2142 - loss 0.00077204 - time (sec): 167.54 - samples/sec: 2712.55 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:41:34,959 epoch 111 - iter 2140/2142 - loss 0.00079143 - time (sec): 186.62 - samples/sec: 2708.88 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:41:35,123 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:41:35,123 EPOCH 111 done: loss 0.0008 - lr: 0.000001 +2024-10-02 04:41:44,959 DEV : loss 0.31595951318740845 - f1-score (micro avg) 0.918 +2024-10-02 04:41:44,990 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:42:03,404 epoch 112 - iter 214/2142 - loss 0.00104673 - time (sec): 18.41 - samples/sec: 2699.71 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:42:22,112 epoch 112 - iter 428/2142 - loss 0.00103033 - time (sec): 37.12 - samples/sec: 2729.07 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:42:40,755 epoch 112 - iter 642/2142 - loss 0.00096377 - time (sec): 55.76 - samples/sec: 2713.01 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:42:59,674 epoch 112 - iter 856/2142 - loss 0.00088384 - time (sec): 74.68 - samples/sec: 2708.50 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:43:18,507 epoch 112 - iter 1070/2142 - loss 0.00088275 - time (sec): 93.52 - samples/sec: 2707.83 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:43:36,747 epoch 112 - iter 1284/2142 - loss 0.00082626 - time (sec): 111.76 - samples/sec: 2715.28 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:43:55,099 epoch 112 - iter 1498/2142 - loss 0.00083363 - time (sec): 130.11 - samples/sec: 2715.08 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:44:13,476 epoch 112 - iter 1712/2142 - loss 0.00074616 - time (sec): 148.48 - samples/sec: 2718.95 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:44:32,671 epoch 112 - iter 1926/2142 - loss 0.00074092 - time (sec): 167.68 - samples/sec: 2715.07 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:44:51,261 epoch 112 - iter 2140/2142 - loss 0.00078579 - time (sec): 186.27 - samples/sec: 2714.38 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:44:51,384 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:44:51,385 EPOCH 112 done: loss 0.0008 - lr: 0.000001 +2024-10-02 04:45:01,449 DEV : loss 0.31776201725006104 - f1-score (micro avg) 0.9173 +2024-10-02 04:45:01,478 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:45:19,783 epoch 113 - iter 214/2142 - loss 0.00065900 - time (sec): 18.30 - samples/sec: 2713.47 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:45:38,068 epoch 113 - iter 428/2142 - loss 0.00045570 - time (sec): 36.59 - samples/sec: 2724.81 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:45:57,334 epoch 113 - iter 642/2142 - loss 0.00050134 - time (sec): 55.85 - samples/sec: 2695.63 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:46:15,849 epoch 113 - iter 856/2142 - loss 0.00060954 - time (sec): 74.37 - samples/sec: 2708.67 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:46:34,217 epoch 113 - iter 1070/2142 - loss 0.00054885 - time (sec): 92.74 - samples/sec: 2716.97 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:46:53,001 epoch 113 - iter 1284/2142 - loss 0.00056822 - time (sec): 111.52 - samples/sec: 2721.28 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:47:11,613 epoch 113 - iter 1498/2142 - loss 0.00057947 - time (sec): 130.13 - samples/sec: 2719.37 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:47:30,965 epoch 113 - iter 1712/2142 - loss 0.00058859 - time (sec): 149.49 - samples/sec: 2707.09 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:47:49,203 epoch 113 - iter 1926/2142 - loss 0.00064175 - time (sec): 167.72 - samples/sec: 2708.37 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:48:07,761 epoch 113 - iter 2140/2142 - loss 0.00076436 - time (sec): 186.28 - samples/sec: 2713.15 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:48:07,924 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:48:07,925 EPOCH 113 done: loss 0.0008 - lr: 0.000001 +2024-10-02 04:48:17,215 DEV : loss 0.3106064200401306 - f1-score (micro avg) 0.9179 +2024-10-02 04:48:17,247 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:48:36,513 epoch 114 - iter 214/2142 - loss 0.00016085 - time (sec): 19.26 - samples/sec: 2696.65 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:48:55,192 epoch 114 - iter 428/2142 - loss 0.00067557 - time (sec): 37.94 - samples/sec: 2695.18 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:49:13,438 epoch 114 - iter 642/2142 - loss 0.00063916 - time (sec): 56.19 - samples/sec: 2689.84 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:49:31,859 epoch 114 - iter 856/2142 - loss 0.00052853 - time (sec): 74.61 - samples/sec: 2697.08 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:49:50,463 epoch 114 - iter 1070/2142 - loss 0.00050867 - time (sec): 93.21 - samples/sec: 2699.32 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:50:09,060 epoch 114 - iter 1284/2142 - loss 0.00056723 - time (sec): 111.81 - samples/sec: 2700.74 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:50:27,692 epoch 114 - iter 1498/2142 - loss 0.00068086 - time (sec): 130.44 - samples/sec: 2703.23 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:50:46,412 epoch 114 - iter 1712/2142 - loss 0.00066153 - time (sec): 149.16 - samples/sec: 2707.20 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:51:05,545 epoch 114 - iter 1926/2142 - loss 0.00069987 - time (sec): 168.30 - samples/sec: 2706.97 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:51:23,822 epoch 114 - iter 2140/2142 - loss 0.00074183 - time (sec): 186.57 - samples/sec: 2709.47 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:51:23,980 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:51:23,980 EPOCH 114 done: loss 0.0007 - lr: 0.000001 +2024-10-02 04:51:33,842 DEV : loss 0.3146522641181946 - f1-score (micro avg) 0.9182 +2024-10-02 04:51:33,873 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:51:52,463 epoch 115 - iter 214/2142 - loss 0.00018833 - time (sec): 18.59 - samples/sec: 2728.84 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:52:11,297 epoch 115 - iter 428/2142 - loss 0.00028378 - time (sec): 37.42 - samples/sec: 2746.81 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:52:29,809 epoch 115 - iter 642/2142 - loss 0.00044721 - time (sec): 55.94 - samples/sec: 2741.43 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:52:48,598 epoch 115 - iter 856/2142 - loss 0.00064693 - time (sec): 74.72 - samples/sec: 2745.29 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:53:06,809 epoch 115 - iter 1070/2142 - loss 0.00062121 - time (sec): 92.94 - samples/sec: 2748.89 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:53:25,560 epoch 115 - iter 1284/2142 - loss 0.00065325 - time (sec): 111.69 - samples/sec: 2742.62 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:53:43,895 epoch 115 - iter 1498/2142 - loss 0.00061361 - time (sec): 130.02 - samples/sec: 2733.93 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:54:03,145 epoch 115 - iter 1712/2142 - loss 0.00068906 - time (sec): 149.27 - samples/sec: 2717.13 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:54:21,582 epoch 115 - iter 1926/2142 - loss 0.00069684 - time (sec): 167.71 - samples/sec: 2717.05 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:54:39,634 epoch 115 - iter 2140/2142 - loss 0.00066429 - time (sec): 185.76 - samples/sec: 2721.14 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:54:39,801 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:54:39,801 EPOCH 115 done: loss 0.0007 - lr: 0.000001 +2024-10-02 04:54:50,034 DEV : loss 0.31950661540031433 - f1-score (micro avg) 0.9178 +2024-10-02 04:54:50,067 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:55:08,659 epoch 116 - iter 214/2142 - loss 0.00037456 - time (sec): 18.59 - samples/sec: 2739.93 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:55:27,415 epoch 116 - iter 428/2142 - loss 0.00042235 - time (sec): 37.35 - samples/sec: 2734.49 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:55:45,969 epoch 116 - iter 642/2142 - loss 0.00036810 - time (sec): 55.90 - samples/sec: 2713.54 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:56:04,737 epoch 116 - iter 856/2142 - loss 0.00041696 - time (sec): 74.67 - samples/sec: 2713.58 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:56:23,683 epoch 116 - iter 1070/2142 - loss 0.00040463 - time (sec): 93.61 - samples/sec: 2714.48 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:56:42,295 epoch 116 - iter 1284/2142 - loss 0.00046562 - time (sec): 112.23 - samples/sec: 2716.37 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:57:00,488 epoch 116 - iter 1498/2142 - loss 0.00049164 - time (sec): 130.42 - samples/sec: 2728.99 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:57:18,781 epoch 116 - iter 1712/2142 - loss 0.00064234 - time (sec): 148.71 - samples/sec: 2729.68 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:57:37,370 epoch 116 - iter 1926/2142 - loss 0.00064544 - time (sec): 167.30 - samples/sec: 2729.91 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:57:55,831 epoch 116 - iter 2140/2142 - loss 0.00063217 - time (sec): 185.76 - samples/sec: 2721.21 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:57:55,996 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:57:55,997 EPOCH 116 done: loss 0.0006 - lr: 0.000001 +2024-10-02 04:58:05,271 DEV : loss 0.31484150886535645 - f1-score (micro avg) 0.9197 +2024-10-02 04:58:05,299 ---------------------------------------------------------------------------------------------------- +2024-10-02 04:58:24,816 epoch 117 - iter 214/2142 - loss 0.00038473 - time (sec): 19.52 - samples/sec: 2656.23 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:58:43,174 epoch 117 - iter 428/2142 - loss 0.00057168 - time (sec): 37.87 - samples/sec: 2700.34 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:59:02,074 epoch 117 - iter 642/2142 - loss 0.00056796 - time (sec): 56.77 - samples/sec: 2706.91 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:59:20,343 epoch 117 - iter 856/2142 - loss 0.00051891 - time (sec): 75.04 - samples/sec: 2710.56 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:59:38,859 epoch 117 - iter 1070/2142 - loss 0.00046937 - time (sec): 93.56 - samples/sec: 2707.13 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 04:59:57,479 epoch 117 - iter 1284/2142 - loss 0.00042903 - time (sec): 112.18 - samples/sec: 2699.86 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:00:16,053 epoch 117 - iter 1498/2142 - loss 0.00048764 - time (sec): 130.75 - samples/sec: 2705.49 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:00:35,065 epoch 117 - iter 1712/2142 - loss 0.00043719 - time (sec): 149.76 - samples/sec: 2705.23 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:00:53,332 epoch 117 - iter 1926/2142 - loss 0.00051680 - time (sec): 168.03 - samples/sec: 2708.89 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:01:11,793 epoch 117 - iter 2140/2142 - loss 0.00050313 - time (sec): 186.49 - samples/sec: 2710.97 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:01:11,962 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:01:11,962 EPOCH 117 done: loss 0.0005 - lr: 0.000001 +2024-10-02 05:01:21,811 DEV : loss 0.3192370533943176 - f1-score (micro avg) 0.9192 +2024-10-02 05:01:21,841 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:01:40,603 epoch 118 - iter 214/2142 - loss 0.00084082 - time (sec): 18.76 - samples/sec: 2706.29 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:01:59,175 epoch 118 - iter 428/2142 - loss 0.00054436 - time (sec): 37.33 - samples/sec: 2718.79 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:02:18,076 epoch 118 - iter 642/2142 - loss 0.00066337 - time (sec): 56.23 - samples/sec: 2715.50 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:02:36,454 epoch 118 - iter 856/2142 - loss 0.00088989 - time (sec): 74.61 - samples/sec: 2729.92 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:02:55,336 epoch 118 - iter 1070/2142 - loss 0.00102188 - time (sec): 93.49 - samples/sec: 2734.08 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:03:13,769 epoch 118 - iter 1284/2142 - loss 0.00090816 - time (sec): 111.93 - samples/sec: 2735.28 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:03:32,251 epoch 118 - iter 1498/2142 - loss 0.00082310 - time (sec): 130.41 - samples/sec: 2730.10 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:03:50,904 epoch 118 - iter 1712/2142 - loss 0.00083450 - time (sec): 149.06 - samples/sec: 2722.66 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:04:09,509 epoch 118 - iter 1926/2142 - loss 0.00080620 - time (sec): 167.67 - samples/sec: 2720.60 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:04:27,648 epoch 118 - iter 2140/2142 - loss 0.00073448 - time (sec): 185.81 - samples/sec: 2719.76 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:04:27,839 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:04:27,840 EPOCH 118 done: loss 0.0007 - lr: 0.000001 +2024-10-02 05:04:37,669 DEV : loss 0.312896192073822 - f1-score (micro avg) 0.9169 +2024-10-02 05:04:37,700 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:04:56,269 epoch 119 - iter 214/2142 - loss 0.00129713 - time (sec): 18.57 - samples/sec: 2789.81 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:05:14,686 epoch 119 - iter 428/2142 - loss 0.00113097 - time (sec): 36.98 - samples/sec: 2773.09 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:05:33,021 epoch 119 - iter 642/2142 - loss 0.00088737 - time (sec): 55.32 - samples/sec: 2745.41 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:05:51,619 epoch 119 - iter 856/2142 - loss 0.00080934 - time (sec): 73.92 - samples/sec: 2728.76 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:06:10,097 epoch 119 - iter 1070/2142 - loss 0.00073978 - time (sec): 92.40 - samples/sec: 2724.57 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:06:29,055 epoch 119 - iter 1284/2142 - loss 0.00064929 - time (sec): 111.35 - samples/sec: 2710.72 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:06:47,673 epoch 119 - iter 1498/2142 - loss 0.00066162 - time (sec): 129.97 - samples/sec: 2715.73 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:07:06,306 epoch 119 - iter 1712/2142 - loss 0.00067155 - time (sec): 148.60 - samples/sec: 2716.63 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:07:25,308 epoch 119 - iter 1926/2142 - loss 0.00068083 - time (sec): 167.61 - samples/sec: 2712.66 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:07:44,103 epoch 119 - iter 2140/2142 - loss 0.00062531 - time (sec): 186.40 - samples/sec: 2711.99 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:07:44,267 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:07:44,267 EPOCH 119 done: loss 0.0006 - lr: 0.000001 +2024-10-02 05:07:53,297 DEV : loss 0.31315940618515015 - f1-score (micro avg) 0.9182 +2024-10-02 05:07:53,326 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:08:12,388 epoch 120 - iter 214/2142 - loss 0.00030858 - time (sec): 19.06 - samples/sec: 2711.67 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:08:30,661 epoch 120 - iter 428/2142 - loss 0.00024138 - time (sec): 37.33 - samples/sec: 2687.93 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:08:49,399 epoch 120 - iter 642/2142 - loss 0.00045535 - time (sec): 56.07 - samples/sec: 2691.99 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:09:08,100 epoch 120 - iter 856/2142 - loss 0.00050287 - time (sec): 74.77 - samples/sec: 2700.70 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:09:26,931 epoch 120 - iter 1070/2142 - loss 0.00043655 - time (sec): 93.60 - samples/sec: 2703.70 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:09:45,434 epoch 120 - iter 1284/2142 - loss 0.00038310 - time (sec): 112.11 - samples/sec: 2711.62 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:10:04,281 epoch 120 - iter 1498/2142 - loss 0.00045136 - time (sec): 130.95 - samples/sec: 2710.49 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:10:22,450 epoch 120 - iter 1712/2142 - loss 0.00041232 - time (sec): 149.12 - samples/sec: 2713.08 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:10:41,160 epoch 120 - iter 1926/2142 - loss 0.00037951 - time (sec): 167.83 - samples/sec: 2711.72 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:11:00,229 epoch 120 - iter 2140/2142 - loss 0.00039536 - time (sec): 186.90 - samples/sec: 2704.99 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:11:00,401 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:11:00,402 EPOCH 120 done: loss 0.0004 - lr: 0.000001 +2024-10-02 05:11:10,381 DEV : loss 0.3164573609828949 - f1-score (micro avg) 0.9195 +2024-10-02 05:11:10,414 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:11:28,761 epoch 121 - iter 214/2142 - loss 0.00036932 - time (sec): 18.35 - samples/sec: 2738.86 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:11:47,383 epoch 121 - iter 428/2142 - loss 0.00023377 - time (sec): 36.97 - samples/sec: 2712.54 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:12:05,996 epoch 121 - iter 642/2142 - loss 0.00046642 - time (sec): 55.58 - samples/sec: 2709.77 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:12:24,832 epoch 121 - iter 856/2142 - loss 0.00052523 - time (sec): 74.42 - samples/sec: 2698.15 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:12:43,583 epoch 121 - iter 1070/2142 - loss 0.00050082 - time (sec): 93.17 - samples/sec: 2699.11 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:13:02,154 epoch 121 - iter 1284/2142 - loss 0.00047869 - time (sec): 111.74 - samples/sec: 2709.11 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:13:21,086 epoch 121 - iter 1498/2142 - loss 0.00051353 - time (sec): 130.67 - samples/sec: 2698.83 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:13:39,926 epoch 121 - iter 1712/2142 - loss 0.00057091 - time (sec): 149.51 - samples/sec: 2701.60 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:13:58,543 epoch 121 - iter 1926/2142 - loss 0.00054797 - time (sec): 168.13 - samples/sec: 2702.20 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:14:17,121 epoch 121 - iter 2140/2142 - loss 0.00051998 - time (sec): 186.71 - samples/sec: 2707.45 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:14:17,265 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:14:17,266 EPOCH 121 done: loss 0.0005 - lr: 0.000001 +2024-10-02 05:14:26,463 DEV : loss 0.30906662344932556 - f1-score (micro avg) 0.9185 +2024-10-02 05:14:26,496 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:14:45,269 epoch 122 - iter 214/2142 - loss 0.00019629 - time (sec): 18.77 - samples/sec: 2694.98 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:15:04,747 epoch 122 - iter 428/2142 - loss 0.00013434 - time (sec): 38.25 - samples/sec: 2623.14 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:15:23,557 epoch 122 - iter 642/2142 - loss 0.00024370 - time (sec): 57.06 - samples/sec: 2641.05 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:15:41,990 epoch 122 - iter 856/2142 - loss 0.00025266 - time (sec): 75.49 - samples/sec: 2657.10 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:16:00,230 epoch 122 - iter 1070/2142 - loss 0.00043323 - time (sec): 93.73 - samples/sec: 2669.91 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:16:19,221 epoch 122 - iter 1284/2142 - loss 0.00047974 - time (sec): 112.72 - samples/sec: 2683.64 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:16:37,883 epoch 122 - iter 1498/2142 - loss 0.00042740 - time (sec): 131.39 - samples/sec: 2688.47 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:16:56,614 epoch 122 - iter 1712/2142 - loss 0.00047019 - time (sec): 150.12 - samples/sec: 2691.11 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:17:15,205 epoch 122 - iter 1926/2142 - loss 0.00051150 - time (sec): 168.71 - samples/sec: 2693.75 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:17:34,028 epoch 122 - iter 2140/2142 - loss 0.00048371 - time (sec): 187.53 - samples/sec: 2695.86 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:17:34,200 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:17:34,201 EPOCH 122 done: loss 0.0005 - lr: 0.000001 +2024-10-02 05:17:43,668 DEV : loss 0.31340324878692627 - f1-score (micro avg) 0.9181 +2024-10-02 05:17:43,697 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:18:02,265 epoch 123 - iter 214/2142 - loss 0.00043373 - time (sec): 18.57 - samples/sec: 2723.01 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:18:20,915 epoch 123 - iter 428/2142 - loss 0.00030716 - time (sec): 37.22 - samples/sec: 2698.85 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:18:39,367 epoch 123 - iter 642/2142 - loss 0.00037481 - time (sec): 55.67 - samples/sec: 2712.32 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:18:58,711 epoch 123 - iter 856/2142 - loss 0.00041547 - time (sec): 75.01 - samples/sec: 2705.30 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:19:17,457 epoch 123 - iter 1070/2142 - loss 0.00044809 - time (sec): 93.76 - samples/sec: 2697.05 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:19:36,191 epoch 123 - iter 1284/2142 - loss 0.00054631 - time (sec): 112.49 - samples/sec: 2702.01 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:19:54,686 epoch 123 - iter 1498/2142 - loss 0.00056627 - time (sec): 130.99 - samples/sec: 2700.25 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:20:12,978 epoch 123 - iter 1712/2142 - loss 0.00055346 - time (sec): 149.28 - samples/sec: 2704.65 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:20:31,616 epoch 123 - iter 1926/2142 - loss 0.00051645 - time (sec): 167.92 - samples/sec: 2705.50 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:20:50,349 epoch 123 - iter 2140/2142 - loss 0.00053857 - time (sec): 186.65 - samples/sec: 2708.38 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:20:50,506 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:20:50,506 EPOCH 123 done: loss 0.0005 - lr: 0.000001 +2024-10-02 05:21:00,356 DEV : loss 0.3141731917858124 - f1-score (micro avg) 0.9202 +2024-10-02 05:21:00,386 saving best model +2024-10-02 05:21:04,180 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:21:22,471 epoch 124 - iter 214/2142 - loss 0.00099633 - time (sec): 18.29 - samples/sec: 2746.32 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:21:41,450 epoch 124 - iter 428/2142 - loss 0.00068456 - time (sec): 37.26 - samples/sec: 2713.78 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:21:59,755 epoch 124 - iter 642/2142 - loss 0.00089456 - time (sec): 55.57 - samples/sec: 2714.03 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:22:18,400 epoch 124 - iter 856/2142 - loss 0.00075147 - time (sec): 74.21 - samples/sec: 2717.27 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:22:37,081 epoch 124 - iter 1070/2142 - loss 0.00061993 - time (sec): 92.90 - samples/sec: 2716.71 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:22:55,461 epoch 124 - iter 1284/2142 - loss 0.00061630 - time (sec): 111.28 - samples/sec: 2720.74 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:23:14,444 epoch 124 - iter 1498/2142 - loss 0.00059140 - time (sec): 130.26 - samples/sec: 2719.21 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:23:32,805 epoch 124 - iter 1712/2142 - loss 0.00054592 - time (sec): 148.62 - samples/sec: 2717.71 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:23:52,022 epoch 124 - iter 1926/2142 - loss 0.00050102 - time (sec): 167.84 - samples/sec: 2709.41 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:24:10,444 epoch 124 - iter 2140/2142 - loss 0.00047159 - time (sec): 186.26 - samples/sec: 2714.09 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:24:10,624 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:24:10,625 EPOCH 124 done: loss 0.0005 - lr: 0.000001 +2024-10-02 05:24:20,075 DEV : loss 0.3147316575050354 - f1-score (micro avg) 0.9211 +2024-10-02 05:24:20,104 saving best model +2024-10-02 05:24:23,974 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:24:42,385 epoch 125 - iter 214/2142 - loss 0.00037335 - time (sec): 18.41 - samples/sec: 2725.53 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:25:00,710 epoch 125 - iter 428/2142 - loss 0.00026971 - time (sec): 36.73 - samples/sec: 2701.80 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:25:19,754 epoch 125 - iter 642/2142 - loss 0.00039903 - time (sec): 55.77 - samples/sec: 2691.75 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:25:38,385 epoch 125 - iter 856/2142 - loss 0.00039944 - time (sec): 74.41 - samples/sec: 2688.71 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:25:57,024 epoch 125 - iter 1070/2142 - loss 0.00039595 - time (sec): 93.04 - samples/sec: 2705.22 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:26:15,793 epoch 125 - iter 1284/2142 - loss 0.00038411 - time (sec): 111.81 - samples/sec: 2695.08 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:26:34,159 epoch 125 - iter 1498/2142 - loss 0.00037292 - time (sec): 130.18 - samples/sec: 2702.70 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:26:52,851 epoch 125 - iter 1712/2142 - loss 0.00043479 - time (sec): 148.87 - samples/sec: 2705.52 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:27:11,618 epoch 125 - iter 1926/2142 - loss 0.00043656 - time (sec): 167.64 - samples/sec: 2714.73 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:27:30,249 epoch 125 - iter 2140/2142 - loss 0.00042989 - time (sec): 186.27 - samples/sec: 2714.01 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:27:30,423 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:27:30,423 EPOCH 125 done: loss 0.0004 - lr: 0.000001 +2024-10-02 05:27:40,564 DEV : loss 0.3172149062156677 - f1-score (micro avg) 0.9194 +2024-10-02 05:27:40,598 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:27:59,143 epoch 126 - iter 214/2142 - loss 0.00100549 - time (sec): 18.54 - samples/sec: 2744.76 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:28:17,544 epoch 126 - iter 428/2142 - loss 0.00072404 - time (sec): 36.94 - samples/sec: 2744.31 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:28:35,884 epoch 126 - iter 642/2142 - loss 0.00053476 - time (sec): 55.28 - samples/sec: 2735.80 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:28:54,545 epoch 126 - iter 856/2142 - loss 0.00054433 - time (sec): 73.95 - samples/sec: 2735.90 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:29:13,356 epoch 126 - iter 1070/2142 - loss 0.00062636 - time (sec): 92.76 - samples/sec: 2735.36 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:29:32,216 epoch 126 - iter 1284/2142 - loss 0.00059291 - time (sec): 111.62 - samples/sec: 2721.61 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:29:50,764 epoch 126 - iter 1498/2142 - loss 0.00058869 - time (sec): 130.16 - samples/sec: 2718.42 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:30:09,482 epoch 126 - iter 1712/2142 - loss 0.00059675 - time (sec): 148.88 - samples/sec: 2720.70 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:30:27,877 epoch 126 - iter 1926/2142 - loss 0.00059282 - time (sec): 167.28 - samples/sec: 2718.67 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:30:46,354 epoch 126 - iter 2140/2142 - loss 0.00059243 - time (sec): 185.75 - samples/sec: 2721.48 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:30:46,513 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:30:46,513 EPOCH 126 done: loss 0.0006 - lr: 0.000001 +2024-10-02 05:30:56,343 DEV : loss 0.3199338912963867 - f1-score (micro avg) 0.9193 +2024-10-02 05:30:56,375 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:31:15,068 epoch 127 - iter 214/2142 - loss 0.00064255 - time (sec): 18.69 - samples/sec: 2733.53 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:31:33,291 epoch 127 - iter 428/2142 - loss 0.00054340 - time (sec): 36.91 - samples/sec: 2744.83 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:31:51,997 epoch 127 - iter 642/2142 - loss 0.00060584 - time (sec): 55.62 - samples/sec: 2741.84 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:32:10,207 epoch 127 - iter 856/2142 - loss 0.00053324 - time (sec): 73.83 - samples/sec: 2736.03 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:32:28,924 epoch 127 - iter 1070/2142 - loss 0.00051989 - time (sec): 92.55 - samples/sec: 2738.38 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:32:47,453 epoch 127 - iter 1284/2142 - loss 0.00049514 - time (sec): 111.08 - samples/sec: 2735.05 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:33:06,008 epoch 127 - iter 1498/2142 - loss 0.00047145 - time (sec): 129.63 - samples/sec: 2726.99 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:33:24,978 epoch 127 - iter 1712/2142 - loss 0.00049886 - time (sec): 148.60 - samples/sec: 2721.99 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:33:43,900 epoch 127 - iter 1926/2142 - loss 0.00050016 - time (sec): 167.52 - samples/sec: 2720.42 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:34:02,076 epoch 127 - iter 2140/2142 - loss 0.00052522 - time (sec): 185.70 - samples/sec: 2722.65 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:34:02,209 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:34:02,209 EPOCH 127 done: loss 0.0005 - lr: 0.000001 +2024-10-02 05:34:11,482 DEV : loss 0.32006824016571045 - f1-score (micro avg) 0.9177 +2024-10-02 05:34:11,510 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:34:30,541 epoch 128 - iter 214/2142 - loss 0.00013220 - time (sec): 19.03 - samples/sec: 2658.98 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:34:48,933 epoch 128 - iter 428/2142 - loss 0.00068365 - time (sec): 37.42 - samples/sec: 2703.08 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:35:07,241 epoch 128 - iter 642/2142 - loss 0.00057037 - time (sec): 55.73 - samples/sec: 2704.63 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:35:25,780 epoch 128 - iter 856/2142 - loss 0.00054183 - time (sec): 74.27 - samples/sec: 2703.12 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:35:44,107 epoch 128 - iter 1070/2142 - loss 0.00049347 - time (sec): 92.60 - samples/sec: 2702.84 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:36:02,834 epoch 128 - iter 1284/2142 - loss 0.00042421 - time (sec): 111.32 - samples/sec: 2698.43 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:36:21,620 epoch 128 - iter 1498/2142 - loss 0.00040769 - time (sec): 130.11 - samples/sec: 2702.74 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:36:40,150 epoch 128 - iter 1712/2142 - loss 0.00039198 - time (sec): 148.64 - samples/sec: 2712.92 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:36:58,886 epoch 128 - iter 1926/2142 - loss 0.00035857 - time (sec): 167.37 - samples/sec: 2711.31 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:37:18,206 epoch 128 - iter 2140/2142 - loss 0.00039801 - time (sec): 186.70 - samples/sec: 2707.82 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:37:18,355 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:37:18,355 EPOCH 128 done: loss 0.0004 - lr: 0.000001 +2024-10-02 05:37:28,603 DEV : loss 0.31836816668510437 - f1-score (micro avg) 0.9174 +2024-10-02 05:37:28,632 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:37:46,505 epoch 129 - iter 214/2142 - loss 0.00017520 - time (sec): 17.87 - samples/sec: 2776.40 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:38:05,457 epoch 129 - iter 428/2142 - loss 0.00030386 - time (sec): 36.82 - samples/sec: 2719.66 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:38:24,044 epoch 129 - iter 642/2142 - loss 0.00034187 - time (sec): 55.41 - samples/sec: 2708.07 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:38:42,658 epoch 129 - iter 856/2142 - loss 0.00033414 - time (sec): 74.02 - samples/sec: 2718.24 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:39:01,295 epoch 129 - iter 1070/2142 - loss 0.00032843 - time (sec): 92.66 - samples/sec: 2716.34 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:39:20,237 epoch 129 - iter 1284/2142 - loss 0.00029782 - time (sec): 111.60 - samples/sec: 2712.99 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:39:38,660 epoch 129 - iter 1498/2142 - loss 0.00036798 - time (sec): 130.03 - samples/sec: 2712.51 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:39:57,051 epoch 129 - iter 1712/2142 - loss 0.00040601 - time (sec): 148.42 - samples/sec: 2714.77 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:40:15,611 epoch 129 - iter 1926/2142 - loss 0.00046788 - time (sec): 166.98 - samples/sec: 2719.62 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:40:34,553 epoch 129 - iter 2140/2142 - loss 0.00047726 - time (sec): 185.92 - samples/sec: 2719.09 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:40:34,703 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:40:34,704 EPOCH 129 done: loss 0.0005 - lr: 0.000001 +2024-10-02 05:40:44,693 DEV : loss 0.3217557370662689 - f1-score (micro avg) 0.9189 +2024-10-02 05:40:44,727 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:41:03,740 epoch 130 - iter 214/2142 - loss 0.00015621 - time (sec): 19.01 - samples/sec: 2678.71 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:41:22,243 epoch 130 - iter 428/2142 - loss 0.00063830 - time (sec): 37.52 - samples/sec: 2693.96 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:41:40,955 epoch 130 - iter 642/2142 - loss 0.00106935 - time (sec): 56.23 - samples/sec: 2697.74 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:41:59,675 epoch 130 - iter 856/2142 - loss 0.00109578 - time (sec): 74.95 - samples/sec: 2710.17 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:42:18,206 epoch 130 - iter 1070/2142 - loss 0.00103659 - time (sec): 93.48 - samples/sec: 2718.25 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:42:36,781 epoch 130 - iter 1284/2142 - loss 0.00096809 - time (sec): 112.05 - samples/sec: 2717.06 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:42:55,428 epoch 130 - iter 1498/2142 - loss 0.00092276 - time (sec): 130.70 - samples/sec: 2710.10 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:43:14,190 epoch 130 - iter 1712/2142 - loss 0.00084015 - time (sec): 149.46 - samples/sec: 2711.83 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:43:32,431 epoch 130 - iter 1926/2142 - loss 0.00076219 - time (sec): 167.70 - samples/sec: 2713.81 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:43:50,974 epoch 130 - iter 2140/2142 - loss 0.00073113 - time (sec): 186.25 - samples/sec: 2713.98 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:43:51,140 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:43:51,141 EPOCH 130 done: loss 0.0007 - lr: 0.000001 +2024-10-02 05:44:00,087 DEV : loss 0.31561344861984253 - f1-score (micro avg) 0.918 +2024-10-02 05:44:00,119 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:44:18,571 epoch 131 - iter 214/2142 - loss 0.00056396 - time (sec): 18.45 - samples/sec: 2769.90 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:44:37,087 epoch 131 - iter 428/2142 - loss 0.00053900 - time (sec): 36.97 - samples/sec: 2732.27 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:44:55,496 epoch 131 - iter 642/2142 - loss 0.00048992 - time (sec): 55.37 - samples/sec: 2734.59 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:45:15,051 epoch 131 - iter 856/2142 - loss 0.00039750 - time (sec): 74.93 - samples/sec: 2700.98 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:45:33,725 epoch 131 - iter 1070/2142 - loss 0.00040864 - time (sec): 93.60 - samples/sec: 2702.74 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:45:52,782 epoch 131 - iter 1284/2142 - loss 0.00038899 - time (sec): 112.66 - samples/sec: 2704.26 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:46:11,418 epoch 131 - iter 1498/2142 - loss 0.00039048 - time (sec): 131.30 - samples/sec: 2696.18 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:46:30,132 epoch 131 - iter 1712/2142 - loss 0.00044337 - time (sec): 150.01 - samples/sec: 2699.13 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:46:48,836 epoch 131 - iter 1926/2142 - loss 0.00046471 - time (sec): 168.72 - samples/sec: 2697.75 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:47:07,712 epoch 131 - iter 2140/2142 - loss 0.00046502 - time (sec): 187.59 - samples/sec: 2694.99 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:47:07,876 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:47:07,877 EPOCH 131 done: loss 0.0005 - lr: 0.000001 +2024-10-02 05:47:18,486 DEV : loss 0.3174824118614197 - f1-score (micro avg) 0.9173 +2024-10-02 05:47:18,535 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:47:37,767 epoch 132 - iter 214/2142 - loss 0.00053865 - time (sec): 19.23 - samples/sec: 2649.17 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:47:57,111 epoch 132 - iter 428/2142 - loss 0.00036254 - time (sec): 38.57 - samples/sec: 2642.53 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:48:16,109 epoch 132 - iter 642/2142 - loss 0.00043365 - time (sec): 57.57 - samples/sec: 2641.24 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:48:35,468 epoch 132 - iter 856/2142 - loss 0.00047449 - time (sec): 76.93 - samples/sec: 2633.77 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:48:54,292 epoch 132 - iter 1070/2142 - loss 0.00050912 - time (sec): 95.76 - samples/sec: 2633.01 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:49:13,778 epoch 132 - iter 1284/2142 - loss 0.00049608 - time (sec): 115.24 - samples/sec: 2620.58 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:49:32,805 epoch 132 - iter 1498/2142 - loss 0.00044029 - time (sec): 134.27 - samples/sec: 2630.41 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:49:52,533 epoch 132 - iter 1712/2142 - loss 0.00044065 - time (sec): 154.00 - samples/sec: 2624.73 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:50:11,535 epoch 132 - iter 1926/2142 - loss 0.00047184 - time (sec): 173.00 - samples/sec: 2630.80 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:50:30,774 epoch 132 - iter 2140/2142 - loss 0.00046032 - time (sec): 192.24 - samples/sec: 2629.97 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:50:30,941 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:50:30,941 EPOCH 132 done: loss 0.0005 - lr: 0.000001 +2024-10-02 05:50:41,607 DEV : loss 0.3187948763370514 - f1-score (micro avg) 0.9185 +2024-10-02 05:50:41,639 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:51:01,049 epoch 133 - iter 214/2142 - loss 0.00061313 - time (sec): 19.41 - samples/sec: 2620.03 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:51:20,524 epoch 133 - iter 428/2142 - loss 0.00046023 - time (sec): 38.88 - samples/sec: 2625.98 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:51:39,899 epoch 133 - iter 642/2142 - loss 0.00045587 - time (sec): 58.26 - samples/sec: 2629.39 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:51:59,132 epoch 133 - iter 856/2142 - loss 0.00035964 - time (sec): 77.49 - samples/sec: 2621.04 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:52:17,928 epoch 133 - iter 1070/2142 - loss 0.00036593 - time (sec): 96.29 - samples/sec: 2633.23 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:52:36,848 epoch 133 - iter 1284/2142 - loss 0.00039894 - time (sec): 115.21 - samples/sec: 2636.33 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:52:55,966 epoch 133 - iter 1498/2142 - loss 0.00035003 - time (sec): 134.33 - samples/sec: 2633.12 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:53:15,063 epoch 133 - iter 1712/2142 - loss 0.00036201 - time (sec): 153.42 - samples/sec: 2632.65 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:53:34,751 epoch 133 - iter 1926/2142 - loss 0.00038807 - time (sec): 173.11 - samples/sec: 2626.50 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:53:54,442 epoch 133 - iter 2140/2142 - loss 0.00040466 - time (sec): 192.80 - samples/sec: 2621.84 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:53:54,591 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:53:54,592 EPOCH 133 done: loss 0.0004 - lr: 0.000001 +2024-10-02 05:54:04,305 DEV : loss 0.31487759947776794 - f1-score (micro avg) 0.9178 +2024-10-02 05:54:04,336 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:54:23,120 epoch 134 - iter 214/2142 - loss 0.00084514 - time (sec): 18.78 - samples/sec: 2691.11 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:54:43,061 epoch 134 - iter 428/2142 - loss 0.00052759 - time (sec): 38.72 - samples/sec: 2612.96 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:55:02,154 epoch 134 - iter 642/2142 - loss 0.00059420 - time (sec): 57.82 - samples/sec: 2609.52 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:55:21,712 epoch 134 - iter 856/2142 - loss 0.00054420 - time (sec): 77.37 - samples/sec: 2609.42 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:55:41,099 epoch 134 - iter 1070/2142 - loss 0.00044850 - time (sec): 96.76 - samples/sec: 2612.25 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:56:00,100 epoch 134 - iter 1284/2142 - loss 0.00038563 - time (sec): 115.76 - samples/sec: 2617.82 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:56:19,446 epoch 134 - iter 1498/2142 - loss 0.00040132 - time (sec): 135.11 - samples/sec: 2622.74 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:56:38,759 epoch 134 - iter 1712/2142 - loss 0.00037638 - time (sec): 154.42 - samples/sec: 2621.84 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:56:57,735 epoch 134 - iter 1926/2142 - loss 0.00043970 - time (sec): 173.40 - samples/sec: 2623.80 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:57:17,155 epoch 134 - iter 2140/2142 - loss 0.00046105 - time (sec): 192.82 - samples/sec: 2621.10 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:57:17,315 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:57:17,316 EPOCH 134 done: loss 0.0005 - lr: 0.000001 +2024-10-02 05:57:28,147 DEV : loss 0.3157444894313812 - f1-score (micro avg) 0.9182 +2024-10-02 05:57:28,180 ---------------------------------------------------------------------------------------------------- +2024-10-02 05:57:47,415 epoch 135 - iter 214/2142 - loss 0.00117212 - time (sec): 19.23 - samples/sec: 2660.57 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:58:06,655 epoch 135 - iter 428/2142 - loss 0.00092611 - time (sec): 38.47 - samples/sec: 2655.09 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:58:26,027 epoch 135 - iter 642/2142 - loss 0.00081802 - time (sec): 57.84 - samples/sec: 2659.69 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:58:45,318 epoch 135 - iter 856/2142 - loss 0.00067066 - time (sec): 77.14 - samples/sec: 2649.19 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:59:04,386 epoch 135 - iter 1070/2142 - loss 0.00064220 - time (sec): 96.20 - samples/sec: 2642.46 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:59:23,205 epoch 135 - iter 1284/2142 - loss 0.00059700 - time (sec): 115.02 - samples/sec: 2637.49 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 05:59:42,551 epoch 135 - iter 1498/2142 - loss 0.00056555 - time (sec): 134.37 - samples/sec: 2635.77 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 06:00:01,871 epoch 135 - iter 1712/2142 - loss 0.00049975 - time (sec): 153.69 - samples/sec: 2637.58 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 06:00:21,125 epoch 135 - iter 1926/2142 - loss 0.00048513 - time (sec): 172.94 - samples/sec: 2634.74 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 06:00:40,651 epoch 135 - iter 2140/2142 - loss 0.00050124 - time (sec): 192.47 - samples/sec: 2626.58 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 06:00:40,804 ---------------------------------------------------------------------------------------------------- +2024-10-02 06:00:40,804 EPOCH 135 done: loss 0.0005 - lr: 0.000001 +2024-10-02 06:00:51,506 DEV : loss 0.3171662986278534 - f1-score (micro avg) 0.9193 +2024-10-02 06:00:51,536 ---------------------------------------------------------------------------------------------------- +2024-10-02 06:01:10,668 epoch 136 - iter 214/2142 - loss 0.00071021 - time (sec): 19.13 - samples/sec: 2643.79 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 06:01:30,080 epoch 136 - iter 428/2142 - loss 0.00037429 - time (sec): 38.54 - samples/sec: 2649.79 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 06:01:49,332 epoch 136 - iter 642/2142 - loss 0.00043612 - time (sec): 57.80 - samples/sec: 2628.53 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 06:02:08,356 epoch 136 - iter 856/2142 - loss 0.00034536 - time (sec): 76.82 - samples/sec: 2617.67 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 06:02:27,336 epoch 136 - iter 1070/2142 - loss 0.00032533 - time (sec): 95.80 - samples/sec: 2626.73 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 06:02:46,969 epoch 136 - iter 1284/2142 - loss 0.00031695 - time (sec): 115.43 - samples/sec: 2616.90 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 06:03:06,228 epoch 136 - iter 1498/2142 - loss 0.00030501 - time (sec): 134.69 - samples/sec: 2621.75 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 06:03:25,410 epoch 136 - iter 1712/2142 - loss 0.00033173 - time (sec): 153.87 - samples/sec: 2627.35 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 06:03:44,501 epoch 136 - iter 1926/2142 - loss 0.00038857 - time (sec): 172.96 - samples/sec: 2629.49 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 06:04:03,631 epoch 136 - iter 2140/2142 - loss 0.00037767 - time (sec): 192.09 - samples/sec: 2631.58 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 06:04:03,791 ---------------------------------------------------------------------------------------------------- +2024-10-02 06:04:03,792 EPOCH 136 done: loss 0.0004 - lr: 0.000001 +2024-10-02 06:04:13,515 DEV : loss 0.31947383284568787 - f1-score (micro avg) 0.9187 +2024-10-02 06:04:13,581 ---------------------------------------------------------------------------------------------------- +2024-10-02 06:04:32,421 epoch 137 - iter 214/2142 - loss 0.00022668 - time (sec): 18.84 - samples/sec: 2678.49 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 06:04:51,256 epoch 137 - iter 428/2142 - loss 0.00036559 - time (sec): 37.67 - samples/sec: 2656.12 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 06:05:10,852 epoch 137 - iter 642/2142 - loss 0.00035368 - time (sec): 57.27 - samples/sec: 2646.31 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 06:05:30,120 epoch 137 - iter 856/2142 - loss 0.00049010 - time (sec): 76.54 - samples/sec: 2644.27 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 06:05:49,779 epoch 137 - iter 1070/2142 - loss 0.00039679 - time (sec): 96.19 - samples/sec: 2632.65 - lr: 0.000001 - momentum: 0.000000 +2024-10-02 06:06:09,674 epoch 137 - iter 1284/2142 - loss 0.00048071 - time (sec): 116.09 - samples/sec: 2624.46 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:06:28,967 epoch 137 - iter 1498/2142 - loss 0.00052700 - time (sec): 135.38 - samples/sec: 2623.60 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:06:48,047 epoch 137 - iter 1712/2142 - loss 0.00052852 - time (sec): 154.46 - samples/sec: 2623.58 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:07:07,901 epoch 137 - iter 1926/2142 - loss 0.00062900 - time (sec): 174.32 - samples/sec: 2613.37 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:07:27,854 epoch 137 - iter 2140/2142 - loss 0.00063585 - time (sec): 194.27 - samples/sec: 2602.23 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:07:28,016 ---------------------------------------------------------------------------------------------------- +2024-10-02 06:07:28,016 EPOCH 137 done: loss 0.0006 - lr: 0.000000 +2024-10-02 06:07:38,876 DEV : loss 0.3180423974990845 - f1-score (micro avg) 0.9193 +2024-10-02 06:07:38,911 ---------------------------------------------------------------------------------------------------- +2024-10-02 06:07:58,462 epoch 138 - iter 214/2142 - loss 0.00038125 - time (sec): 19.55 - samples/sec: 2582.91 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:08:17,942 epoch 138 - iter 428/2142 - loss 0.00060434 - time (sec): 39.03 - samples/sec: 2593.11 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:08:37,067 epoch 138 - iter 642/2142 - loss 0.00045241 - time (sec): 58.15 - samples/sec: 2581.28 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:08:56,153 epoch 138 - iter 856/2142 - loss 0.00054027 - time (sec): 77.24 - samples/sec: 2596.99 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:09:15,655 epoch 138 - iter 1070/2142 - loss 0.00051832 - time (sec): 96.74 - samples/sec: 2600.31 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:09:35,571 epoch 138 - iter 1284/2142 - loss 0.00053261 - time (sec): 116.66 - samples/sec: 2602.43 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:09:55,070 epoch 138 - iter 1498/2142 - loss 0.00048857 - time (sec): 136.16 - samples/sec: 2597.12 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:10:14,554 epoch 138 - iter 1712/2142 - loss 0.00047301 - time (sec): 155.64 - samples/sec: 2595.56 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:10:33,256 epoch 138 - iter 1926/2142 - loss 0.00046259 - time (sec): 174.34 - samples/sec: 2602.89 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:10:52,930 epoch 138 - iter 2140/2142 - loss 0.00046950 - time (sec): 194.02 - samples/sec: 2605.47 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:10:53,072 ---------------------------------------------------------------------------------------------------- +2024-10-02 06:10:53,073 EPOCH 138 done: loss 0.0005 - lr: 0.000000 +2024-10-02 06:11:02,804 DEV : loss 0.3187938928604126 - f1-score (micro avg) 0.919 +2024-10-02 06:11:02,837 ---------------------------------------------------------------------------------------------------- +2024-10-02 06:11:21,727 epoch 139 - iter 214/2142 - loss 0.00047127 - time (sec): 18.89 - samples/sec: 2658.43 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:11:41,613 epoch 139 - iter 428/2142 - loss 0.00036179 - time (sec): 38.77 - samples/sec: 2621.45 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:12:01,400 epoch 139 - iter 642/2142 - loss 0.00041219 - time (sec): 58.56 - samples/sec: 2594.39 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:12:20,777 epoch 139 - iter 856/2142 - loss 0.00041284 - time (sec): 77.94 - samples/sec: 2584.32 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:12:40,786 epoch 139 - iter 1070/2142 - loss 0.00040548 - time (sec): 97.95 - samples/sec: 2585.84 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:13:00,153 epoch 139 - iter 1284/2142 - loss 0.00038736 - time (sec): 117.31 - samples/sec: 2591.59 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:13:19,773 epoch 139 - iter 1498/2142 - loss 0.00034002 - time (sec): 136.93 - samples/sec: 2593.31 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:13:38,717 epoch 139 - iter 1712/2142 - loss 0.00035366 - time (sec): 155.88 - samples/sec: 2595.55 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:13:58,545 epoch 139 - iter 1926/2142 - loss 0.00039916 - time (sec): 175.71 - samples/sec: 2590.89 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:14:17,501 epoch 139 - iter 2140/2142 - loss 0.00037981 - time (sec): 194.66 - samples/sec: 2596.58 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:14:17,666 ---------------------------------------------------------------------------------------------------- +2024-10-02 06:14:17,666 EPOCH 139 done: loss 0.0004 - lr: 0.000000 +2024-10-02 06:14:28,576 DEV : loss 0.31866297125816345 - f1-score (micro avg) 0.9191 +2024-10-02 06:14:28,607 ---------------------------------------------------------------------------------------------------- +2024-10-02 06:14:47,706 epoch 140 - iter 214/2142 - loss 0.00098913 - time (sec): 19.10 - samples/sec: 2619.72 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:15:06,473 epoch 140 - iter 428/2142 - loss 0.00070272 - time (sec): 37.86 - samples/sec: 2640.70 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:15:26,382 epoch 140 - iter 642/2142 - loss 0.00048730 - time (sec): 57.77 - samples/sec: 2606.39 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:15:45,618 epoch 140 - iter 856/2142 - loss 0.00046725 - time (sec): 77.01 - samples/sec: 2603.21 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:16:05,073 epoch 140 - iter 1070/2142 - loss 0.00051891 - time (sec): 96.46 - samples/sec: 2603.12 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:16:24,672 epoch 140 - iter 1284/2142 - loss 0.00058237 - time (sec): 116.06 - samples/sec: 2599.49 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:16:43,533 epoch 140 - iter 1498/2142 - loss 0.00056083 - time (sec): 134.92 - samples/sec: 2608.51 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:17:02,901 epoch 140 - iter 1712/2142 - loss 0.00054691 - time (sec): 154.29 - samples/sec: 2610.09 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:17:22,625 epoch 140 - iter 1926/2142 - loss 0.00052059 - time (sec): 174.02 - samples/sec: 2611.81 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:17:41,843 epoch 140 - iter 2140/2142 - loss 0.00051133 - time (sec): 193.23 - samples/sec: 2615.92 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:17:41,997 ---------------------------------------------------------------------------------------------------- +2024-10-02 06:17:41,998 EPOCH 140 done: loss 0.0005 - lr: 0.000000 +2024-10-02 06:17:52,879 DEV : loss 0.32070228457450867 - f1-score (micro avg) 0.9189 +2024-10-02 06:17:52,911 ---------------------------------------------------------------------------------------------------- +2024-10-02 06:18:12,141 epoch 141 - iter 214/2142 - loss 0.00068567 - time (sec): 19.23 - samples/sec: 2633.68 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:18:31,332 epoch 141 - iter 428/2142 - loss 0.00059772 - time (sec): 38.42 - samples/sec: 2631.16 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:18:50,168 epoch 141 - iter 642/2142 - loss 0.00057337 - time (sec): 57.26 - samples/sec: 2632.00 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:19:09,536 epoch 141 - iter 856/2142 - loss 0.00048880 - time (sec): 76.62 - samples/sec: 2627.19 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:19:28,722 epoch 141 - iter 1070/2142 - loss 0.00046599 - time (sec): 95.81 - samples/sec: 2636.59 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:19:47,789 epoch 141 - iter 1284/2142 - loss 0.00042062 - time (sec): 114.88 - samples/sec: 2633.05 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:20:06,710 epoch 141 - iter 1498/2142 - loss 0.00045634 - time (sec): 133.80 - samples/sec: 2632.71 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:20:26,173 epoch 141 - iter 1712/2142 - loss 0.00045545 - time (sec): 153.26 - samples/sec: 2624.42 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:20:46,035 epoch 141 - iter 1926/2142 - loss 0.00047571 - time (sec): 173.12 - samples/sec: 2624.74 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:21:05,092 epoch 141 - iter 2140/2142 - loss 0.00044709 - time (sec): 192.18 - samples/sec: 2630.62 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:21:05,272 ---------------------------------------------------------------------------------------------------- +2024-10-02 06:21:05,273 EPOCH 141 done: loss 0.0004 - lr: 0.000000 +2024-10-02 06:21:14,983 DEV : loss 0.3205887973308563 - f1-score (micro avg) 0.9182 +2024-10-02 06:21:15,020 ---------------------------------------------------------------------------------------------------- +2024-10-02 06:21:34,385 epoch 142 - iter 214/2142 - loss 0.00021885 - time (sec): 19.36 - samples/sec: 2656.60 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:21:53,444 epoch 142 - iter 428/2142 - loss 0.00030090 - time (sec): 38.42 - samples/sec: 2642.73 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:22:12,648 epoch 142 - iter 642/2142 - loss 0.00025227 - time (sec): 57.63 - samples/sec: 2649.50 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:22:31,814 epoch 142 - iter 856/2142 - loss 0.00037208 - time (sec): 76.79 - samples/sec: 2637.28 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:22:50,604 epoch 142 - iter 1070/2142 - loss 0.00031853 - time (sec): 95.58 - samples/sec: 2643.95 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:23:10,031 epoch 142 - iter 1284/2142 - loss 0.00033508 - time (sec): 115.01 - samples/sec: 2639.68 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:23:29,190 epoch 142 - iter 1498/2142 - loss 0.00037965 - time (sec): 134.17 - samples/sec: 2640.22 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:23:48,033 epoch 142 - iter 1712/2142 - loss 0.00040419 - time (sec): 153.01 - samples/sec: 2642.22 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:24:07,226 epoch 142 - iter 1926/2142 - loss 0.00037945 - time (sec): 172.20 - samples/sec: 2644.20 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:24:26,795 epoch 142 - iter 2140/2142 - loss 0.00038349 - time (sec): 191.77 - samples/sec: 2636.03 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:24:26,976 ---------------------------------------------------------------------------------------------------- +2024-10-02 06:24:26,976 EPOCH 142 done: loss 0.0004 - lr: 0.000000 +2024-10-02 06:24:37,740 DEV : loss 0.3213413655757904 - f1-score (micro avg) 0.9181 +2024-10-02 06:24:37,772 ---------------------------------------------------------------------------------------------------- +2024-10-02 06:24:56,625 epoch 143 - iter 214/2142 - loss 0.00034986 - time (sec): 18.85 - samples/sec: 2594.56 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:25:15,906 epoch 143 - iter 428/2142 - loss 0.00045199 - time (sec): 38.13 - samples/sec: 2598.36 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:25:35,562 epoch 143 - iter 642/2142 - loss 0.00046886 - time (sec): 57.79 - samples/sec: 2590.04 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:25:54,981 epoch 143 - iter 856/2142 - loss 0.00040428 - time (sec): 77.21 - samples/sec: 2604.06 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:26:14,347 epoch 143 - iter 1070/2142 - loss 0.00036368 - time (sec): 96.57 - samples/sec: 2601.19 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:26:33,619 epoch 143 - iter 1284/2142 - loss 0.00039759 - time (sec): 115.84 - samples/sec: 2608.23 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:26:53,358 epoch 143 - iter 1498/2142 - loss 0.00042445 - time (sec): 135.58 - samples/sec: 2613.57 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:27:12,994 epoch 143 - iter 1712/2142 - loss 0.00053617 - time (sec): 155.22 - samples/sec: 2609.93 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:27:32,203 epoch 143 - iter 1926/2142 - loss 0.00052655 - time (sec): 174.43 - samples/sec: 2614.87 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:27:51,248 epoch 143 - iter 2140/2142 - loss 0.00050628 - time (sec): 193.47 - samples/sec: 2613.01 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:27:51,405 ---------------------------------------------------------------------------------------------------- +2024-10-02 06:27:51,406 EPOCH 143 done: loss 0.0005 - lr: 0.000000 +2024-10-02 06:28:01,844 DEV : loss 0.3220960199832916 - f1-score (micro avg) 0.9183 +2024-10-02 06:28:01,885 ---------------------------------------------------------------------------------------------------- +2024-10-02 06:28:20,706 epoch 144 - iter 214/2142 - loss 0.00010474 - time (sec): 18.82 - samples/sec: 2605.92 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:28:40,026 epoch 144 - iter 428/2142 - loss 0.00033361 - time (sec): 38.14 - samples/sec: 2600.70 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:28:59,416 epoch 144 - iter 642/2142 - loss 0.00027736 - time (sec): 57.53 - samples/sec: 2642.09 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:29:18,703 epoch 144 - iter 856/2142 - loss 0.00029249 - time (sec): 76.82 - samples/sec: 2633.77 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:29:38,232 epoch 144 - iter 1070/2142 - loss 0.00030540 - time (sec): 96.35 - samples/sec: 2626.97 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:29:57,165 epoch 144 - iter 1284/2142 - loss 0.00028130 - time (sec): 115.28 - samples/sec: 2629.23 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:30:15,895 epoch 144 - iter 1498/2142 - loss 0.00031087 - time (sec): 134.01 - samples/sec: 2633.56 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:30:35,189 epoch 144 - iter 1712/2142 - loss 0.00042762 - time (sec): 153.30 - samples/sec: 2629.89 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:30:54,378 epoch 144 - iter 1926/2142 - loss 0.00045034 - time (sec): 172.49 - samples/sec: 2632.10 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:31:14,247 epoch 144 - iter 2140/2142 - loss 0.00045341 - time (sec): 192.36 - samples/sec: 2628.38 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:31:14,393 ---------------------------------------------------------------------------------------------------- +2024-10-02 06:31:14,394 EPOCH 144 done: loss 0.0005 - lr: 0.000000 +2024-10-02 06:31:24,050 DEV : loss 0.3229338228702545 - f1-score (micro avg) 0.9186 +2024-10-02 06:31:24,082 ---------------------------------------------------------------------------------------------------- +2024-10-02 06:31:42,841 epoch 145 - iter 214/2142 - loss 0.00053726 - time (sec): 18.76 - samples/sec: 2590.81 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:32:01,760 epoch 145 - iter 428/2142 - loss 0.00041857 - time (sec): 37.68 - samples/sec: 2615.71 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:32:21,309 epoch 145 - iter 642/2142 - loss 0.00032805 - time (sec): 57.22 - samples/sec: 2635.32 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:32:40,763 epoch 145 - iter 856/2142 - loss 0.00044840 - time (sec): 76.68 - samples/sec: 2625.16 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:32:59,775 epoch 145 - iter 1070/2142 - loss 0.00039665 - time (sec): 95.69 - samples/sec: 2626.89 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:33:19,398 epoch 145 - iter 1284/2142 - loss 0.00041273 - time (sec): 115.31 - samples/sec: 2622.84 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:33:39,331 epoch 145 - iter 1498/2142 - loss 0.00038635 - time (sec): 135.25 - samples/sec: 2616.68 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:33:58,304 epoch 145 - iter 1712/2142 - loss 0.00035143 - time (sec): 154.22 - samples/sec: 2618.86 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:34:17,882 epoch 145 - iter 1926/2142 - loss 0.00034711 - time (sec): 173.80 - samples/sec: 2615.76 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:34:36,947 epoch 145 - iter 2140/2142 - loss 0.00033421 - time (sec): 192.86 - samples/sec: 2620.99 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:34:37,100 ---------------------------------------------------------------------------------------------------- +2024-10-02 06:34:37,101 EPOCH 145 done: loss 0.0003 - lr: 0.000000 +2024-10-02 06:34:47,522 DEV : loss 0.32264310121536255 - f1-score (micro avg) 0.9188 +2024-10-02 06:34:47,554 ---------------------------------------------------------------------------------------------------- +2024-10-02 06:35:07,110 epoch 146 - iter 214/2142 - loss 0.00079360 - time (sec): 19.55 - samples/sec: 2581.28 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:35:26,645 epoch 146 - iter 428/2142 - loss 0.00041915 - time (sec): 39.09 - samples/sec: 2602.02 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:35:46,100 epoch 146 - iter 642/2142 - loss 0.00046327 - time (sec): 58.54 - samples/sec: 2609.72 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:36:05,214 epoch 146 - iter 856/2142 - loss 0.00046153 - time (sec): 77.66 - samples/sec: 2616.76 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:36:24,560 epoch 146 - iter 1070/2142 - loss 0.00040984 - time (sec): 97.00 - samples/sec: 2617.73 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:36:44,008 epoch 146 - iter 1284/2142 - loss 0.00038049 - time (sec): 116.45 - samples/sec: 2616.29 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:37:03,303 epoch 146 - iter 1498/2142 - loss 0.00041066 - time (sec): 135.75 - samples/sec: 2615.34 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:37:22,302 epoch 146 - iter 1712/2142 - loss 0.00038377 - time (sec): 154.75 - samples/sec: 2613.20 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:37:41,349 epoch 146 - iter 1926/2142 - loss 0.00036082 - time (sec): 173.79 - samples/sec: 2616.05 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:38:00,714 epoch 146 - iter 2140/2142 - loss 0.00034434 - time (sec): 193.16 - samples/sec: 2616.38 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:38:00,908 ---------------------------------------------------------------------------------------------------- +2024-10-02 06:38:00,908 EPOCH 146 done: loss 0.0003 - lr: 0.000000 +2024-10-02 06:38:11,395 DEV : loss 0.3237173855304718 - f1-score (micro avg) 0.9193 +2024-10-02 06:38:11,426 ---------------------------------------------------------------------------------------------------- +2024-10-02 06:38:30,490 epoch 147 - iter 214/2142 - loss 0.00027439 - time (sec): 19.06 - samples/sec: 2581.39 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:38:50,217 epoch 147 - iter 428/2142 - loss 0.00024391 - time (sec): 38.79 - samples/sec: 2590.34 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:39:09,853 epoch 147 - iter 642/2142 - loss 0.00040227 - time (sec): 58.43 - samples/sec: 2583.63 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:39:29,179 epoch 147 - iter 856/2142 - loss 0.00042671 - time (sec): 77.75 - samples/sec: 2598.07 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:39:48,338 epoch 147 - iter 1070/2142 - loss 0.00047261 - time (sec): 96.91 - samples/sec: 2596.49 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:40:07,392 epoch 147 - iter 1284/2142 - loss 0.00049423 - time (sec): 115.96 - samples/sec: 2603.40 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:40:26,992 epoch 147 - iter 1498/2142 - loss 0.00043337 - time (sec): 135.56 - samples/sec: 2601.01 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:40:46,305 epoch 147 - iter 1712/2142 - loss 0.00038691 - time (sec): 154.88 - samples/sec: 2603.86 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:41:05,460 epoch 147 - iter 1926/2142 - loss 0.00036859 - time (sec): 174.03 - samples/sec: 2612.55 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:41:24,569 epoch 147 - iter 2140/2142 - loss 0.00035817 - time (sec): 193.14 - samples/sec: 2617.30 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:41:24,718 ---------------------------------------------------------------------------------------------------- +2024-10-02 06:41:24,718 EPOCH 147 done: loss 0.0004 - lr: 0.000000 +2024-10-02 06:41:34,365 DEV : loss 0.32322174310684204 - f1-score (micro avg) 0.9186 +2024-10-02 06:41:34,400 ---------------------------------------------------------------------------------------------------- +2024-10-02 06:41:53,413 epoch 148 - iter 214/2142 - loss 0.00039525 - time (sec): 19.01 - samples/sec: 2668.80 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:42:12,688 epoch 148 - iter 428/2142 - loss 0.00041463 - time (sec): 38.29 - samples/sec: 2668.77 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:42:31,785 epoch 148 - iter 642/2142 - loss 0.00046294 - time (sec): 57.38 - samples/sec: 2650.84 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:42:51,003 epoch 148 - iter 856/2142 - loss 0.00050068 - time (sec): 76.60 - samples/sec: 2647.74 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:43:10,209 epoch 148 - iter 1070/2142 - loss 0.00048569 - time (sec): 95.81 - samples/sec: 2642.43 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:43:29,489 epoch 148 - iter 1284/2142 - loss 0.00042447 - time (sec): 115.09 - samples/sec: 2642.69 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:43:48,116 epoch 148 - iter 1498/2142 - loss 0.00038088 - time (sec): 133.71 - samples/sec: 2638.66 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:44:07,377 epoch 148 - iter 1712/2142 - loss 0.00045826 - time (sec): 152.98 - samples/sec: 2639.21 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:44:27,101 epoch 148 - iter 1926/2142 - loss 0.00044504 - time (sec): 172.70 - samples/sec: 2633.47 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:44:46,766 epoch 148 - iter 2140/2142 - loss 0.00041363 - time (sec): 192.36 - samples/sec: 2628.07 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:44:46,932 ---------------------------------------------------------------------------------------------------- +2024-10-02 06:44:46,933 EPOCH 148 done: loss 0.0004 - lr: 0.000000 +2024-10-02 06:44:57,671 DEV : loss 0.32282331585884094 - f1-score (micro avg) 0.9183 +2024-10-02 06:44:57,701 ---------------------------------------------------------------------------------------------------- +2024-10-02 06:45:17,010 epoch 149 - iter 214/2142 - loss 0.00044157 - time (sec): 19.31 - samples/sec: 2658.77 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:45:36,350 epoch 149 - iter 428/2142 - loss 0.00046198 - time (sec): 38.65 - samples/sec: 2643.49 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:45:55,719 epoch 149 - iter 642/2142 - loss 0.00039360 - time (sec): 58.02 - samples/sec: 2625.46 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:46:15,418 epoch 149 - iter 856/2142 - loss 0.00035537 - time (sec): 77.72 - samples/sec: 2615.05 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:46:34,572 epoch 149 - iter 1070/2142 - loss 0.00039656 - time (sec): 96.87 - samples/sec: 2612.84 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:46:53,584 epoch 149 - iter 1284/2142 - loss 0.00038398 - time (sec): 115.88 - samples/sec: 2620.43 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:47:12,903 epoch 149 - iter 1498/2142 - loss 0.00044278 - time (sec): 135.20 - samples/sec: 2612.21 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:47:32,383 epoch 149 - iter 1712/2142 - loss 0.00041437 - time (sec): 154.68 - samples/sec: 2614.34 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:47:52,004 epoch 149 - iter 1926/2142 - loss 0.00039040 - time (sec): 174.30 - samples/sec: 2613.21 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:48:11,139 epoch 149 - iter 2140/2142 - loss 0.00036298 - time (sec): 193.44 - samples/sec: 2613.48 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:48:11,287 ---------------------------------------------------------------------------------------------------- +2024-10-02 06:48:11,288 EPOCH 149 done: loss 0.0004 - lr: 0.000000 +2024-10-02 06:48:21,852 DEV : loss 0.323101669549942 - f1-score (micro avg) 0.9187 +2024-10-02 06:48:21,888 ---------------------------------------------------------------------------------------------------- +2024-10-02 06:48:40,875 epoch 150 - iter 214/2142 - loss 0.00052187 - time (sec): 18.99 - samples/sec: 2652.84 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:48:59,949 epoch 150 - iter 428/2142 - loss 0.00047170 - time (sec): 38.06 - samples/sec: 2664.66 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:49:19,666 epoch 150 - iter 642/2142 - loss 0.00036539 - time (sec): 57.78 - samples/sec: 2647.68 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:49:38,996 epoch 150 - iter 856/2142 - loss 0.00033495 - time (sec): 77.11 - samples/sec: 2646.36 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:49:58,128 epoch 150 - iter 1070/2142 - loss 0.00034527 - time (sec): 96.24 - samples/sec: 2634.38 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:50:17,449 epoch 150 - iter 1284/2142 - loss 0.00032290 - time (sec): 115.56 - samples/sec: 2627.53 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:50:36,686 epoch 150 - iter 1498/2142 - loss 0.00034359 - time (sec): 134.80 - samples/sec: 2623.95 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:50:56,498 epoch 150 - iter 1712/2142 - loss 0.00032462 - time (sec): 154.61 - samples/sec: 2611.99 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:51:15,836 epoch 150 - iter 1926/2142 - loss 0.00030879 - time (sec): 173.95 - samples/sec: 2615.44 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:51:35,147 epoch 150 - iter 2140/2142 - loss 0.00031366 - time (sec): 193.26 - samples/sec: 2615.64 - lr: 0.000000 - momentum: 0.000000 +2024-10-02 06:51:35,301 ---------------------------------------------------------------------------------------------------- +2024-10-02 06:51:35,301 EPOCH 150 done: loss 0.0003 - lr: 0.000000 +2024-10-02 06:51:45,169 DEV : loss 0.3229939639568329 - f1-score (micro avg) 0.9185 +2024-10-02 06:51:48,999 ---------------------------------------------------------------------------------------------------- +2024-10-02 06:51:49,000 Loading model from best epoch ... +2024-10-02 06:51:58,287 SequenceTagger predicts: Dictionary with 73 tags: O, S-ORG, B-ORG, E-ORG, I-ORG, S-PERSON, B-PERSON, E-PERSON, I-PERSON, S-CARDINAL, B-CARDINAL, E-CARDINAL, I-CARDINAL, S-GPE, B-GPE, E-GPE, I-GPE, S-DATE, B-DATE, E-DATE, I-DATE, S-ORDINAL, B-ORDINAL, E-ORDINAL, I-ORDINAL, S-PERCENT, B-PERCENT, E-PERCENT, I-PERCENT, S-LOC, B-LOC, E-LOC, I-LOC, S-NORP, B-NORP, E-NORP, I-NORP, S-MONEY, B-MONEY, E-MONEY, I-MONEY, S-TIME, B-TIME, E-TIME, I-TIME, S-EVENT, B-EVENT, E-EVENT, I-EVENT, S-PRODUCT +2024-10-02 06:52:07,171 +Results: +- F-score (micro) 0.9173 +- F-score (macro) 0.8778 +- Accuracy 0.8651 + +By class: + precision recall f1-score support + + ORG 0.8931 0.8847 0.8889 1388 + PERSON 0.9516 0.9724 0.9619 1051 + CARDINAL 0.9330 0.9627 0.9476 911 + DATE 0.9403 0.9403 0.9403 838 + GPE 0.9282 0.9552 0.9415 826 + PERCENT 0.9807 0.9854 0.9831 206 + LOC 0.8011 0.7921 0.7966 178 + ORDINAL 0.9477 0.9477 0.9477 172 + NORP 0.8690 0.8936 0.8811 141 + TIME 0.8951 0.9343 0.9143 137 + EVENT 0.6395 0.7231 0.6787 130 + MONEY 0.9818 0.9730 0.9774 111 + PRODUCT 0.7882 0.8072 0.7976 83 + WORK_OF_ART 0.8313 0.8214 0.8263 84 + FAC 0.6933 0.6753 0.6842 77 + QUANTITY 0.8636 0.8769 0.8702 65 + LAW 0.8214 0.8214 0.8214 28 + LANGUAGE 1.0000 0.8889 0.9412 9 + + micro avg 0.9112 0.9235 0.9173 6435 + macro avg 0.8755 0.8809 0.8778 6435 +weighted avg 0.9116 0.9235 0.9174 6435 + +2024-10-02 06:52:07,172 ----------------------------------------------------------------------------------------------------