Upload folder using huggingface_hub
- best-model.pt +3 -0
- dev.tsv +0 -0
- loss.tsv +11 -0
- runs/events.out.tfevents.1697559497.4c6324b99746.1390.6 +3 -0
- test.tsv +0 -0
- training.log +242 -0
best-model.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c309377fd8e28e7a5bdda1cf2bef1bb6eb22b69d49e175ddd13c562ea45abc4
+size 440966725
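The three lines above are only a Git LFS pointer; the actual ~440 MB checkpoint lives in LFS storage. A minimal sketch for fetching the real file with `huggingface_hub` (the repo id below is a placeholder, since the repository name is not part of this diff):

```python
from huggingface_hub import hf_hub_download

# Placeholder repo id -- substitute the actual model repository.
local_path = hf_hub_download(
    repo_id="your-username/your-model-repo",
    filename="best-model.pt",
)
print(local_path)  # local path to the resolved ~440 MB checkpoint
```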
dev.tsv
ADDED
The diff for this file is too large to render.
See raw diff
loss.tsv
ADDED
@@ -0,0 +1,11 @@
+EPOCH  TIMESTAMP  LEARNING_RATE  TRAIN_LOSS  DEV_LOSS  DEV_PRECISION  DEV_RECALL  DEV_F1  DEV_ACCURACY
+1      16:19:35   0.0000         0.7328      0.1595    0.6364         0.6255      0.6309  0.4728
+2      16:20:59   0.0000         0.1539      0.1810    0.7449         0.6849      0.7136  0.5707
+3      16:22:24   0.0000         0.0879      0.1748    0.7365         0.7670      0.7514  0.6205
+4      16:23:48   0.0000         0.0531      0.1793    0.7806         0.7873      0.7840  0.6647
+5      16:25:13   0.0000         0.0362      0.1936    0.7759         0.7795      0.7777  0.6533
+6      16:26:36   0.0000         0.0214      0.2107    0.7794         0.7873      0.7834  0.6616
+7      16:27:58   0.0000         0.0151      0.2255    0.7877         0.7920      0.7899  0.6718
+8      16:29:23   0.0000         0.0103      0.2575    0.7864         0.7858      0.7861  0.6656
+9      16:30:47   0.0000         0.0067      0.2568    0.7926         0.7889      0.7908  0.6687
+10     16:32:11   0.0000         0.0039      0.2455    0.7874         0.8022      0.7947  0.6746
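loss.tsv is the per-epoch summary written during training (tab-separated, one row per epoch). A minimal sketch for inspecting it with pandas, assuming the file has been downloaded locally:

```python
import pandas as pd
import matplotlib.pyplot as plt

# Per-epoch metrics written by the Flair trainer (tab-separated).
df = pd.read_csv("loss.tsv", sep="\t")

fig, ax = plt.subplots()
ax.plot(df["EPOCH"], df["TRAIN_LOSS"], label="train loss")
ax.plot(df["EPOCH"], df["DEV_LOSS"], label="dev loss")
ax.plot(df["EPOCH"], df["DEV_F1"], label="dev F1 (micro)")
ax.set_xlabel("epoch")
ax.legend()
plt.show()
```

Per the table above, dev F1 climbs from 0.6309 to 0.7947 while dev loss is lowest after epoch 1 and then rises, a common pattern when model selection is driven by F1 rather than loss.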
runs/events.out.tfevents.1697559497.4c6324b99746.1390.6
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8a46e4424a92928110784470d160130068f9cb927e480fc3df51dfe3090921a
+size 502124
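This events file is again an LFS pointer; the underlying TensorBoard log can be viewed with `tensorboard --logdir runs` or read programmatically. A rough sketch using TensorBoard's event accumulator (scalar tag names are discovered at runtime rather than assumed):

```python
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# Point the accumulator at the directory holding the events.out.tfevents.* file.
ea = EventAccumulator("runs")
ea.Reload()

for tag in ea.Tags()["scalars"]:
    events = ea.Scalars(tag)
    print(tag, [(e.step, round(e.value, 4)) for e in events[:3]], "...")
```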
test.tsv
ADDED
The diff for this file is too large to render.
See raw diff
training.log
ADDED
@@ -0,0 +1,242 @@
+2023-10-17 16:18:17,473 ----------------------------------------------------------------------------------------------------
+2023-10-17 16:18:17,475 Model: "SequenceTagger(
+  (embeddings): TransformerWordEmbeddings(
+    (model): ElectraModel(
+      (embeddings): ElectraEmbeddings(
+        (word_embeddings): Embedding(32001, 768)
+        (position_embeddings): Embedding(512, 768)
+        (token_type_embeddings): Embedding(2, 768)
+        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (encoder): ElectraEncoder(
+        (layer): ModuleList(
+          (0-11): 12 x ElectraLayer(
+            (attention): ElectraAttention(
+              (self): ElectraSelfAttention(
+                (query): Linear(in_features=768, out_features=768, bias=True)
+                (key): Linear(in_features=768, out_features=768, bias=True)
+                (value): Linear(in_features=768, out_features=768, bias=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+              (output): ElectraSelfOutput(
+                (dense): Linear(in_features=768, out_features=768, bias=True)
+                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+            )
+            (intermediate): ElectraIntermediate(
+              (dense): Linear(in_features=768, out_features=3072, bias=True)
+              (intermediate_act_fn): GELUActivation()
+            )
+            (output): ElectraOutput(
+              (dense): Linear(in_features=3072, out_features=768, bias=True)
+              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+              (dropout): Dropout(p=0.1, inplace=False)
+            )
+          )
+        )
+      )
+    )
+  )
+  (locked_dropout): LockedDropout(p=0.5)
+  (linear): Linear(in_features=768, out_features=21, bias=True)
+  (loss_function): CrossEntropyLoss()
+)"
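The repr above is a Flair SequenceTagger wrapping an ELECTRA encoder (12 layers, 768-dim hidden size, 32001-token vocabulary) with locked dropout and a plain linear head over 21 tags, i.e. no CRF. A minimal sketch of instantiating matching word embeddings in Flair; the checkpoint name is inferred from the training base path further down and should be treated as an assumption:

```python
from flair.data import Sentence
from flair.embeddings import TransformerWordEmbeddings

# Checkpoint name inferred from the base path below
# ("hmteams/teams-base-historic-multilingual-discriminator"); treat it as an assumption.
embeddings = TransformerWordEmbeddings(
    model="hmteams/teams-base-historic-multilingual-discriminator",
    layers="-1",               # "layers-1" in the base path: last layer only
    subtoken_pooling="first",  # "poolingfirst": first-subtoken pooling
    fine_tune=True,
)

sentence = Sentence("Ein kurzer Beispielsatz .")
embeddings.embed(sentence)
print(sentence[0].embedding.shape)  # 768-dim vectors, matching the repr above
```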
+2023-10-17 16:18:17,475 ----------------------------------------------------------------------------------------------------
+2023-10-17 16:18:17,475 MultiCorpus: 3575 train + 1235 dev + 1266 test sentences
+ - NER_HIPE_2022 Corpus: 3575 train + 1235 dev + 1266 test sentences - /root/.flair/datasets/ner_hipe_2022/v2.1/hipe2020/de/with_doc_seperator
+2023-10-17 16:18:17,475 ----------------------------------------------------------------------------------------------------
+2023-10-17 16:18:17,475 Train: 3575 sentences
+2023-10-17 16:18:17,475 (train_with_dev=False, train_with_test=False)
+2023-10-17 16:18:17,475 ----------------------------------------------------------------------------------------------------
+2023-10-17 16:18:17,475 Training Params:
+2023-10-17 16:18:17,476 - learning_rate: "3e-05"
+2023-10-17 16:18:17,476 - mini_batch_size: "4"
+2023-10-17 16:18:17,476 - max_epochs: "10"
+2023-10-17 16:18:17,476 - shuffle: "True"
+2023-10-17 16:18:17,476 ----------------------------------------------------------------------------------------------------
+2023-10-17 16:18:17,476 Plugins:
+2023-10-17 16:18:17,476 - TensorboardLogger
+2023-10-17 16:18:17,476 - LinearScheduler | warmup_fraction: '0.1'
+2023-10-17 16:18:17,476 ----------------------------------------------------------------------------------------------------
+2023-10-17 16:18:17,476 Final evaluation on model from best epoch (best-model.pt)
+2023-10-17 16:18:17,476 - metric: "('micro avg', 'f1-score')"
+2023-10-17 16:18:17,476 ----------------------------------------------------------------------------------------------------
+2023-10-17 16:18:17,476 Computation:
+2023-10-17 16:18:17,476 - compute on device: cuda:0
+2023-10-17 16:18:17,476 - embedding storage: none
+2023-10-17 16:18:17,477 ----------------------------------------------------------------------------------------------------
+2023-10-17 16:18:17,477 Model training base path: "hmbench-hipe2020/de-hmteams/teams-base-historic-multilingual-discriminator-bs4-wsFalse-e10-lr3e-05-poolingfirst-layers-1-crfFalse-2"
+2023-10-17 16:18:17,477 ----------------------------------------------------------------------------------------------------
+2023-10-17 16:18:17,477 ----------------------------------------------------------------------------------------------------
+2023-10-17 16:18:17,477 Logging anything other than scalars to TensorBoard is currently not supported.
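The parameters above (HIPE-2020 German split of the HIPE-2022 data, lr 3e-05, batch size 4, 10 epochs, linear schedule with 10% warmup, no CRF) map onto Flair's fine-tuning API roughly as follows. This is a reconstruction from the log, not the original training script; the exact `NER_HIPE_2022` arguments and the base-model checkpoint name are assumptions:

```python
from flair.datasets import NER_HIPE_2022
from flair.embeddings import TransformerWordEmbeddings
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer

# Corpus: HIPE-2020 German subset of the HIPE-2022 shared-task data (assumed arguments).
corpus = NER_HIPE_2022(dataset_name="hipe2020", language="de")
label_dict = corpus.make_label_dictionary(label_type="ner")

embeddings = TransformerWordEmbeddings(
    model="hmteams/teams-base-historic-multilingual-discriminator",  # inferred from the base path
    layers="-1",
    subtoken_pooling="first",
    fine_tune=True,
)

# Linear classification head directly on the transformer: no CRF, no RNN ("crfFalse" in the base path).
tagger = SequenceTagger(
    hidden_size=256,  # ignored when use_rnn=False, but required by the constructor
    embeddings=embeddings,
    tag_dictionary=label_dict,
    tag_type="ner",
    use_crf=False,
    use_rnn=False,
)

trainer = ModelTrainer(tagger, corpus)
# fine_tune defaults to a linear LR schedule with warmup, matching the LinearScheduler plugin above.
trainer.fine_tune(
    "hmbench-hipe2020/de-hmteams/teams-base-historic-multilingual-discriminator-bs4-wsFalse-e10-lr3e-05-poolingfirst-layers-1-crfFalse-2",
    learning_rate=3e-05,
    mini_batch_size=4,
    max_epochs=10,
)
```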
+2023-10-17 16:18:24,576 epoch 1 - iter 89/894 - loss 3.49098594 - time (sec): 7.10 - samples/sec: 1258.54 - lr: 0.000003 - momentum: 0.000000
+2023-10-17 16:18:31,595 epoch 1 - iter 178/894 - loss 2.32366876 - time (sec): 14.12 - samples/sec: 1228.67 - lr: 0.000006 - momentum: 0.000000
+2023-10-17 16:18:38,525 epoch 1 - iter 267/894 - loss 1.75529411 - time (sec): 21.05 - samples/sec: 1189.14 - lr: 0.000009 - momentum: 0.000000
+2023-10-17 16:18:45,733 epoch 1 - iter 356/894 - loss 1.39116171 - time (sec): 28.25 - samples/sec: 1225.66 - lr: 0.000012 - momentum: 0.000000
+2023-10-17 16:18:52,748 epoch 1 - iter 445/894 - loss 1.18927583 - time (sec): 35.27 - samples/sec: 1221.71 - lr: 0.000015 - momentum: 0.000000
+2023-10-17 16:18:59,818 epoch 1 - iter 534/894 - loss 1.04847785 - time (sec): 42.34 - samples/sec: 1221.03 - lr: 0.000018 - momentum: 0.000000
+2023-10-17 16:19:06,763 epoch 1 - iter 623/894 - loss 0.94878418 - time (sec): 49.28 - samples/sec: 1209.92 - lr: 0.000021 - momentum: 0.000000
+2023-10-17 16:19:13,830 epoch 1 - iter 712/894 - loss 0.86276987 - time (sec): 56.35 - samples/sec: 1215.14 - lr: 0.000024 - momentum: 0.000000
+2023-10-17 16:19:21,649 epoch 1 - iter 801/894 - loss 0.79019396 - time (sec): 64.17 - samples/sec: 1213.93 - lr: 0.000027 - momentum: 0.000000
+2023-10-17 16:19:29,090 epoch 1 - iter 890/894 - loss 0.73480438 - time (sec): 71.61 - samples/sec: 1204.46 - lr: 0.000030 - momentum: 0.000000
+2023-10-17 16:19:29,405 ----------------------------------------------------------------------------------------------------
+2023-10-17 16:19:29,405 EPOCH 1 done: loss 0.7328 - lr: 0.000030
+2023-10-17 16:19:35,528 DEV : loss 0.15951120853424072 - f1-score (micro avg) 0.6309
+2023-10-17 16:19:35,586 saving best model
+2023-10-17 16:19:36,204 ----------------------------------------------------------------------------------------------------
+2023-10-17 16:19:43,596 epoch 2 - iter 89/894 - loss 0.18396528 - time (sec): 7.39 - samples/sec: 1147.58 - lr: 0.000030 - momentum: 0.000000
+2023-10-17 16:19:51,344 epoch 2 - iter 178/894 - loss 0.18794022 - time (sec): 15.14 - samples/sec: 1189.48 - lr: 0.000029 - momentum: 0.000000
+2023-10-17 16:19:58,283 epoch 2 - iter 267/894 - loss 0.18661222 - time (sec): 22.08 - samples/sec: 1186.51 - lr: 0.000029 - momentum: 0.000000
+2023-10-17 16:20:05,472 epoch 2 - iter 356/894 - loss 0.17708364 - time (sec): 29.26 - samples/sec: 1209.78 - lr: 0.000029 - momentum: 0.000000
+2023-10-17 16:20:12,475 epoch 2 - iter 445/894 - loss 0.17032159 - time (sec): 36.27 - samples/sec: 1215.16 - lr: 0.000028 - momentum: 0.000000
+2023-10-17 16:20:19,668 epoch 2 - iter 534/894 - loss 0.16365282 - time (sec): 43.46 - samples/sec: 1197.59 - lr: 0.000028 - momentum: 0.000000
+2023-10-17 16:20:26,799 epoch 2 - iter 623/894 - loss 0.16022299 - time (sec): 50.59 - samples/sec: 1207.36 - lr: 0.000028 - momentum: 0.000000
+2023-10-17 16:20:33,843 epoch 2 - iter 712/894 - loss 0.15724052 - time (sec): 57.64 - samples/sec: 1220.13 - lr: 0.000027 - momentum: 0.000000
+2023-10-17 16:20:40,997 epoch 2 - iter 801/894 - loss 0.15532819 - time (sec): 64.79 - samples/sec: 1210.62 - lr: 0.000027 - momentum: 0.000000
+2023-10-17 16:20:48,054 epoch 2 - iter 890/894 - loss 0.15430259 - time (sec): 71.85 - samples/sec: 1201.25 - lr: 0.000027 - momentum: 0.000000
+2023-10-17 16:20:48,368 ----------------------------------------------------------------------------------------------------
+2023-10-17 16:20:48,368 EPOCH 2 done: loss 0.1539 - lr: 0.000027
+2023-10-17 16:20:59,492 DEV : loss 0.1809595227241516 - f1-score (micro avg) 0.7136
+2023-10-17 16:20:59,549 saving best model
+2023-10-17 16:21:00,966 ----------------------------------------------------------------------------------------------------
+2023-10-17 16:21:08,129 epoch 3 - iter 89/894 - loss 0.09331837 - time (sec): 7.16 - samples/sec: 1124.91 - lr: 0.000026 - momentum: 0.000000
+2023-10-17 16:21:15,444 epoch 3 - iter 178/894 - loss 0.08405150 - time (sec): 14.47 - samples/sec: 1159.98 - lr: 0.000026 - momentum: 0.000000
+2023-10-17 16:21:23,055 epoch 3 - iter 267/894 - loss 0.07605411 - time (sec): 22.08 - samples/sec: 1167.85 - lr: 0.000026 - momentum: 0.000000
+2023-10-17 16:21:30,524 epoch 3 - iter 356/894 - loss 0.07990159 - time (sec): 29.55 - samples/sec: 1152.16 - lr: 0.000025 - momentum: 0.000000
+2023-10-17 16:21:37,586 epoch 3 - iter 445/894 - loss 0.07898967 - time (sec): 36.61 - samples/sec: 1180.95 - lr: 0.000025 - momentum: 0.000000
+2023-10-17 16:21:44,567 epoch 3 - iter 534/894 - loss 0.08478162 - time (sec): 43.60 - samples/sec: 1180.53 - lr: 0.000025 - momentum: 0.000000
+2023-10-17 16:21:51,598 epoch 3 - iter 623/894 - loss 0.08323998 - time (sec): 50.63 - samples/sec: 1194.00 - lr: 0.000024 - momentum: 0.000000
+2023-10-17 16:21:58,523 epoch 3 - iter 712/894 - loss 0.08445594 - time (sec): 57.55 - samples/sec: 1198.00 - lr: 0.000024 - momentum: 0.000000
+2023-10-17 16:22:05,473 epoch 3 - iter 801/894 - loss 0.08776340 - time (sec): 64.50 - samples/sec: 1199.14 - lr: 0.000024 - momentum: 0.000000
+2023-10-17 16:22:12,539 epoch 3 - iter 890/894 - loss 0.08785018 - time (sec): 71.57 - samples/sec: 1204.47 - lr: 0.000023 - momentum: 0.000000
+2023-10-17 16:22:12,841 ----------------------------------------------------------------------------------------------------
+2023-10-17 16:22:12,841 EPOCH 3 done: loss 0.0879 - lr: 0.000023
+2023-10-17 16:22:24,398 DEV : loss 0.17476926743984222 - f1-score (micro avg) 0.7514
+2023-10-17 16:22:24,459 saving best model
+2023-10-17 16:22:25,882 ----------------------------------------------------------------------------------------------------
+2023-10-17 16:22:32,762 epoch 4 - iter 89/894 - loss 0.04063277 - time (sec): 6.88 - samples/sec: 1393.61 - lr: 0.000023 - momentum: 0.000000
+2023-10-17 16:22:39,858 epoch 4 - iter 178/894 - loss 0.04513127 - time (sec): 13.97 - samples/sec: 1366.56 - lr: 0.000023 - momentum: 0.000000
+2023-10-17 16:22:46,759 epoch 4 - iter 267/894 - loss 0.04728134 - time (sec): 20.87 - samples/sec: 1302.78 - lr: 0.000022 - momentum: 0.000000
+2023-10-17 16:22:53,633 epoch 4 - iter 356/894 - loss 0.04603510 - time (sec): 27.75 - samples/sec: 1262.38 - lr: 0.000022 - momentum: 0.000000
+2023-10-17 16:23:00,593 epoch 4 - iter 445/894 - loss 0.04807142 - time (sec): 34.71 - samples/sec: 1253.87 - lr: 0.000022 - momentum: 0.000000
+2023-10-17 16:23:07,910 epoch 4 - iter 534/894 - loss 0.04880179 - time (sec): 42.02 - samples/sec: 1245.02 - lr: 0.000021 - momentum: 0.000000
+2023-10-17 16:23:15,036 epoch 4 - iter 623/894 - loss 0.04793107 - time (sec): 49.15 - samples/sec: 1233.15 - lr: 0.000021 - momentum: 0.000000
+2023-10-17 16:23:22,257 epoch 4 - iter 712/894 - loss 0.04983695 - time (sec): 56.37 - samples/sec: 1230.64 - lr: 0.000021 - momentum: 0.000000
+2023-10-17 16:23:29,415 epoch 4 - iter 801/894 - loss 0.05080398 - time (sec): 63.53 - samples/sec: 1225.07 - lr: 0.000020 - momentum: 0.000000
+2023-10-17 16:23:36,701 epoch 4 - iter 890/894 - loss 0.05295316 - time (sec): 70.81 - samples/sec: 1216.37 - lr: 0.000020 - momentum: 0.000000
+2023-10-17 16:23:37,026 ----------------------------------------------------------------------------------------------------
+2023-10-17 16:23:37,026 EPOCH 4 done: loss 0.0531 - lr: 0.000020
+2023-10-17 16:23:48,561 DEV : loss 0.1792767345905304 - f1-score (micro avg) 0.784
+2023-10-17 16:23:48,621 saving best model
+2023-10-17 16:23:50,016 ----------------------------------------------------------------------------------------------------
+2023-10-17 16:23:57,097 epoch 5 - iter 89/894 - loss 0.03003141 - time (sec): 7.08 - samples/sec: 1184.99 - lr: 0.000020 - momentum: 0.000000
+2023-10-17 16:24:04,339 epoch 5 - iter 178/894 - loss 0.03386181 - time (sec): 14.32 - samples/sec: 1258.85 - lr: 0.000019 - momentum: 0.000000
+2023-10-17 16:24:11,294 epoch 5 - iter 267/894 - loss 0.03450992 - time (sec): 21.27 - samples/sec: 1259.13 - lr: 0.000019 - momentum: 0.000000
+2023-10-17 16:24:18,300 epoch 5 - iter 356/894 - loss 0.03581250 - time (sec): 28.28 - samples/sec: 1242.93 - lr: 0.000019 - momentum: 0.000000
+2023-10-17 16:24:25,225 epoch 5 - iter 445/894 - loss 0.03969636 - time (sec): 35.21 - samples/sec: 1227.42 - lr: 0.000018 - momentum: 0.000000
+2023-10-17 16:24:32,350 epoch 5 - iter 534/894 - loss 0.04027827 - time (sec): 42.33 - samples/sec: 1233.12 - lr: 0.000018 - momentum: 0.000000
+2023-10-17 16:24:39,848 epoch 5 - iter 623/894 - loss 0.04050683 - time (sec): 49.83 - samples/sec: 1217.98 - lr: 0.000018 - momentum: 0.000000
+2023-10-17 16:24:46,961 epoch 5 - iter 712/894 - loss 0.03821864 - time (sec): 56.94 - samples/sec: 1224.89 - lr: 0.000017 - momentum: 0.000000
+2023-10-17 16:24:54,046 epoch 5 - iter 801/894 - loss 0.03790059 - time (sec): 64.03 - samples/sec: 1216.47 - lr: 0.000017 - momentum: 0.000000
+2023-10-17 16:25:01,137 epoch 5 - iter 890/894 - loss 0.03627111 - time (sec): 71.12 - samples/sec: 1213.26 - lr: 0.000017 - momentum: 0.000000
+2023-10-17 16:25:01,446 ----------------------------------------------------------------------------------------------------
+2023-10-17 16:25:01,447 EPOCH 5 done: loss 0.0362 - lr: 0.000017
+2023-10-17 16:25:13,187 DEV : loss 0.1935582309961319 - f1-score (micro avg) 0.7777
+2023-10-17 16:25:13,243 ----------------------------------------------------------------------------------------------------
+2023-10-17 16:25:20,221 epoch 6 - iter 89/894 - loss 0.03711638 - time (sec): 6.98 - samples/sec: 1275.39 - lr: 0.000016 - momentum: 0.000000
+2023-10-17 16:25:27,241 epoch 6 - iter 178/894 - loss 0.02750927 - time (sec): 14.00 - samples/sec: 1256.60 - lr: 0.000016 - momentum: 0.000000
+2023-10-17 16:25:33,829 epoch 6 - iter 267/894 - loss 0.02860102 - time (sec): 20.58 - samples/sec: 1256.66 - lr: 0.000016 - momentum: 0.000000
+2023-10-17 16:25:40,054 epoch 6 - iter 356/894 - loss 0.03148660 - time (sec): 26.81 - samples/sec: 1277.67 - lr: 0.000015 - momentum: 0.000000
+2023-10-17 16:25:46,934 epoch 6 - iter 445/894 - loss 0.02760601 - time (sec): 33.69 - samples/sec: 1276.12 - lr: 0.000015 - momentum: 0.000000
+2023-10-17 16:25:54,136 epoch 6 - iter 534/894 - loss 0.02699126 - time (sec): 40.89 - samples/sec: 1251.24 - lr: 0.000015 - momentum: 0.000000
+2023-10-17 16:26:01,914 epoch 6 - iter 623/894 - loss 0.02489164 - time (sec): 48.67 - samples/sec: 1215.02 - lr: 0.000014 - momentum: 0.000000
+2023-10-17 16:26:10,004 epoch 6 - iter 712/894 - loss 0.02413339 - time (sec): 56.76 - samples/sec: 1201.89 - lr: 0.000014 - momentum: 0.000000
+2023-10-17 16:26:17,118 epoch 6 - iter 801/894 - loss 0.02224205 - time (sec): 63.87 - samples/sec: 1199.72 - lr: 0.000014 - momentum: 0.000000
+2023-10-17 16:26:24,447 epoch 6 - iter 890/894 - loss 0.02153523 - time (sec): 71.20 - samples/sec: 1210.61 - lr: 0.000013 - momentum: 0.000000
+2023-10-17 16:26:24,756 ----------------------------------------------------------------------------------------------------
+2023-10-17 16:26:24,757 EPOCH 6 done: loss 0.0214 - lr: 0.000013
+2023-10-17 16:26:36,294 DEV : loss 0.21073873341083527 - f1-score (micro avg) 0.7834
+2023-10-17 16:26:36,351 ----------------------------------------------------------------------------------------------------
+2023-10-17 16:26:43,526 epoch 7 - iter 89/894 - loss 0.01344321 - time (sec): 7.17 - samples/sec: 1213.35 - lr: 0.000013 - momentum: 0.000000
+2023-10-17 16:26:50,431 epoch 7 - iter 178/894 - loss 0.00956923 - time (sec): 14.08 - samples/sec: 1167.69 - lr: 0.000013 - momentum: 0.000000
+2023-10-17 16:26:57,578 epoch 7 - iter 267/894 - loss 0.01112114 - time (sec): 21.22 - samples/sec: 1178.79 - lr: 0.000012 - momentum: 0.000000
+2023-10-17 16:27:04,765 epoch 7 - iter 356/894 - loss 0.01197922 - time (sec): 28.41 - samples/sec: 1200.39 - lr: 0.000012 - momentum: 0.000000
+2023-10-17 16:27:11,688 epoch 7 - iter 445/894 - loss 0.01450837 - time (sec): 35.33 - samples/sec: 1208.76 - lr: 0.000012 - momentum: 0.000000
+2023-10-17 16:27:18,436 epoch 7 - iter 534/894 - loss 0.01471580 - time (sec): 42.08 - samples/sec: 1226.69 - lr: 0.000011 - momentum: 0.000000
+2023-10-17 16:27:25,406 epoch 7 - iter 623/894 - loss 0.01479523 - time (sec): 49.05 - samples/sec: 1226.52 - lr: 0.000011 - momentum: 0.000000
+2023-10-17 16:27:32,663 epoch 7 - iter 712/894 - loss 0.01445821 - time (sec): 56.31 - samples/sec: 1231.00 - lr: 0.000011 - momentum: 0.000000
+2023-10-17 16:27:39,657 epoch 7 - iter 801/894 - loss 0.01556922 - time (sec): 63.30 - samples/sec: 1234.27 - lr: 0.000010 - momentum: 0.000000
+2023-10-17 16:27:46,756 epoch 7 - iter 890/894 - loss 0.01512635 - time (sec): 70.40 - samples/sec: 1222.90 - lr: 0.000010 - momentum: 0.000000
+2023-10-17 16:27:47,082 ----------------------------------------------------------------------------------------------------
+2023-10-17 16:27:47,082 EPOCH 7 done: loss 0.0151 - lr: 0.000010
+2023-10-17 16:27:58,328 DEV : loss 0.22550107538700104 - f1-score (micro avg) 0.7899
+2023-10-17 16:27:58,394 saving best model
+2023-10-17 16:27:59,799 ----------------------------------------------------------------------------------------------------
+2023-10-17 16:28:06,882 epoch 8 - iter 89/894 - loss 0.00735787 - time (sec): 7.08 - samples/sec: 1243.80 - lr: 0.000010 - momentum: 0.000000
+2023-10-17 16:28:13,927 epoch 8 - iter 178/894 - loss 0.01365307 - time (sec): 14.12 - samples/sec: 1205.85 - lr: 0.000009 - momentum: 0.000000
+2023-10-17 16:28:21,045 epoch 8 - iter 267/894 - loss 0.01185970 - time (sec): 21.24 - samples/sec: 1194.91 - lr: 0.000009 - momentum: 0.000000
+2023-10-17 16:28:28,164 epoch 8 - iter 356/894 - loss 0.01188072 - time (sec): 28.36 - samples/sec: 1184.91 - lr: 0.000009 - momentum: 0.000000
+2023-10-17 16:28:35,573 epoch 8 - iter 445/894 - loss 0.01063050 - time (sec): 35.77 - samples/sec: 1219.19 - lr: 0.000008 - momentum: 0.000000
+2023-10-17 16:28:43,036 epoch 8 - iter 534/894 - loss 0.01071909 - time (sec): 43.23 - samples/sec: 1214.11 - lr: 0.000008 - momentum: 0.000000
+2023-10-17 16:28:50,128 epoch 8 - iter 623/894 - loss 0.00992864 - time (sec): 50.32 - samples/sec: 1224.89 - lr: 0.000008 - momentum: 0.000000
+2023-10-17 16:28:57,943 epoch 8 - iter 712/894 - loss 0.00975064 - time (sec): 58.14 - samples/sec: 1198.00 - lr: 0.000007 - momentum: 0.000000
+2023-10-17 16:29:05,257 epoch 8 - iter 801/894 - loss 0.01030314 - time (sec): 65.45 - samples/sec: 1192.10 - lr: 0.000007 - momentum: 0.000000
+2023-10-17 16:29:12,309 epoch 8 - iter 890/894 - loss 0.01034406 - time (sec): 72.50 - samples/sec: 1190.39 - lr: 0.000007 - momentum: 0.000000
+2023-10-17 16:29:12,613 ----------------------------------------------------------------------------------------------------
+2023-10-17 16:29:12,613 EPOCH 8 done: loss 0.0103 - lr: 0.000007
+2023-10-17 16:29:23,870 DEV : loss 0.2575424909591675 - f1-score (micro avg) 0.7861
+2023-10-17 16:29:23,936 ----------------------------------------------------------------------------------------------------
+2023-10-17 16:29:31,056 epoch 9 - iter 89/894 - loss 0.00456394 - time (sec): 7.12 - samples/sec: 1194.27 - lr: 0.000006 - momentum: 0.000000
+2023-10-17 16:29:37,954 epoch 9 - iter 178/894 - loss 0.01007170 - time (sec): 14.02 - samples/sec: 1222.45 - lr: 0.000006 - momentum: 0.000000
+2023-10-17 16:29:44,870 epoch 9 - iter 267/894 - loss 0.01043709 - time (sec): 20.93 - samples/sec: 1187.76 - lr: 0.000006 - momentum: 0.000000
+2023-10-17 16:29:51,939 epoch 9 - iter 356/894 - loss 0.00821020 - time (sec): 28.00 - samples/sec: 1208.05 - lr: 0.000005 - momentum: 0.000000
+2023-10-17 16:29:59,410 epoch 9 - iter 445/894 - loss 0.00750055 - time (sec): 35.47 - samples/sec: 1203.33 - lr: 0.000005 - momentum: 0.000000
+2023-10-17 16:30:06,520 epoch 9 - iter 534/894 - loss 0.00728371 - time (sec): 42.58 - samples/sec: 1212.36 - lr: 0.000005 - momentum: 0.000000
+2023-10-17 16:30:13,357 epoch 9 - iter 623/894 - loss 0.00665355 - time (sec): 49.42 - samples/sec: 1215.10 - lr: 0.000004 - momentum: 0.000000
+2023-10-17 16:30:20,354 epoch 9 - iter 712/894 - loss 0.00712338 - time (sec): 56.41 - samples/sec: 1218.78 - lr: 0.000004 - momentum: 0.000000
+2023-10-17 16:30:27,700 epoch 9 - iter 801/894 - loss 0.00673904 - time (sec): 63.76 - samples/sec: 1217.94 - lr: 0.000004 - momentum: 0.000000
+2023-10-17 16:30:34,836 epoch 9 - iter 890/894 - loss 0.00673433 - time (sec): 70.90 - samples/sec: 1216.48 - lr: 0.000003 - momentum: 0.000000
+2023-10-17 16:30:35,144 ----------------------------------------------------------------------------------------------------
+2023-10-17 16:30:35,144 EPOCH 9 done: loss 0.0067 - lr: 0.000003
+2023-10-17 16:30:46,951 DEV : loss 0.2568037509918213 - f1-score (micro avg) 0.7908
+2023-10-17 16:30:47,017 saving best model
+2023-10-17 16:30:48,440 ----------------------------------------------------------------------------------------------------
+2023-10-17 16:30:55,777 epoch 10 - iter 89/894 - loss 0.00138104 - time (sec): 7.33 - samples/sec: 1254.10 - lr: 0.000003 - momentum: 0.000000
+2023-10-17 16:31:02,774 epoch 10 - iter 178/894 - loss 0.00366289 - time (sec): 14.33 - samples/sec: 1208.69 - lr: 0.000003 - momentum: 0.000000
+2023-10-17 16:31:09,867 epoch 10 - iter 267/894 - loss 0.00365606 - time (sec): 21.42 - samples/sec: 1185.24 - lr: 0.000002 - momentum: 0.000000
+2023-10-17 16:31:16,830 epoch 10 - iter 356/894 - loss 0.00343159 - time (sec): 28.39 - samples/sec: 1198.61 - lr: 0.000002 - momentum: 0.000000
+2023-10-17 16:31:23,966 epoch 10 - iter 445/894 - loss 0.00426874 - time (sec): 35.52 - samples/sec: 1202.62 - lr: 0.000002 - momentum: 0.000000
+2023-10-17 16:31:31,313 epoch 10 - iter 534/894 - loss 0.00431619 - time (sec): 42.87 - samples/sec: 1218.53 - lr: 0.000001 - momentum: 0.000000
+2023-10-17 16:31:38,177 epoch 10 - iter 623/894 - loss 0.00428653 - time (sec): 49.73 - samples/sec: 1204.91 - lr: 0.000001 - momentum: 0.000000
+2023-10-17 16:31:45,304 epoch 10 - iter 712/894 - loss 0.00426761 - time (sec): 56.86 - samples/sec: 1208.58 - lr: 0.000001 - momentum: 0.000000
+2023-10-17 16:31:52,251 epoch 10 - iter 801/894 - loss 0.00417570 - time (sec): 63.81 - samples/sec: 1205.29 - lr: 0.000000 - momentum: 0.000000
+2023-10-17 16:31:59,363 epoch 10 - iter 890/894 - loss 0.00387118 - time (sec): 70.92 - samples/sec: 1214.02 - lr: 0.000000 - momentum: 0.000000
+2023-10-17 16:31:59,677 ----------------------------------------------------------------------------------------------------
+2023-10-17 16:31:59,677 EPOCH 10 done: loss 0.0039 - lr: 0.000000
+2023-10-17 16:32:11,414 DEV : loss 0.24554787576198578 - f1-score (micro avg) 0.7947
+2023-10-17 16:32:11,470 saving best model
+2023-10-17 16:32:13,374 ----------------------------------------------------------------------------------------------------
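The per-iteration lines above follow a fixed format, so intermediate losses, throughput and learning rates can be recovered from training.log directly. A small regex-based parsing sketch, assuming the log has been saved locally as training.log:

```python
import re

# Matches lines like:
# "... epoch 1 - iter 89/894 - loss 3.49098594 - time (sec): 7.10 - samples/sec: 1258.54 - lr: 0.000003 ..."
ITER_RE = re.compile(
    r"epoch (?P<epoch>\d+) - iter (?P<iter>\d+)/(?P<total>\d+) - loss (?P<loss>[\d.]+)"
    r" - time \(sec\): (?P<sec>[\d.]+) - samples/sec: (?P<sps>[\d.]+) - lr: (?P<lr>[\d.]+)"
)

records = []
with open("training.log", encoding="utf-8") as fh:
    for line in fh:
        m = ITER_RE.search(line)
        if m:
            records.append(
                {k: float(v) if "." in v else int(v) for k, v in m.groupdict().items()}
            )

print(len(records), "iteration checkpoints")  # 10 per epoch x 10 epochs = 100 here
print(records[0])
```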
+2023-10-17 16:32:13,376 Loading model from best epoch ...
+2023-10-17 16:32:15,657 SequenceTagger predicts: Dictionary with 21 tags: O, S-loc, B-loc, E-loc, I-loc, S-pers, B-pers, E-pers, I-pers, S-org, B-org, E-org, I-org, S-prod, B-prod, E-prod, I-prod, S-time, B-time, E-time, I-time
+2023-10-17 16:32:21,664
+Results:
+- F-score (micro) 0.7665
+- F-score (macro) 0.6658
+- Accuracy 0.6401
+
+By class:
+              precision    recall  f1-score   support
+
+         loc     0.8478    0.8691    0.8583       596
+        pers     0.7116    0.8078    0.7567       333
+         org     0.5207    0.4773    0.4980       132
+        prod     0.5849    0.4697    0.5210        66
+        time     0.7174    0.6735    0.6947        49
+
+   micro avg     0.7560    0.7772    0.7665      1176
+   macro avg     0.6765    0.6595    0.6658      1176
+weighted avg     0.7523    0.7772    0.7634      1176
+
+2023-10-17 16:32:21,665 ----------------------------------------------------------------------------------------------------
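For completeness, a short usage sketch for the uploaded checkpoint: loading it in Flair and tagging a German sentence. The file path and the example sentence are illustrative only.

```python
from flair.data import Sentence
from flair.models import SequenceTagger

# Assumes best-model.pt has been downloaded next to this script.
tagger = SequenceTagger.load("best-model.pt")

sentence = Sentence("Goethe wurde 1749 in Frankfurt am Main geboren .")
tagger.predict(sentence)

# The tagger predicts BIOES labels over loc, pers, org, prod and time (21 tags incl. O).
for span in sentence.get_spans("ner"):
    print(span.text, span.get_label("ner").value, round(span.get_label("ner").score, 3))
```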