End of training
Browse files- README.md +14 -13
- all_results.json +22 -22
- eval_results.json +8 -8
- predict_results.json +8 -8
- predictions.txt +0 -0
- tb/events.out.tfevents.1725581040.2a66098fac87.15776.1 +3 -0
- train.log +50 -0
- train_results.json +6 -6
- trainer_state.json +171 -164
README.md
CHANGED
@@ -3,9 +3,10 @@ library_name: transformers
|
|
3 |
license: apache-2.0
|
4 |
base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
|
5 |
tags:
|
|
|
6 |
- generated_from_trainer
|
7 |
datasets:
|
8 |
-
- combined-train-distemist-dev-85-ner
|
9 |
metrics:
|
10 |
- precision
|
11 |
- recall
|
@@ -18,24 +19,24 @@ model-index:
|
|
18 |
name: Token Classification
|
19 |
type: token-classification
|
20 |
dataset:
|
21 |
-
name: combined-train-distemist-dev-85-ner
|
22 |
-
type: combined-train-distemist-dev-85-ner
|
23 |
config: CombinedTrainDisTEMISTDevNER
|
24 |
split: validation
|
25 |
args: CombinedTrainDisTEMISTDevNER
|
26 |
metrics:
|
27 |
- name: Precision
|
28 |
type: precision
|
29 |
-
value: 0.
|
30 |
- name: Recall
|
31 |
type: recall
|
32 |
-
value: 0.
|
33 |
- name: F1
|
34 |
type: f1
|
35 |
-
value: 0.
|
36 |
- name: Accuracy
|
37 |
type: accuracy
|
38 |
-
value: 0.
|
39 |
---
|
40 |
|
41 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
@@ -43,13 +44,13 @@ should probably proofread and complete it, then remove this comment. -->
|
|
43 |
|
44 |
# output
|
45 |
|
46 |
-
This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the combined-train-distemist-dev-85-ner dataset.
|
47 |
It achieves the following results on the evaluation set:
|
48 |
-
- Loss:
|
49 |
-
- Precision: 0.
|
50 |
-
- Recall: 0.
|
51 |
-
- F1: 0.
|
52 |
-
- Accuracy: 0.
|
53 |
|
54 |
## Model description
|
55 |
|
|
|
3 |
license: apache-2.0
|
4 |
base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
|
5 |
tags:
|
6 |
+
- token-classification
|
7 |
- generated_from_trainer
|
8 |
datasets:
|
9 |
+
- Rodrigo1771/combined-train-distemist-dev-85-ner
|
10 |
metrics:
|
11 |
- precision
|
12 |
- recall
|
|
|
19 |
name: Token Classification
|
20 |
type: token-classification
|
21 |
dataset:
|
22 |
+
name: Rodrigo1771/combined-train-distemist-dev-85-ner
|
23 |
+
type: Rodrigo1771/combined-train-distemist-dev-85-ner
|
24 |
config: CombinedTrainDisTEMISTDevNER
|
25 |
split: validation
|
26 |
args: CombinedTrainDisTEMISTDevNER
|
27 |
metrics:
|
28 |
- name: Precision
|
29 |
type: precision
|
30 |
+
value: 0.3152508603513856
|
31 |
- name: Recall
|
32 |
type: recall
|
33 |
+
value: 0.8144595226953674
|
34 |
- name: F1
|
35 |
type: f1
|
36 |
+
value: 0.45455732567249935
|
37 |
- name: Accuracy
|
38 |
type: accuracy
|
39 |
+
value: 0.8564886649182308
|
40 |
---
|
41 |
|
42 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
|
|
44 |
|
45 |
# output
|
46 |
|
47 |
+
This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the Rodrigo1771/combined-train-distemist-dev-85-ner dataset.
|
48 |
It achieves the following results on the evaluation set:
|
49 |
+
- Loss: 0.7006
|
50 |
+
- Precision: 0.3153
|
51 |
+
- Recall: 0.8145
|
52 |
+
- F1: 0.4546
|
53 |
+
- Accuracy: 0.8565
|
54 |
|
55 |
## Model description
|
56 |
|
all_results.json
CHANGED
@@ -1,26 +1,26 @@
|
|
1 |
{
|
2 |
"epoch": 10.0,
|
3 |
-
"eval_accuracy": 0.
|
4 |
-
"eval_f1": 0.
|
5 |
-
"eval_loss": 0.
|
6 |
-
"eval_precision": 0.
|
7 |
-
"eval_recall": 0.
|
8 |
-
"eval_runtime":
|
9 |
"eval_samples": 6810,
|
10 |
-
"eval_samples_per_second":
|
11 |
-
"eval_steps_per_second":
|
12 |
-
"predict_accuracy": 0.
|
13 |
-
"predict_f1": 0.
|
14 |
-
"predict_loss": 0.
|
15 |
-
"predict_precision": 0.
|
16 |
-
"predict_recall": 0.
|
17 |
-
"predict_runtime":
|
18 |
-
"predict_samples_per_second":
|
19 |
-
"predict_steps_per_second":
|
20 |
-
"total_flos": 1.
|
21 |
-
"train_loss": 0.
|
22 |
-
"train_runtime":
|
23 |
-
"train_samples":
|
24 |
-
"train_samples_per_second":
|
25 |
-
"train_steps_per_second": 3.
|
26 |
}
|
|
|
1 |
{
|
2 |
"epoch": 10.0,
|
3 |
+
"eval_accuracy": 0.8564886649182308,
|
4 |
+
"eval_f1": 0.45455732567249935,
|
5 |
+
"eval_loss": 0.7005925178527832,
|
6 |
+
"eval_precision": 0.3152508603513856,
|
7 |
+
"eval_recall": 0.8144595226953674,
|
8 |
+
"eval_runtime": 14.2934,
|
9 |
"eval_samples": 6810,
|
10 |
+
"eval_samples_per_second": 476.445,
|
11 |
+
"eval_steps_per_second": 59.608,
|
12 |
+
"predict_accuracy": 0.9437946603149774,
|
13 |
+
"predict_f1": 0.6365870441364396,
|
14 |
+
"predict_loss": 0.22766011953353882,
|
15 |
+
"predict_precision": 0.519576379974326,
|
16 |
+
"predict_recall": 0.8216188784572444,
|
17 |
+
"predict_runtime": 29.1056,
|
18 |
+
"predict_samples_per_second": 502.103,
|
19 |
+
"predict_steps_per_second": 62.771,
|
20 |
+
"total_flos": 1.7176580067661056e+16,
|
21 |
+
"train_loss": 0.0812657987344287,
|
22 |
+
"train_runtime": 1549.44,
|
23 |
+
"train_samples": 34604,
|
24 |
+
"train_samples_per_second": 223.332,
|
25 |
+
"train_steps_per_second": 3.492
|
26 |
}
|
eval_results.json
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
{
|
2 |
"epoch": 10.0,
|
3 |
-
"eval_accuracy": 0.
|
4 |
-
"eval_f1": 0.
|
5 |
-
"eval_loss": 0.
|
6 |
-
"eval_precision": 0.
|
7 |
-
"eval_recall": 0.
|
8 |
-
"eval_runtime":
|
9 |
"eval_samples": 6810,
|
10 |
-
"eval_samples_per_second":
|
11 |
-
"eval_steps_per_second":
|
12 |
}
|
|
|
1 |
{
|
2 |
"epoch": 10.0,
|
3 |
+
"eval_accuracy": 0.8564886649182308,
|
4 |
+
"eval_f1": 0.45455732567249935,
|
5 |
+
"eval_loss": 0.7005925178527832,
|
6 |
+
"eval_precision": 0.3152508603513856,
|
7 |
+
"eval_recall": 0.8144595226953674,
|
8 |
+
"eval_runtime": 14.2934,
|
9 |
"eval_samples": 6810,
|
10 |
+
"eval_samples_per_second": 476.445,
|
11 |
+
"eval_steps_per_second": 59.608
|
12 |
}
|
predict_results.json
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
{
|
2 |
-
"predict_accuracy": 0.
|
3 |
-
"predict_f1": 0.
|
4 |
-
"predict_loss": 0.
|
5 |
-
"predict_precision": 0.
|
6 |
-
"predict_recall": 0.
|
7 |
-
"predict_runtime":
|
8 |
-
"predict_samples_per_second":
|
9 |
-
"predict_steps_per_second":
|
10 |
}
|
|
|
1 |
{
|
2 |
+
"predict_accuracy": 0.9437946603149774,
|
3 |
+
"predict_f1": 0.6365870441364396,
|
4 |
+
"predict_loss": 0.22766011953353882,
|
5 |
+
"predict_precision": 0.519576379974326,
|
6 |
+
"predict_recall": 0.8216188784572444,
|
7 |
+
"predict_runtime": 29.1056,
|
8 |
+
"predict_samples_per_second": 502.103,
|
9 |
+
"predict_steps_per_second": 62.771
|
10 |
}
|
predictions.txt
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
tb/events.out.tfevents.1725581040.2a66098fac87.15776.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:35dfb83cada8fb681a65313600c2857481bd7212900b1573ca8d6ce9b64470bb
|
3 |
+
size 560
|
train.log
CHANGED
@@ -1495,3 +1495,53 @@ Training completed. Do not forget to share your model on huggingface.co/models =
|
|
1495 |
{'eval_loss': 1.0022608041763306, 'eval_precision': 0.31162999550965426, 'eval_recall': 0.8118858212447356, 'eval_f1': 0.45038613797131544, 'eval_accuracy': 0.8533304955579661, 'eval_runtime': 14.4559, 'eval_samples_per_second': 471.089, 'eval_steps_per_second': 58.938, 'epoch': 10.0}
|
1496 |
{'train_runtime': 1549.44, 'train_samples_per_second': 223.332, 'train_steps_per_second': 3.492, 'train_loss': 0.0812657987344287, 'epoch': 10.0}
|
1497 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1498 |
0%| | 0/852 [00:00<?, ?it/s]
|
1499 |
1%| | 10/852 [00:00<00:09, 92.77it/s]
|
1500 |
2%|▏ | 20/852 [00:00<00:10, 80.16it/s]
|
1501 |
3%|▎ | 29/852 [00:00<00:10, 79.89it/s]
|
1502 |
4%|▍ | 38/852 [00:00<00:10, 80.51it/s]
|
1503 |
6%|▌ | 47/852 [00:00<00:09, 81.99it/s]
|
1504 |
7%|▋ | 56/852 [00:00<00:09, 82.94it/s]
|
1505 |
8%|▊ | 65/852 [00:00<00:09, 82.00it/s]
|
1506 |
9%|▊ | 74/852 [00:00<00:09, 80.66it/s]
|
1507 |
10%|▉ | 83/852 [00:01<00:09, 80.65it/s]
|
1508 |
11%|█ | 92/852 [00:01<00:09, 80.64it/s]
|
1509 |
12%|█▏ | 101/852 [00:01<00:09, 79.84it/s]
|
1510 |
13%|█▎ | 109/852 [00:01<00:09, 79.60it/s]
|
1511 |
14%|█▍ | 118/852 [00:01<00:09, 80.66it/s]
|
1512 |
15%|█▍ | 127/852 [00:01<00:09, 77.73it/s]
|
1513 |
16%|█▌ | 136/852 [00:01<00:09, 78.64it/s]
|
1514 |
17%|█▋ | 144/852 [00:01<00:08, 78.98it/s]
|
1515 |
18%|█▊ | 152/852 [00:01<00:09, 77.66it/s]
|
1516 |
19%|█▉ | 161/852 [00:02<00:08, 79.31it/s]
|
1517 |
20%|█▉ | 169/852 [00:02<00:08, 79.22it/s]
|
1518 |
21%|██ | 178/852 [00:02<00:08, 79.74it/s]
|
1519 |
22%|██▏ | 187/852 [00:02<00:08, 80.55it/s]
|
1520 |
23%|██▎ | 196/852 [00:02<00:08, 80.55it/s]
|
1521 |
24%|██▍ | 205/852 [00:02<00:08, 80.53it/s]
|
1522 |
25%|██▌ | 214/852 [00:02<00:08, 78.05it/s]
|
1523 |
26%|██▌ | 223/852 [00:02<00:07, 79.41it/s]
|
1524 |
27%|██▋ | 232/852 [00:02<00:07, 80.51it/s]
|
1525 |
28%|██▊ | 241/852 [00:03<00:07, 77.55it/s]
|
1526 |
29%|██▉ | 250/852 [00:03<00:07, 79.18it/s]
|
1527 |
30%|███ | 259/852 [00:03<00:07, 80.41it/s]
|
1528 |
31%|███▏ | 268/852 [00:03<00:07, 80.34it/s]
|
1529 |
33%|███▎ | 277/852 [00:03<00:07, 81.00it/s]
|
1530 |
34%|███▎ | 286/852 [00:03<00:06, 81.59it/s]
|
1531 |
35%|███▍ | 295/852 [00:03<00:06, 80.31it/s]
|
1532 |
36%|███▌ | 304/852 [00:03<00:06, 81.47it/s]
|
1533 |
37%|███▋ | 313/852 [00:03<00:06, 79.79it/s]
|
1534 |
38%|███▊ | 322/852 [00:04<00:06, 81.33it/s]
|
1535 |
39%|███▉ | 331/852 [00:04<00:06, 80.75it/s]
|
1536 |
40%|███▉ | 340/852 [00:04<00:06, 80.70it/s]
|
1537 |
41%|████ | 349/852 [00:04<00:06, 80.37it/s]
|
1538 |
42%|████▏ | 358/852 [00:04<00:06, 78.68it/s]
|
1539 |
43%|████▎ | 367/852 [00:04<00:06, 79.85it/s]
|
1540 |
44%|████▍ | 376/852 [00:04<00:05, 80.53it/s]
|
1541 |
45%|████▌ | 385/852 [00:04<00:05, 80.46it/s]
|
1542 |
46%|████▌ | 394/852 [00:04<00:05, 81.26it/s]
|
1543 |
47%|████▋ | 403/852 [00:05<00:05, 81.11it/s]
|
1544 |
48%|████▊ | 412/852 [00:05<00:05, 79.03it/s]
|
1545 |
49%|████▉ | 421/852 [00:05<00:05, 80.48it/s]
|
1546 |
50%|█████ | 430/852 [00:05<00:05, 79.74it/s]
|
1547 |
52%|█████▏ | 439/852 [00:05<00:05, 81.17it/s]
|
1548 |
53%|█████▎ | 448/852 [00:05<00:04, 81.44it/s]
|
1549 |
54%|█████▎ | 457/852 [00:05<00:04, 82.03it/s]
|
1550 |
55%|█████▍ | 466/852 [00:05<00:04, 79.25it/s]
|
1551 |
56%|█████▌ | 474/852 [00:05<00:05, 75.40it/s]
|
1552 |
57%|█████▋ | 482/852 [00:06<00:04, 76.12it/s]
|
1553 |
58%|█████▊ | 490/852 [00:06<00:04, 76.55it/s]
|
1554 |
59%|█████▊ | 499/852 [00:06<00:04, 78.88it/s]
|
1555 |
60%|█████▉ | 507/852 [00:06<00:04, 78.87it/s]
|
1556 |
61%|██████ | 516/852 [00:06<00:04, 80.91it/s]
|
1557 |
62%|██████▏ | 525/852 [00:06<00:04, 79.57it/s]
|
1558 |
63%|██████▎ | 534/852 [00:06<00:03, 79.97it/s]
|
1559 |
64%|██████▎ | 543/852 [00:06<00:03, 81.17it/s]
|
1560 |
65%|██████▍ | 552/852 [00:06<00:03, 79.32it/s]
|
1561 |
66%|██████▌ | 561/852 [00:07<00:03, 79.73it/s]
|
1562 |
67%|██████▋ | 570/852 [00:07<00:03, 80.17it/s]
|
1563 |
68%|██████▊ | 579/852 [00:07<00:03, 79.01it/s]
|
1564 |
69%|██████▉ | 587/852 [00:07<00:03, 78.03it/s]
|
1565 |
70%|██████▉ | 596/852 [00:07<00:03, 78.78it/s]
|
1566 |
71%|███████ | 604/852 [00:07<00:03, 78.44it/s]
|
1567 |
72%|███████▏ | 612/852 [00:07<00:03, 77.03it/s]
|
1568 |
73%|███████▎ | 620/852 [00:07<00:02, 77.43it/s]
|
1569 |
74%|███████▎ | 628/852 [00:07<00:02, 77.06it/s]
|
1570 |
75%|███████▍ | 636/852 [00:07<00:02, 77.78it/s]
|
1571 |
76%|███████▌ | 644/852 [00:08<00:02, 75.78it/s]
|
1572 |
77%|███████▋ | 653/852 [00:08<00:02, 77.88it/s]
|
1573 |
78%|███████▊ | 662/852 [00:08<00:02, 78.58it/s]
|
1574 |
79%|███████▉ | 671/852 [00:08<00:02, 78.80it/s]
|
1575 |
80%|███████▉ | 680/852 [00:08<00:02, 79.13it/s]
|
1576 |
81%|████████ | 689/852 [00:08<00:02, 80.27it/s]
|
1577 |
82%|████████▏ | 698/852 [00:08<00:01, 80.46it/s]
|
1578 |
83%|████████▎ | 707/852 [00:08<00:01, 80.49it/s]
|
1579 |
84%|████████▍ | 716/852 [00:08<00:01, 80.53it/s]
|
1580 |
85%|████████▌ | 725/852 [00:09<00:01, 81.23it/s]
|
1581 |
86%|████████▌ | 734/852 [00:09<00:01, 82.34it/s]
|
1582 |
87%|████████▋ | 743/852 [00:09<00:01, 82.16it/s]
|
1583 |
88%|████████▊ | 752/852 [00:09<00:01, 82.54it/s]
|
1584 |
89%|████████▉ | 761/852 [00:09<00:01, 83.78it/s]
|
1585 |
90%|█████████ | 770/852 [00:09<00:00, 82.29it/s]
|
1586 |
91%|█████████▏| 779/852 [00:09<00:00, 81.45it/s]
|
1587 |
92%|█████████▏| 788/852 [00:09<00:00, 80.57it/s]
|
1588 |
94%|█████████▎| 797/852 [00:09<00:00, 80.93it/s]
|
1589 |
95%|█████████▍| 806/852 [00:10<00:00, 82.19it/s]
|
1590 |
96%|█████████▌| 815/852 [00:10<00:00, 81.02it/s]
|
1591 |
97%|█████████▋| 824/852 [00:10<00:00, 81.64it/s]
|
1592 |
98%|█████████▊| 833/852 [00:10<00:00, 82.31it/s]
|
1593 |
99%|█████████▉| 842/852 [00:10<00:00, 80.76it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1594 |
0%| | 0/1827 [00:00<?, ?it/s]
|
1595 |
1%| | 10/1827 [00:00<00:19, 91.06it/s]
|
1596 |
1%| | 20/1827 [00:00<00:22, 79.50it/s]
|
1597 |
2%|▏ | 29/1827 [00:00<00:21, 81.87it/s]
|
1598 |
2%|▏ | 38/1827 [00:00<00:22, 80.70it/s]
|
1599 |
3%|▎ | 47/1827 [00:00<00:21, 81.06it/s]
|
1600 |
3%|▎ | 56/1827 [00:00<00:21, 80.92it/s]
|
1601 |
4%|▎ | 65/1827 [00:00<00:22, 78.97it/s]
|
1602 |
4%|▍ | 74/1827 [00:00<00:21, 79.80it/s]
|
1603 |
5%|▍ | 83/1827 [00:01<00:21, 80.85it/s]
|
1604 |
5%|▌ | 92/1827 [00:01<00:21, 80.71it/s]
|
1605 |
6%|▌ | 101/1827 [00:01<00:20, 82.27it/s]
|
1606 |
6%|▌ | 110/1827 [00:01<00:20, 82.50it/s]
|
1607 |
7%|▋ | 119/1827 [00:01<00:20, 81.39it/s]
|
1608 |
7%|▋ | 128/1827 [00:01<00:21, 80.71it/s]
|
1609 |
7%|▋ | 137/1827 [00:01<00:20, 81.92it/s]
|
1610 |
8%|▊ | 146/1827 [00:01<00:20, 82.51it/s]
|
1611 |
8%|▊ | 155/1827 [00:01<00:20, 82.36it/s]
|
1612 |
9%|▉ | 164/1827 [00:02<00:20, 81.09it/s]
|
1613 |
9%|▉ | 173/1827 [00:02<00:20, 82.25it/s]
|
1614 |
10%|▉ | 182/1827 [00:02<00:19, 83.50it/s]
|
1615 |
10%|█ | 191/1827 [00:02<00:19, 83.60it/s]
|
1616 |
11%|█ | 200/1827 [00:02<00:19, 82.02it/s]
|
1617 |
11%|█▏ | 209/1827 [00:02<00:19, 81.91it/s]
|
1618 |
12%|█▏ | 218/1827 [00:02<00:19, 80.50it/s]
|
1619 |
12%|█▏ | 227/1827 [00:02<00:19, 81.39it/s]
|
1620 |
13%|█▎ | 236/1827 [00:02<00:20, 78.84it/s]
|
1621 |
13%|█▎ | 245/1827 [00:03<00:19, 80.03it/s]
|
1622 |
14%|█▍ | 254/1827 [00:03<00:19, 80.43it/s]
|
1623 |
14%|█▍ | 263/1827 [00:03<00:19, 80.02it/s]
|
1624 |
15%|█▍ | 272/1827 [00:03<00:19, 81.42it/s]
|
1625 |
15%|█▌ | 281/1827 [00:03<00:18, 82.68it/s]
|
1626 |
16%|█▌ | 290/1827 [00:03<00:18, 82.21it/s]
|
1627 |
16%|█▋ | 299/1827 [00:03<00:18, 82.61it/s]
|
1628 |
17%|█▋ | 308/1827 [00:03<00:18, 82.37it/s]
|
1629 |
17%|█▋ | 317/1827 [00:03<00:18, 82.45it/s]
|
1630 |
18%|█▊ | 326/1827 [00:03<00:17, 83.97it/s]
|
1631 |
18%|█▊ | 335/1827 [00:04<00:17, 83.77it/s]
|
1632 |
19%|█▉ | 344/1827 [00:04<00:17, 84.88it/s]
|
1633 |
19%|█▉ | 353/1827 [00:04<00:18, 79.76it/s]
|
1634 |
20%|█▉ | 362/1827 [00:04<00:18, 80.32it/s]
|
1635 |
20%|██ | 371/1827 [00:04<00:18, 80.44it/s]
|
1636 |
21%|██ | 380/1827 [00:04<00:17, 80.41it/s]
|
1637 |
21%|██▏ | 389/1827 [00:04<00:18, 79.52it/s]
|
1638 |
22%|██▏ | 397/1827 [00:04<00:18, 78.70it/s]
|
1639 |
22%|██▏ | 406/1827 [00:04<00:17, 79.69it/s]
|
1640 |
23%|██▎ | 414/1827 [00:05<00:18, 77.35it/s]
|
1641 |
23%|██▎ | 423/1827 [00:05<00:17, 78.47it/s]
|
1642 |
24%|██▎ | 431/1827 [00:05<00:17, 77.94it/s]
|
1643 |
24%|██▍ | 439/1827 [00:05<00:17, 77.85it/s]
|
1644 |
24%|██▍ | 447/1827 [00:05<00:17, 77.54it/s]
|
1645 |
25%|██▍ | 456/1827 [00:05<00:17, 79.01it/s]
|
1646 |
25%|██▌ | 465/1827 [00:05<00:16, 80.47it/s]
|
1647 |
26%|██▌ | 474/1827 [00:05<00:17, 78.97it/s]
|
1648 |
26%|██▋ | 482/1827 [00:05<00:17, 77.54it/s]
|
1649 |
27%|██▋ | 490/1827 [00:06<00:17, 76.65it/s]
|
1650 |
27%|██▋ | 499/1827 [00:06<00:16, 78.31it/s]
|
1651 |
28%|██▊ | 508/1827 [00:06<00:16, 79.34it/s]
|
1652 |
28%|██▊ | 517/1827 [00:06<00:16, 79.61it/s]
|
1653 |
29%|██▉ | 526/1827 [00:06<00:16, 80.80it/s]
|
1654 |
29%|██▉ | 535/1827 [00:06<00:16, 80.00it/s]
|
1655 |
30%|██▉ | 544/1827 [00:06<00:15, 80.92it/s]
|
1656 |
30%|███ | 553/1827 [00:06<00:15, 80.34it/s]
|
1657 |
31%|███ | 562/1827 [00:06<00:15, 81.20it/s]
|
1658 |
31%|███▏ | 571/1827 [00:07<00:15, 83.07it/s]
|
1659 |
32%|███▏ | 580/1827 [00:07<00:14, 83.38it/s]
|
1660 |
32%|███▏ | 589/1827 [00:07<00:14, 83.03it/s]
|
1661 |
33%|███▎ | 598/1827 [00:07<00:14, 82.25it/s]
|
1662 |
33%|███▎ | 607/1827 [00:07<00:14, 81.98it/s]
|
1663 |
34%|███▎ | 616/1827 [00:07<00:14, 81.52it/s]
|
1664 |
34%|███▍ | 625/1827 [00:07<00:14, 82.30it/s]
|
1665 |
35%|███▍ | 634/1827 [00:07<00:14, 82.87it/s]
|
1666 |
35%|███▌ | 643/1827 [00:07<00:14, 83.83it/s]
|
1667 |
36%|███▌ | 652/1827 [00:08<00:15, 78.28it/s]
|
1668 |
36%|███▌ | 661/1827 [00:08<00:14, 79.08it/s]
|
1669 |
37%|███▋ | 670/1827 [00:08<00:14, 80.28it/s]
|
1670 |
37%|███▋ | 679/1827 [00:08<00:14, 80.78it/s]
|
1671 |
38%|███▊ | 688/1827 [00:08<00:13, 82.11it/s]
|
1672 |
38%|███▊ | 697/1827 [00:08<00:13, 83.76it/s]
|
1673 |
39%|███▊ | 706/1827 [00:08<00:13, 83.15it/s]
|
1674 |
39%|███▉ | 715/1827 [00:08<00:13, 84.66it/s]
|
1675 |
40%|███▉ | 724/1827 [00:08<00:12, 85.34it/s]
|
1676 |
40%|████ | 733/1827 [00:09<00:12, 85.46it/s]
|
1677 |
41%|████ | 742/1827 [00:09<00:12, 85.34it/s]
|
1678 |
41%|████ | 751/1827 [00:09<00:12, 86.45it/s]
|
1679 |
42%|████▏ | 760/1827 [00:09<00:12, 85.05it/s]
|
1680 |
42%|████▏ | 769/1827 [00:09<00:12, 86.03it/s]
|
1681 |
43%|████▎ | 778/1827 [00:09<00:12, 84.73it/s]
|
1682 |
43%|████▎ | 787/1827 [00:09<00:12, 81.97it/s]
|
1683 |
44%|████▎ | 796/1827 [00:09<00:12, 83.05it/s]
|
1684 |
44%|████▍ | 805/1827 [00:09<00:12, 81.61it/s]
|
1685 |
45%|████▍ | 814/1827 [00:09<00:12, 82.53it/s]
|
1686 |
45%|████▌ | 823/1827 [00:10<00:12, 82.99it/s]
|
1687 |
46%|████▌ | 832/1827 [00:10<00:12, 82.38it/s]
|
1688 |
46%|████▌ | 841/1827 [00:10<00:11, 82.96it/s]
|
1689 |
47%|████▋ | 850/1827 [00:10<00:11, 83.72it/s]
|
1690 |
47%|████▋ | 859/1827 [00:10<00:11, 84.83it/s]
|
1691 |
48%|████▊ | 868/1827 [00:10<00:11, 84.11it/s]
|
1692 |
48%|████▊ | 877/1827 [00:10<00:11, 83.69it/s]
|
1693 |
48%|████▊ | 886/1827 [00:10<00:11, 84.15it/s]
|
1694 |
49%|████▉ | 895/1827 [00:10<00:11, 83.37it/s]
|
1695 |
49%|████▉ | 904/1827 [00:11<00:10, 84.07it/s]
|
1696 |
50%|████▉ | 913/1827 [00:11<00:10, 84.82it/s]
|
1697 |
50%|█████ | 922/1827 [00:11<00:10, 84.51it/s]
|
1698 |
51%|█████ | 931/1827 [00:11<00:10, 81.89it/s]
|
1699 |
51%|█████▏ | 940/1827 [00:11<00:10, 82.43it/s]
|
1700 |
52%|█████▏ | 949/1827 [00:11<00:10, 81.02it/s]
|
1701 |
52%|█████▏ | 958/1827 [00:11<00:10, 81.56it/s]
|
1702 |
53%|█████▎ | 967/1827 [00:11<00:10, 83.24it/s]
|
1703 |
53%|█████▎ | 976/1827 [00:11<00:10, 82.41it/s]
|
1704 |
54%|█████▍ | 985/1827 [00:12<00:10, 83.96it/s]
|
1705 |
54%|█████▍ | 994/1827 [00:12<00:09, 84.30it/s]
|
1706 |
55%|█████▍ | 1003/1827 [00:12<00:09, 83.87it/s]
|
1707 |
55%|█████▌ | 1012/1827 [00:12<00:09, 83.40it/s]
|
1708 |
56%|█████▌ | 1021/1827 [00:12<00:09, 84.27it/s]
|
1709 |
56%|█████▋ | 1030/1827 [00:12<00:09, 84.53it/s]
|
1710 |
57%|█████▋ | 1039/1827 [00:12<00:09, 82.78it/s]
|
1711 |
57%|█████▋ | 1048/1827 [00:12<00:09, 83.18it/s]
|
1712 |
58%|█████▊ | 1057/1827 [00:12<00:09, 83.19it/s]
|
1713 |
58%|█████▊ | 1066/1827 [00:13<00:09, 84.07it/s]
|
1714 |
59%|█████▉ | 1075/1827 [00:13<00:08, 84.64it/s]
|
1715 |
59%|█████▉ | 1084/1827 [00:13<00:08, 85.01it/s]
|
1716 |
60%|█████▉ | 1093/1827 [00:13<00:08, 85.36it/s]
|
1717 |
60%|██████ | 1102/1827 [00:13<00:08, 84.17it/s]
|
1718 |
61%|██████ | 1111/1827 [00:13<00:08, 82.68it/s]
|
1719 |
61%|██████▏ | 1120/1827 [00:13<00:08, 81.76it/s]
|
1720 |
62%|██████▏ | 1129/1827 [00:13<00:08, 81.92it/s]
|
1721 |
62%|██████▏ | 1138/1827 [00:13<00:08, 81.83it/s]
|
1722 |
63%|██████▎ | 1147/1827 [00:13<00:08, 82.31it/s]
|
1723 |
63%|██████▎ | 1156/1827 [00:14<00:08, 82.02it/s]
|
1724 |
64%|██████▍ | 1165/1827 [00:14<00:08, 79.48it/s]
|
1725 |
64%|██████▍ | 1174/1827 [00:14<00:08, 80.60it/s]
|
1726 |
65%|██████▍ | 1183/1827 [00:14<00:08, 78.91it/s]
|
1727 |
65%|██████▌ | 1192/1827 [00:14<00:07, 81.02it/s]
|
1728 |
66%|██████▌ | 1201/1827 [00:14<00:07, 80.79it/s]
|
1729 |
66%|██████▌ | 1210/1827 [00:14<00:07, 81.06it/s]
|
1730 |
67%|██████▋ | 1219/1827 [00:14<00:07, 79.89it/s]
|
1731 |
67%|██████▋ | 1228/1827 [00:14<00:07, 81.29it/s]
|
1732 |
68%|██████▊ | 1237/1827 [00:15<00:07, 82.50it/s]
|
1733 |
68%|██████▊ | 1246/1827 [00:15<00:07, 82.71it/s]
|
1734 |
69%|██████▊ | 1255/1827 [00:15<00:07, 80.01it/s]
|
1735 |
69%|██████▉ | 1264/1827 [00:15<00:07, 80.07it/s]
|
1736 |
70%|██████▉ | 1273/1827 [00:15<00:06, 81.19it/s]
|
1737 |
70%|███████ | 1282/1827 [00:15<00:06, 82.47it/s]
|
1738 |
71%|███████ | 1291/1827 [00:15<00:06, 83.35it/s]
|
1739 |
71%|███████ | 1300/1827 [00:15<00:06, 83.34it/s]
|
1740 |
72%|███████▏ | 1309/1827 [00:15<00:06, 83.88it/s]
|
1741 |
72%|███████▏ | 1318/1827 [00:16<00:05, 84.84it/s]
|
1742 |
73%|███████▎ | 1327/1827 [00:16<00:05, 83.90it/s]
|
1743 |
73%|███████▎ | 1336/1827 [00:16<00:05, 84.12it/s]
|
1744 |
74%|███████▎ | 1345/1827 [00:16<00:05, 83.22it/s]
|
1745 |
74%|███████▍ | 1354/1827 [00:16<00:05, 82.63it/s]
|
1746 |
75%|███████▍ | 1363/1827 [00:16<00:05, 82.32it/s]
|
1747 |
75%|███████▌ | 1372/1827 [00:16<00:05, 81.87it/s]
|
1748 |
76%|███████▌ | 1381/1827 [00:16<00:05, 82.35it/s]
|
1749 |
76%|███████▌ | 1390/1827 [00:16<00:05, 81.43it/s]
|
1750 |
77%|███████▋ | 1399/1827 [00:17<00:05, 82.08it/s]
|
1751 |
77%|███████▋ | 1408/1827 [00:17<00:05, 82.67it/s]
|
1752 |
78%|███████▊ | 1417/1827 [00:17<00:04, 82.84it/s]
|
1753 |
78%|███████▊ | 1426/1827 [00:17<00:04, 82.22it/s]
|
1754 |
79%|███████▊ | 1435/1827 [00:17<00:04, 81.18it/s]
|
1755 |
79%|███████▉ | 1444/1827 [00:17<00:04, 78.91it/s]
|
1756 |
80%|███████▉ | 1453/1827 [00:17<00:04, 80.47it/s]
|
1757 |
80%|████████ | 1462/1827 [00:17<00:04, 79.66it/s]
|
1758 |
80%|████████ | 1470/1827 [00:17<00:04, 78.26it/s]
|
1759 |
81%|████████ | 1478/1827 [00:18<00:04, 78.65it/s]
|
1760 |
81%|████████▏ | 1487/1827 [00:18<00:04, 79.78it/s]
|
1761 |
82%|████████▏ | 1495/1827 [00:18<00:04, 77.95it/s]
|
1762 |
82%|████████▏ | 1503/1827 [00:18<00:04, 77.75it/s]
|
1763 |
83%|████████▎ | 1512/1827 [00:18<00:03, 79.71it/s]
|
1764 |
83%|████████▎ | 1521/1827 [00:18<00:03, 80.57it/s]
|
1765 |
84%|████████▎ | 1530/1827 [00:18<00:03, 80.96it/s]
|
1766 |
84%|████████▍ | 1539/1827 [00:18<00:03, 81.58it/s]
|
1767 |
85%|████████▍ | 1548/1827 [00:18<00:03, 81.13it/s]
|
1768 |
85%|████████▌ | 1557/1827 [00:19<00:03, 81.30it/s]
|
1769 |
86%|████████▌ | 1566/1827 [00:19<00:03, 81.60it/s]
|
1770 |
86%|████████▌ | 1575/1827 [00:19<00:03, 81.67it/s]
|
1771 |
87%|████████▋ | 1584/1827 [00:19<00:02, 82.53it/s]
|
1772 |
87%|████████▋ | 1593/1827 [00:19<00:02, 82.02it/s]
|
1773 |
88%|████████▊ | 1602/1827 [00:19<00:02, 82.22it/s]
|
1774 |
88%|████████▊ | 1611/1827 [00:19<00:02, 82.92it/s]
|
1775 |
89%|████████▊ | 1620/1827 [00:19<00:02, 79.31it/s]
|
1776 |
89%|████████▉ | 1628/1827 [00:19<00:02, 76.38it/s]
|
1777 |
90%|████████▉ | 1637/1827 [00:20<00:02, 78.37it/s]
|
1778 |
90%|█████████ | 1646/1827 [00:20<00:02, 79.98it/s]
|
1779 |
91%|█████████ | 1655/1827 [00:20<00:02, 81.60it/s]
|
1780 |
91%|█████████ | 1664/1827 [00:20<00:02, 81.06it/s]
|
1781 |
92%|█████████▏| 1673/1827 [00:20<00:01, 81.37it/s]
|
1782 |
92%|█████████▏| 1682/1827 [00:20<00:01, 82.25it/s]
|
1783 |
93%|█████████▎| 1691/1827 [00:20<00:01, 83.16it/s]
|
1784 |
93%|█████████▎| 1700/1827 [00:20<00:01, 81.57it/s]
|
1785 |
94%|█████████▎| 1709/1827 [00:20<00:01, 82.04it/s]
|
1786 |
94%|█████████▍| 1718/1827 [00:21<00:01, 83.23it/s]
|
1787 |
95%|█████████▍| 1727/1827 [00:21<00:01, 80.36it/s]
|
1788 |
95%|█████████▌| 1736/1827 [00:21<00:01, 81.04it/s]
|
1789 |
96%|█████████▌| 1745/1827 [00:21<00:01, 81.89it/s]
|
1790 |
96%|█████████▌| 1754/1827 [00:21<00:00, 82.74it/s]
|
1791 |
96%|█████████▋| 1763/1827 [00:21<00:00, 82.25it/s]
|
1792 |
97%|█████████▋| 1772/1827 [00:21<00:00, 82.64it/s]
|
1793 |
97%|█████████▋| 1781/1827 [00:21<00:00, 83.22it/s]
|
1794 |
98%|█████████▊| 1790/1827 [00:21<00:00, 83.23it/s]
|
1795 |
98%|█████████▊| 1799/1827 [00:22<00:00, 80.14it/s]
|
1796 |
99%|█████████▉| 1808/1827 [00:22<00:00, 81.33it/s]
|
1797 |
99%|█████████▉| 1817/1827 [00:22<00:00, 81.79it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1495 |
{'eval_loss': 1.0022608041763306, 'eval_precision': 0.31162999550965426, 'eval_recall': 0.8118858212447356, 'eval_f1': 0.45038613797131544, 'eval_accuracy': 0.8533304955579661, 'eval_runtime': 14.4559, 'eval_samples_per_second': 471.089, 'eval_steps_per_second': 58.938, 'epoch': 10.0}
|
1496 |
{'train_runtime': 1549.44, 'train_samples_per_second': 223.332, 'train_steps_per_second': 3.492, 'train_loss': 0.0812657987344287, 'epoch': 10.0}
|
1497 |
|
1498 |
+
***** train metrics *****
|
1499 |
+
epoch = 10.0
|
1500 |
+
total_flos = 15996936GF
|
1501 |
+
train_loss = 0.0813
|
1502 |
+
train_runtime = 0:25:49.44
|
1503 |
+
train_samples = 34604
|
1504 |
+
train_samples_per_second = 223.332
|
1505 |
+
train_steps_per_second = 3.492
|
1506 |
+
09/06/2024 00:03:46 - INFO - __main__ - *** Evaluate ***
|
1507 |
+
[INFO|trainer.py:811] 2024-09-06 00:03:46,664 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: id, ner_tags, tokens. If id, ner_tags, tokens are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
|
1508 |
+
[INFO|trainer.py:3819] 2024-09-06 00:03:46,667 >>
|
1509 |
+
***** Running Evaluation *****
|
1510 |
+
[INFO|trainer.py:3821] 2024-09-06 00:03:46,667 >> Num examples = 6810
|
1511 |
+
[INFO|trainer.py:3824] 2024-09-06 00:03:46,667 >> Batch size = 8
|
1512 |
+
|
1513 |
0%| | 0/852 [00:00<?, ?it/s]
|
1514 |
1%| | 10/852 [00:00<00:09, 92.77it/s]
|
1515 |
2%|▏ | 20/852 [00:00<00:10, 80.16it/s]
|
1516 |
3%|▎ | 29/852 [00:00<00:10, 79.89it/s]
|
1517 |
4%|▍ | 38/852 [00:00<00:10, 80.51it/s]
|
1518 |
6%|▌ | 47/852 [00:00<00:09, 81.99it/s]
|
1519 |
7%|▋ | 56/852 [00:00<00:09, 82.94it/s]
|
1520 |
8%|▊ | 65/852 [00:00<00:09, 82.00it/s]
|
1521 |
9%|▊ | 74/852 [00:00<00:09, 80.66it/s]
|
1522 |
10%|▉ | 83/852 [00:01<00:09, 80.65it/s]
|
1523 |
11%|█ | 92/852 [00:01<00:09, 80.64it/s]
|
1524 |
12%|█▏ | 101/852 [00:01<00:09, 79.84it/s]
|
1525 |
13%|█▎ | 109/852 [00:01<00:09, 79.60it/s]
|
1526 |
14%|█▍ | 118/852 [00:01<00:09, 80.66it/s]
|
1527 |
15%|█▍ | 127/852 [00:01<00:09, 77.73it/s]
|
1528 |
16%|█▌ | 136/852 [00:01<00:09, 78.64it/s]
|
1529 |
17%|█▋ | 144/852 [00:01<00:08, 78.98it/s]
|
1530 |
18%|█▊ | 152/852 [00:01<00:09, 77.66it/s]
|
1531 |
19%|█▉ | 161/852 [00:02<00:08, 79.31it/s]
|
1532 |
20%|█▉ | 169/852 [00:02<00:08, 79.22it/s]
|
1533 |
21%|██ | 178/852 [00:02<00:08, 79.74it/s]
|
1534 |
22%|██▏ | 187/852 [00:02<00:08, 80.55it/s]
|
1535 |
23%|██▎ | 196/852 [00:02<00:08, 80.55it/s]
|
1536 |
24%|██▍ | 205/852 [00:02<00:08, 80.53it/s]
|
1537 |
25%|██▌ | 214/852 [00:02<00:08, 78.05it/s]
|
1538 |
26%|██▌ | 223/852 [00:02<00:07, 79.41it/s]
|
1539 |
27%|██▋ | 232/852 [00:02<00:07, 80.51it/s]
|
1540 |
28%|██▊ | 241/852 [00:03<00:07, 77.55it/s]
|
1541 |
29%|██▉ | 250/852 [00:03<00:07, 79.18it/s]
|
1542 |
30%|███ | 259/852 [00:03<00:07, 80.41it/s]
|
1543 |
31%|███▏ | 268/852 [00:03<00:07, 80.34it/s]
|
1544 |
33%|███▎ | 277/852 [00:03<00:07, 81.00it/s]
|
1545 |
34%|███▎ | 286/852 [00:03<00:06, 81.59it/s]
|
1546 |
35%|███▍ | 295/852 [00:03<00:06, 80.31it/s]
|
1547 |
36%|███▌ | 304/852 [00:03<00:06, 81.47it/s]
|
1548 |
37%|███▋ | 313/852 [00:03<00:06, 79.79it/s]
|
1549 |
38%|███▊ | 322/852 [00:04<00:06, 81.33it/s]
|
1550 |
39%|███▉ | 331/852 [00:04<00:06, 80.75it/s]
|
1551 |
40%|███▉ | 340/852 [00:04<00:06, 80.70it/s]
|
1552 |
41%|████ | 349/852 [00:04<00:06, 80.37it/s]
|
1553 |
42%|████▏ | 358/852 [00:04<00:06, 78.68it/s]
|
1554 |
43%|████▎ | 367/852 [00:04<00:06, 79.85it/s]
|
1555 |
44%|████▍ | 376/852 [00:04<00:05, 80.53it/s]
|
1556 |
45%|████▌ | 385/852 [00:04<00:05, 80.46it/s]
|
1557 |
46%|████▌ | 394/852 [00:04<00:05, 81.26it/s]
|
1558 |
47%|████▋ | 403/852 [00:05<00:05, 81.11it/s]
|
1559 |
48%|████▊ | 412/852 [00:05<00:05, 79.03it/s]
|
1560 |
49%|████▉ | 421/852 [00:05<00:05, 80.48it/s]
|
1561 |
50%|█████ | 430/852 [00:05<00:05, 79.74it/s]
|
1562 |
52%|█████▏ | 439/852 [00:05<00:05, 81.17it/s]
|
1563 |
53%|█████▎ | 448/852 [00:05<00:04, 81.44it/s]
|
1564 |
54%|█████▎ | 457/852 [00:05<00:04, 82.03it/s]
|
1565 |
55%|█████▍ | 466/852 [00:05<00:04, 79.25it/s]
|
1566 |
56%|█████▌ | 474/852 [00:05<00:05, 75.40it/s]
|
1567 |
57%|█████▋ | 482/852 [00:06<00:04, 76.12it/s]
|
1568 |
58%|█████▊ | 490/852 [00:06<00:04, 76.55it/s]
|
1569 |
59%|█████▊ | 499/852 [00:06<00:04, 78.88it/s]
|
1570 |
60%|█████▉ | 507/852 [00:06<00:04, 78.87it/s]
|
1571 |
61%|██████ | 516/852 [00:06<00:04, 80.91it/s]
|
1572 |
62%|██████▏ | 525/852 [00:06<00:04, 79.57it/s]
|
1573 |
63%|██████▎ | 534/852 [00:06<00:03, 79.97it/s]
|
1574 |
64%|██████▎ | 543/852 [00:06<00:03, 81.17it/s]
|
1575 |
65%|██████▍ | 552/852 [00:06<00:03, 79.32it/s]
|
1576 |
66%|██████▌ | 561/852 [00:07<00:03, 79.73it/s]
|
1577 |
67%|██████▋ | 570/852 [00:07<00:03, 80.17it/s]
|
1578 |
68%|██████▊ | 579/852 [00:07<00:03, 79.01it/s]
|
1579 |
69%|██████▉ | 587/852 [00:07<00:03, 78.03it/s]
|
1580 |
70%|██████▉ | 596/852 [00:07<00:03, 78.78it/s]
|
1581 |
71%|███████ | 604/852 [00:07<00:03, 78.44it/s]
|
1582 |
72%|███████▏ | 612/852 [00:07<00:03, 77.03it/s]
|
1583 |
73%|███████▎ | 620/852 [00:07<00:02, 77.43it/s]
|
1584 |
74%|███████▎ | 628/852 [00:07<00:02, 77.06it/s]
|
1585 |
75%|███████▍ | 636/852 [00:07<00:02, 77.78it/s]
|
1586 |
76%|███████▌ | 644/852 [00:08<00:02, 75.78it/s]
|
1587 |
77%|███████▋ | 653/852 [00:08<00:02, 77.88it/s]
|
1588 |
78%|███████▊ | 662/852 [00:08<00:02, 78.58it/s]
|
1589 |
79%|███████▉ | 671/852 [00:08<00:02, 78.80it/s]
|
1590 |
80%|███████▉ | 680/852 [00:08<00:02, 79.13it/s]
|
1591 |
81%|████████ | 689/852 [00:08<00:02, 80.27it/s]
|
1592 |
82%|████████▏ | 698/852 [00:08<00:01, 80.46it/s]
|
1593 |
83%|████████▎ | 707/852 [00:08<00:01, 80.49it/s]
|
1594 |
84%|████████▍ | 716/852 [00:08<00:01, 80.53it/s]
|
1595 |
85%|████████▌ | 725/852 [00:09<00:01, 81.23it/s]
|
1596 |
86%|████████▌ | 734/852 [00:09<00:01, 82.34it/s]
|
1597 |
87%|████████▋ | 743/852 [00:09<00:01, 82.16it/s]
|
1598 |
88%|████████▊ | 752/852 [00:09<00:01, 82.54it/s]
|
1599 |
89%|████████▉ | 761/852 [00:09<00:01, 83.78it/s]
|
1600 |
90%|█████████ | 770/852 [00:09<00:00, 82.29it/s]
|
1601 |
91%|█████████▏| 779/852 [00:09<00:00, 81.45it/s]
|
1602 |
92%|█████████▏| 788/852 [00:09<00:00, 80.57it/s]
|
1603 |
94%|█████████▎| 797/852 [00:09<00:00, 80.93it/s]
|
1604 |
95%|█████████▍| 806/852 [00:10<00:00, 82.19it/s]
|
1605 |
96%|█████████▌| 815/852 [00:10<00:00, 81.02it/s]
|
1606 |
97%|█████████▋| 824/852 [00:10<00:00, 81.64it/s]
|
1607 |
98%|█████████▊| 833/852 [00:10<00:00, 82.31it/s]
|
1608 |
99%|█████████▉| 842/852 [00:10<00:00, 80.76it/s]
|
1609 |
+
_warn_prf(average, modifier, msg_start, len(result))
|
1610 |
+
|
1611 |
+
***** eval metrics *****
|
1612 |
+
epoch = 10.0
|
1613 |
+
eval_accuracy = 0.8565
|
1614 |
+
eval_f1 = 0.4546
|
1615 |
+
eval_loss = 0.7006
|
1616 |
+
eval_precision = 0.3153
|
1617 |
+
eval_recall = 0.8145
|
1618 |
+
eval_runtime = 0:00:14.29
|
1619 |
+
eval_samples = 6810
|
1620 |
+
eval_samples_per_second = 476.445
|
1621 |
+
eval_steps_per_second = 59.608
|
1622 |
+
09/06/2024 00:04:00 - INFO - __main__ - *** Predict ***
|
1623 |
+
[INFO|trainer.py:811] 2024-09-06 00:04:00,968 >> The following columns in the test set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: id, ner_tags, tokens. If id, ner_tags, tokens are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
|
1624 |
+
[INFO|trainer.py:3819] 2024-09-06 00:04:00,971 >>
|
1625 |
+
***** Running Prediction *****
|
1626 |
+
[INFO|trainer.py:3821] 2024-09-06 00:04:00,971 >> Num examples = 14614
|
1627 |
+
[INFO|trainer.py:3824] 2024-09-06 00:04:00,971 >> Batch size = 8
|
1628 |
+
|
1629 |
0%| | 0/1827 [00:00<?, ?it/s]
|
1630 |
1%| | 10/1827 [00:00<00:19, 91.06it/s]
|
1631 |
1%| | 20/1827 [00:00<00:22, 79.50it/s]
|
1632 |
2%|▏ | 29/1827 [00:00<00:21, 81.87it/s]
|
1633 |
2%|▏ | 38/1827 [00:00<00:22, 80.70it/s]
|
1634 |
3%|▎ | 47/1827 [00:00<00:21, 81.06it/s]
|
1635 |
3%|▎ | 56/1827 [00:00<00:21, 80.92it/s]
|
1636 |
4%|▎ | 65/1827 [00:00<00:22, 78.97it/s]
|
1637 |
4%|▍ | 74/1827 [00:00<00:21, 79.80it/s]
|
1638 |
5%|▍ | 83/1827 [00:01<00:21, 80.85it/s]
|
1639 |
5%|▌ | 92/1827 [00:01<00:21, 80.71it/s]
|
1640 |
6%|▌ | 101/1827 [00:01<00:20, 82.27it/s]
|
1641 |
6%|▌ | 110/1827 [00:01<00:20, 82.50it/s]
|
1642 |
7%|▋ | 119/1827 [00:01<00:20, 81.39it/s]
|
1643 |
7%|▋ | 128/1827 [00:01<00:21, 80.71it/s]
|
1644 |
7%|▋ | 137/1827 [00:01<00:20, 81.92it/s]
|
1645 |
8%|▊ | 146/1827 [00:01<00:20, 82.51it/s]
|
1646 |
8%|▊ | 155/1827 [00:01<00:20, 82.36it/s]
|
1647 |
9%|▉ | 164/1827 [00:02<00:20, 81.09it/s]
|
1648 |
9%|▉ | 173/1827 [00:02<00:20, 82.25it/s]
|
1649 |
10%|▉ | 182/1827 [00:02<00:19, 83.50it/s]
|
1650 |
10%|█ | 191/1827 [00:02<00:19, 83.60it/s]
|
1651 |
11%|█ | 200/1827 [00:02<00:19, 82.02it/s]
|
1652 |
11%|█▏ | 209/1827 [00:02<00:19, 81.91it/s]
|
1653 |
12%|���▏ | 218/1827 [00:02<00:19, 80.50it/s]
|
1654 |
12%|█▏ | 227/1827 [00:02<00:19, 81.39it/s]
|
1655 |
13%|█▎ | 236/1827 [00:02<00:20, 78.84it/s]
|
1656 |
13%|█▎ | 245/1827 [00:03<00:19, 80.03it/s]
|
1657 |
14%|█▍ | 254/1827 [00:03<00:19, 80.43it/s]
|
1658 |
14%|█▍ | 263/1827 [00:03<00:19, 80.02it/s]
|
1659 |
15%|█▍ | 272/1827 [00:03<00:19, 81.42it/s]
|
1660 |
15%|█▌ | 281/1827 [00:03<00:18, 82.68it/s]
|
1661 |
16%|█▌ | 290/1827 [00:03<00:18, 82.21it/s]
|
1662 |
16%|█▋ | 299/1827 [00:03<00:18, 82.61it/s]
|
1663 |
17%|█▋ | 308/1827 [00:03<00:18, 82.37it/s]
|
1664 |
17%|█▋ | 317/1827 [00:03<00:18, 82.45it/s]
|
1665 |
18%|█▊ | 326/1827 [00:03<00:17, 83.97it/s]
|
1666 |
18%|█▊ | 335/1827 [00:04<00:17, 83.77it/s]
|
1667 |
19%|█▉ | 344/1827 [00:04<00:17, 84.88it/s]
|
1668 |
19%|█▉ | 353/1827 [00:04<00:18, 79.76it/s]
|
1669 |
20%|█▉ | 362/1827 [00:04<00:18, 80.32it/s]
|
1670 |
20%|██ | 371/1827 [00:04<00:18, 80.44it/s]
|
1671 |
21%|██ | 380/1827 [00:04<00:17, 80.41it/s]
|
1672 |
21%|██▏ | 389/1827 [00:04<00:18, 79.52it/s]
|
1673 |
22%|██▏ | 397/1827 [00:04<00:18, 78.70it/s]
|
1674 |
22%|██▏ | 406/1827 [00:04<00:17, 79.69it/s]
|
1675 |
23%|██▎ | 414/1827 [00:05<00:18, 77.35it/s]
|
1676 |
23%|██▎ | 423/1827 [00:05<00:17, 78.47it/s]
|
1677 |
24%|██▎ | 431/1827 [00:05<00:17, 77.94it/s]
|
1678 |
24%|██▍ | 439/1827 [00:05<00:17, 77.85it/s]
|
1679 |
24%|██▍ | 447/1827 [00:05<00:17, 77.54it/s]
|
1680 |
25%|██▍ | 456/1827 [00:05<00:17, 79.01it/s]
|
1681 |
25%|██▌ | 465/1827 [00:05<00:16, 80.47it/s]
|
1682 |
26%|██▌ | 474/1827 [00:05<00:17, 78.97it/s]
|
1683 |
26%|██▋ | 482/1827 [00:05<00:17, 77.54it/s]
|
1684 |
27%|██▋ | 490/1827 [00:06<00:17, 76.65it/s]
|
1685 |
27%|██▋ | 499/1827 [00:06<00:16, 78.31it/s]
|
1686 |
28%|██▊ | 508/1827 [00:06<00:16, 79.34it/s]
|
1687 |
28%|██▊ | 517/1827 [00:06<00:16, 79.61it/s]
|
1688 |
29%|██▉ | 526/1827 [00:06<00:16, 80.80it/s]
|
1689 |
29%|██▉ | 535/1827 [00:06<00:16, 80.00it/s]
|
1690 |
30%|██▉ | 544/1827 [00:06<00:15, 80.92it/s]
|
1691 |
30%|███ | 553/1827 [00:06<00:15, 80.34it/s]
|
1692 |
31%|███ | 562/1827 [00:06<00:15, 81.20it/s]
|
1693 |
31%|███▏ | 571/1827 [00:07<00:15, 83.07it/s]
|
1694 |
32%|███▏ | 580/1827 [00:07<00:14, 83.38it/s]
|
1695 |
32%|███▏ | 589/1827 [00:07<00:14, 83.03it/s]
|
1696 |
33%|███▎ | 598/1827 [00:07<00:14, 82.25it/s]
|
1697 |
33%|███▎ | 607/1827 [00:07<00:14, 81.98it/s]
|
1698 |
34%|███▎ | 616/1827 [00:07<00:14, 81.52it/s]
|
1699 |
34%|███▍ | 625/1827 [00:07<00:14, 82.30it/s]
|
1700 |
35%|███▍ | 634/1827 [00:07<00:14, 82.87it/s]
|
1701 |
35%|███▌ | 643/1827 [00:07<00:14, 83.83it/s]
|
1702 |
36%|███▌ | 652/1827 [00:08<00:15, 78.28it/s]
|
1703 |
36%|███▌ | 661/1827 [00:08<00:14, 79.08it/s]
|
1704 |
37%|███▋ | 670/1827 [00:08<00:14, 80.28it/s]
|
1705 |
37%|███▋ | 679/1827 [00:08<00:14, 80.78it/s]
|
1706 |
38%|███▊ | 688/1827 [00:08<00:13, 82.11it/s]
|
1707 |
38%|███▊ | 697/1827 [00:08<00:13, 83.76it/s]
|
1708 |
39%|███▊ | 706/1827 [00:08<00:13, 83.15it/s]
|
1709 |
39%|███▉ | 715/1827 [00:08<00:13, 84.66it/s]
|
1710 |
40%|███▉ | 724/1827 [00:08<00:12, 85.34it/s]
|
1711 |
40%|████ | 733/1827 [00:09<00:12, 85.46it/s]
|
1712 |
41%|████ | 742/1827 [00:09<00:12, 85.34it/s]
|
1713 |
41%|████ | 751/1827 [00:09<00:12, 86.45it/s]
|
1714 |
42%|████▏ | 760/1827 [00:09<00:12, 85.05it/s]
|
1715 |
42%|████▏ | 769/1827 [00:09<00:12, 86.03it/s]
|
1716 |
43%|████▎ | 778/1827 [00:09<00:12, 84.73it/s]
|
1717 |
43%|████▎ | 787/1827 [00:09<00:12, 81.97it/s]
|
1718 |
44%|████▎ | 796/1827 [00:09<00:12, 83.05it/s]
|
1719 |
44%|████▍ | 805/1827 [00:09<00:12, 81.61it/s]
|
1720 |
45%|████▍ | 814/1827 [00:09<00:12, 82.53it/s]
|
1721 |
45%|████▌ | 823/1827 [00:10<00:12, 82.99it/s]
|
1722 |
46%|████▌ | 832/1827 [00:10<00:12, 82.38it/s]
|
1723 |
46%|████▌ | 841/1827 [00:10<00:11, 82.96it/s]
|
1724 |
47%|████▋ | 850/1827 [00:10<00:11, 83.72it/s]
|
1725 |
47%|████▋ | 859/1827 [00:10<00:11, 84.83it/s]
|
1726 |
48%|████▊ | 868/1827 [00:10<00:11, 84.11it/s]
|
1727 |
48%|████▊ | 877/1827 [00:10<00:11, 83.69it/s]
|
1728 |
48%|████▊ | 886/1827 [00:10<00:11, 84.15it/s]
|
1729 |
49%|████▉ | 895/1827 [00:10<00:11, 83.37it/s]
|
1730 |
49%|████▉ | 904/1827 [00:11<00:10, 84.07it/s]
|
1731 |
50%|████▉ | 913/1827 [00:11<00:10, 84.82it/s]
|
1732 |
50%|█████ | 922/1827 [00:11<00:10, 84.51it/s]
|
1733 |
51%|█████ | 931/1827 [00:11<00:10, 81.89it/s]
|
1734 |
51%|█████▏ | 940/1827 [00:11<00:10, 82.43it/s]
|
1735 |
52%|█████▏ | 949/1827 [00:11<00:10, 81.02it/s]
|
1736 |
52%|█████▏ | 958/1827 [00:11<00:10, 81.56it/s]
|
1737 |
53%|█████▎ | 967/1827 [00:11<00:10, 83.24it/s]
|
1738 |
53%|█████▎ | 976/1827 [00:11<00:10, 82.41it/s]
|
1739 |
54%|█████▍ | 985/1827 [00:12<00:10, 83.96it/s]
|
1740 |
54%|█████▍ | 994/1827 [00:12<00:09, 84.30it/s]
|
1741 |
55%|█████▍ | 1003/1827 [00:12<00:09, 83.87it/s]
|
1742 |
55%|█████▌ | 1012/1827 [00:12<00:09, 83.40it/s]
|
1743 |
56%|█████▌ | 1021/1827 [00:12<00:09, 84.27it/s]
|
1744 |
56%|█████▋ | 1030/1827 [00:12<00:09, 84.53it/s]
|
1745 |
57%|█████▋ | 1039/1827 [00:12<00:09, 82.78it/s]
|
1746 |
57%|█████▋ | 1048/1827 [00:12<00:09, 83.18it/s]
|
1747 |
58%|█████▊ | 1057/1827 [00:12<00:09, 83.19it/s]
|
1748 |
58%|█████▊ | 1066/1827 [00:13<00:09, 84.07it/s]
|
1749 |
59%|█████▉ | 1075/1827 [00:13<00:08, 84.64it/s]
|
1750 |
59%|█████▉ | 1084/1827 [00:13<00:08, 85.01it/s]
|
1751 |
60%|█████▉ | 1093/1827 [00:13<00:08, 85.36it/s]
|
1752 |
60%|██████ | 1102/1827 [00:13<00:08, 84.17it/s]
|
1753 |
61%|██████ | 1111/1827 [00:13<00:08, 82.68it/s]
|
1754 |
61%|██████▏ | 1120/1827 [00:13<00:08, 81.76it/s]
|
1755 |
62%|██████▏ | 1129/1827 [00:13<00:08, 81.92it/s]
|
1756 |
62%|██████▏ | 1138/1827 [00:13<00:08, 81.83it/s]
|
1757 |
63%|██████▎ | 1147/1827 [00:13<00:08, 82.31it/s]
|
1758 |
63%|██████▎ | 1156/1827 [00:14<00:08, 82.02it/s]
|
1759 |
64%|██████▍ | 1165/1827 [00:14<00:08, 79.48it/s]
|
1760 |
64%|██████▍ | 1174/1827 [00:14<00:08, 80.60it/s]
|
1761 |
65%|██████▍ | 1183/1827 [00:14<00:08, 78.91it/s]
|
1762 |
65%|██████▌ | 1192/1827 [00:14<00:07, 81.02it/s]
|
1763 |
66%|██████▌ | 1201/1827 [00:14<00:07, 80.79it/s]
|
1764 |
66%|██████▌ | 1210/1827 [00:14<00:07, 81.06it/s]
|
1765 |
67%|██████▋ | 1219/1827 [00:14<00:07, 79.89it/s]
|
1766 |
67%|██████▋ | 1228/1827 [00:14<00:07, 81.29it/s]
|
1767 |
68%|██████▊ | 1237/1827 [00:15<00:07, 82.50it/s]
|
1768 |
68%|██████▊ | 1246/1827 [00:15<00:07, 82.71it/s]
|
1769 |
69%|██████▊ | 1255/1827 [00:15<00:07, 80.01it/s]
|
1770 |
69%|██████▉ | 1264/1827 [00:15<00:07, 80.07it/s]
|
1771 |
70%|██████▉ | 1273/1827 [00:15<00:06, 81.19it/s]
|
1772 |
70%|███████ | 1282/1827 [00:15<00:06, 82.47it/s]
|
1773 |
71%|███████ | 1291/1827 [00:15<00:06, 83.35it/s]
|
1774 |
71%|███████ | 1300/1827 [00:15<00:06, 83.34it/s]
|
1775 |
72%|███████▏ | 1309/1827 [00:15<00:06, 83.88it/s]
|
1776 |
72%|███████▏ | 1318/1827 [00:16<00:05, 84.84it/s]
|
1777 |
73%|███████▎ | 1327/1827 [00:16<00:05, 83.90it/s]
|
1778 |
73%|███████▎ | 1336/1827 [00:16<00:05, 84.12it/s]
|
1779 |
74%|███████▎ | 1345/1827 [00:16<00:05, 83.22it/s]
|
1780 |
74%|███████▍ | 1354/1827 [00:16<00:05, 82.63it/s]
|
1781 |
75%|███████▍ | 1363/1827 [00:16<00:05, 82.32it/s]
|
1782 |
75%|███████▌ | 1372/1827 [00:16<00:05, 81.87it/s]
|
1783 |
76%|███████▌ | 1381/1827 [00:16<00:05, 82.35it/s]
|
1784 |
76%|███████▌ | 1390/1827 [00:16<00:05, 81.43it/s]
|
1785 |
77%|███████▋ | 1399/1827 [00:17<00:05, 82.08it/s]
|
1786 |
77%|███████▋ | 1408/1827 [00:17<00:05, 82.67it/s]
|
1787 |
78%|███████▊ | 1417/1827 [00:17<00:04, 82.84it/s]
|
1788 |
78%|███████▊ | 1426/1827 [00:17<00:04, 82.22it/s]
|
1789 |
79%|███████▊ | 1435/1827 [00:17<00:04, 81.18it/s]
|
1790 |
79%|███████▉ | 1444/1827 [00:17<00:04, 78.91it/s]
|
1791 |
80%|███████▉ | 1453/1827 [00:17<00:04, 80.47it/s]
|
1792 |
80%|████████ | 1462/1827 [00:17<00:04, 79.66it/s]
|
1793 |
80%|████████ | 1470/1827 [00:17<00:04, 78.26it/s]
|
1794 |
81%|████████ | 1478/1827 [00:18<00:04, 78.65it/s]
|
1795 |
81%|████████▏ | 1487/1827 [00:18<00:04, 79.78it/s]
|
1796 |
82%|████████▏ | 1495/1827 [00:18<00:04, 77.95it/s]
|
1797 |
82%|████████▏ | 1503/1827 [00:18<00:04, 77.75it/s]
|
1798 |
83%|████████▎ | 1512/1827 [00:18<00:03, 79.71it/s]
|
1799 |
83%|████████▎ | 1521/1827 [00:18<00:03, 80.57it/s]
|
1800 |
84%|████████▎ | 1530/1827 [00:18<00:03, 80.96it/s]
|
1801 |
84%|████████▍ | 1539/1827 [00:18<00:03, 81.58it/s]
|
1802 |
85%|████████▍ | 1548/1827 [00:18<00:03, 81.13it/s]
|
1803 |
85%|████████▌ | 1557/1827 [00:19<00:03, 81.30it/s]
|
1804 |
86%|████████▌ | 1566/1827 [00:19<00:03, 81.60it/s]
|
1805 |
86%|████████▌ | 1575/1827 [00:19<00:03, 81.67it/s]
|
1806 |
87%|████████▋ | 1584/1827 [00:19<00:02, 82.53it/s]
|
1807 |
87%|████████▋ | 1593/1827 [00:19<00:02, 82.02it/s]
|
1808 |
88%|████████▊ | 1602/1827 [00:19<00:02, 82.22it/s]
|
1809 |
88%|████████▊ | 1611/1827 [00:19<00:02, 82.92it/s]
|
1810 |
89%|████████▊ | 1620/1827 [00:19<00:02, 79.31it/s]
|
1811 |
89%|████████▉ | 1628/1827 [00:19<00:02, 76.38it/s]
|
1812 |
90%|████████▉ | 1637/1827 [00:20<00:02, 78.37it/s]
|
1813 |
90%|█████████ | 1646/1827 [00:20<00:02, 79.98it/s]
|
1814 |
91%|█████████ | 1655/1827 [00:20<00:02, 81.60it/s]
|
1815 |
91%|█████████ | 1664/1827 [00:20<00:02, 81.06it/s]
|
1816 |
92%|█████████▏| 1673/1827 [00:20<00:01, 81.37it/s]
|
1817 |
92%|█████████▏| 1682/1827 [00:20<00:01, 82.25it/s]
|
1818 |
93%|█████████▎| 1691/1827 [00:20<00:01, 83.16it/s]
|
1819 |
93%|█████████▎| 1700/1827 [00:20<00:01, 81.57it/s]
|
1820 |
94%|█████████▎| 1709/1827 [00:20<00:01, 82.04it/s]
|
1821 |
94%|█████████▍| 1718/1827 [00:21<00:01, 83.23it/s]
|
1822 |
95%|█████████▍| 1727/1827 [00:21<00:01, 80.36it/s]
|
1823 |
95%|█████████▌| 1736/1827 [00:21<00:01, 81.04it/s]
|
1824 |
96%|█████████▌| 1745/1827 [00:21<00:01, 81.89it/s]
|
1825 |
96%|█████████▌| 1754/1827 [00:21<00:00, 82.74it/s]
|
1826 |
96%|█████████▋| 1763/1827 [00:21<00:00, 82.25it/s]
|
1827 |
97%|█████████▋| 1772/1827 [00:21<00:00, 82.64it/s]
|
1828 |
97%|█████████▋| 1781/1827 [00:21<00:00, 83.22it/s]
|
1829 |
98%|█████████▊| 1790/1827 [00:21<00:00, 83.23it/s]
|
1830 |
98%|█████████▊| 1799/1827 [00:22<00:00, 80.14it/s]
|
1831 |
99%|█████████▉| 1808/1827 [00:22<00:00, 81.33it/s]
|
1832 |
99%|█████████▉| 1817/1827 [00:22<00:00, 81.79it/s]
|
1833 |
+
[INFO|trainer.py:3503] 2024-09-06 00:04:30,726 >> Saving model checkpoint to /content/dissertation/scripts/ner/output
|
1834 |
+
[INFO|configuration_utils.py:472] 2024-09-06 00:04:30,727 >> Configuration saved in /content/dissertation/scripts/ner/output/config.json
|
1835 |
+
[INFO|modeling_utils.py:2799] 2024-09-06 00:04:32,122 >> Model weights saved in /content/dissertation/scripts/ner/output/model.safetensors
|
1836 |
+
[INFO|tokenization_utils_base.py:2684] 2024-09-06 00:04:32,123 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
|
1837 |
+
[INFO|tokenization_utils_base.py:2693] 2024-09-06 00:04:32,123 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
|
1838 |
+
***** predict metrics *****
|
1839 |
+
predict_accuracy = 0.9438
|
1840 |
+
predict_f1 = 0.6366
|
1841 |
+
predict_loss = 0.2277
|
1842 |
+
predict_precision = 0.5196
|
1843 |
+
predict_recall = 0.8216
|
1844 |
+
predict_runtime = 0:00:29.10
|
1845 |
+
predict_samples_per_second = 502.103
|
1846 |
+
predict_steps_per_second = 62.771
|
1847 |
+
|
train_results.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"epoch": 10.0,
|
3 |
-
"total_flos": 1.
|
4 |
-
"train_loss": 0.
|
5 |
-
"train_runtime":
|
6 |
-
"train_samples":
|
7 |
-
"train_samples_per_second":
|
8 |
-
"train_steps_per_second": 3.
|
9 |
}
|
|
|
1 |
{
|
2 |
"epoch": 10.0,
|
3 |
+
"total_flos": 1.7176580067661056e+16,
|
4 |
+
"train_loss": 0.0812657987344287,
|
5 |
+
"train_runtime": 1549.44,
|
6 |
+
"train_samples": 34604,
|
7 |
+
"train_samples_per_second": 223.332,
|
8 |
+
"train_steps_per_second": 3.492
|
9 |
}
|
trainer_state.json
CHANGED
@@ -1,208 +1,215 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-
|
4 |
"epoch": 10.0,
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
-
"epoch":
|
13 |
-
"
|
14 |
-
"
|
15 |
-
"
|
16 |
-
"eval_precision": 0.9292196007259528,
|
17 |
-
"eval_recall": 0.9411764705882353,
|
18 |
-
"eval_runtime": 13.9946,
|
19 |
-
"eval_samples_per_second": 486.615,
|
20 |
-
"eval_steps_per_second": 60.88,
|
21 |
-
"step": 466
|
22 |
-
},
|
23 |
-
{
|
24 |
-
"epoch": 1.0729613733905579,
|
25 |
-
"grad_norm": 0.07174628973007202,
|
26 |
-
"learning_rate": 4.4635193133047216e-05,
|
27 |
-
"loss": 0.0199,
|
28 |
"step": 500
|
29 |
},
|
30 |
{
|
31 |
-
"epoch":
|
32 |
-
"eval_accuracy": 0.
|
33 |
-
"eval_f1": 0.
|
34 |
-
"eval_loss": 0.
|
35 |
-
"eval_precision": 0.
|
36 |
-
"eval_recall": 0.
|
37 |
-
"eval_runtime": 14.
|
38 |
-
"eval_samples_per_second":
|
39 |
-
"eval_steps_per_second":
|
40 |
-
"step":
|
41 |
-
},
|
42 |
-
{
|
43 |
-
"epoch":
|
44 |
-
"grad_norm":
|
45 |
-
"learning_rate":
|
46 |
-
"loss": 0.
|
47 |
"step": 1000
|
48 |
},
|
49 |
{
|
50 |
-
"epoch":
|
51 |
-
"eval_accuracy": 0.
|
52 |
-
"eval_f1": 0.
|
53 |
-
"eval_loss": 0.
|
54 |
-
"eval_precision": 0.
|
55 |
-
"eval_recall": 0.
|
56 |
-
"eval_runtime":
|
57 |
-
"eval_samples_per_second":
|
58 |
-
"eval_steps_per_second":
|
59 |
-
"step":
|
60 |
-
},
|
61 |
-
{
|
62 |
-
"epoch":
|
63 |
-
"grad_norm": 0.
|
64 |
-
"learning_rate": 3.
|
65 |
-
"loss": 0.
|
66 |
"step": 1500
|
67 |
},
|
68 |
{
|
69 |
-
"epoch":
|
70 |
-
"eval_accuracy": 0.
|
71 |
-
"eval_f1": 0.
|
72 |
-
"eval_loss": 0.
|
73 |
-
"eval_precision": 0.
|
74 |
-
"eval_recall": 0.
|
75 |
-
"eval_runtime": 14.
|
76 |
-
"eval_samples_per_second":
|
77 |
-
"eval_steps_per_second": 59.
|
78 |
-
"step":
|
79 |
-
},
|
80 |
-
{
|
81 |
-
"epoch":
|
82 |
-
"grad_norm":
|
83 |
-
"learning_rate":
|
84 |
-
"loss": 0.
|
85 |
"step": 2000
|
86 |
},
|
87 |
{
|
88 |
-
"epoch":
|
89 |
-
"eval_accuracy": 0.
|
90 |
-
"eval_f1": 0.
|
91 |
-
"eval_loss": 0.
|
92 |
-
"eval_precision": 0.
|
93 |
-
"eval_recall": 0.
|
94 |
-
"eval_runtime":
|
95 |
-
"eval_samples_per_second":
|
96 |
-
"eval_steps_per_second":
|
97 |
-
"step":
|
98 |
-
},
|
99 |
-
{
|
100 |
-
"epoch":
|
101 |
-
"grad_norm":
|
102 |
-
"learning_rate": 2.
|
103 |
-
"loss": 0.
|
104 |
"step": 2500
|
105 |
},
|
106 |
{
|
107 |
-
"epoch":
|
108 |
-
"eval_accuracy": 0.
|
109 |
-
"eval_f1": 0.
|
110 |
-
"eval_loss": 0.
|
111 |
-
"eval_precision": 0.
|
112 |
-
"eval_recall": 0.
|
113 |
-
"eval_runtime": 14.
|
114 |
-
"eval_samples_per_second":
|
115 |
-
"eval_steps_per_second":
|
116 |
-
"step":
|
117 |
-
},
|
118 |
-
{
|
119 |
-
"epoch":
|
120 |
-
"grad_norm":
|
121 |
-
"learning_rate":
|
122 |
-
"loss": 0.
|
123 |
"step": 3000
|
124 |
},
|
125 |
{
|
126 |
-
"epoch":
|
127 |
-
"eval_accuracy": 0.
|
128 |
-
"eval_f1": 0.
|
129 |
-
"eval_loss": 0.
|
130 |
-
"eval_precision": 0.
|
131 |
-
"eval_recall": 0.
|
132 |
-
"eval_runtime":
|
133 |
-
"eval_samples_per_second":
|
134 |
-
"eval_steps_per_second":
|
135 |
-
"step":
|
136 |
-
},
|
137 |
-
{
|
138 |
-
"epoch":
|
139 |
-
"grad_norm": 0.
|
140 |
-
"learning_rate": 1.
|
141 |
-
"loss": 0.
|
142 |
"step": 3500
|
143 |
},
|
144 |
{
|
145 |
-
"epoch":
|
146 |
-
"eval_accuracy": 0.
|
147 |
-
"eval_f1": 0.
|
148 |
-
"eval_loss": 0.
|
149 |
-
"eval_precision": 0.
|
150 |
-
"eval_recall": 0.
|
151 |
-
"eval_runtime":
|
152 |
-
"eval_samples_per_second":
|
153 |
-
"eval_steps_per_second":
|
154 |
-
"step":
|
155 |
-
},
|
156 |
-
{
|
157 |
-
"epoch":
|
158 |
-
"grad_norm": 0.
|
159 |
-
"learning_rate":
|
160 |
-
"loss": 0.
|
161 |
"step": 4000
|
162 |
},
|
163 |
{
|
164 |
-
"epoch":
|
165 |
-
"eval_accuracy": 0.
|
166 |
-
"eval_f1": 0.
|
167 |
-
"eval_loss": 0.
|
168 |
-
"eval_precision": 0.
|
169 |
-
"eval_recall": 0.
|
170 |
-
"eval_runtime":
|
171 |
-
"eval_samples_per_second":
|
172 |
-
"eval_steps_per_second":
|
173 |
-
"step":
|
174 |
-
},
|
175 |
-
{
|
176 |
-
"epoch":
|
177 |
-
"grad_norm": 0.
|
178 |
-
"learning_rate":
|
179 |
-
"loss": 0.
|
180 |
"step": 4500
|
181 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
182 |
{
|
183 |
"epoch": 10.0,
|
184 |
-
"eval_accuracy": 0.
|
185 |
-
"eval_f1": 0.
|
186 |
-
"eval_loss":
|
187 |
-
"eval_precision": 0.
|
188 |
-
"eval_recall": 0.
|
189 |
-
"eval_runtime": 14.
|
190 |
-
"eval_samples_per_second":
|
191 |
-
"eval_steps_per_second": 58.
|
192 |
-
"step":
|
193 |
},
|
194 |
{
|
195 |
"epoch": 10.0,
|
196 |
-
"step":
|
197 |
-
"total_flos": 1.
|
198 |
-
"train_loss": 0.
|
199 |
-
"train_runtime":
|
200 |
-
"train_samples_per_second":
|
201 |
-
"train_steps_per_second": 3.
|
202 |
}
|
203 |
],
|
204 |
"logging_steps": 500,
|
205 |
-
"max_steps":
|
206 |
"num_input_tokens_seen": 0,
|
207 |
"num_train_epochs": 10,
|
208 |
"save_steps": 500,
|
@@ -218,7 +225,7 @@
|
|
218 |
"attributes": {}
|
219 |
}
|
220 |
},
|
221 |
-
"total_flos": 1.
|
222 |
"train_batch_size": 32,
|
223 |
"trial_name": null,
|
224 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.45455732567249935,
|
3 |
+
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-2705",
|
4 |
"epoch": 10.0,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 5410,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
+
"epoch": 0.9242144177449169,
|
13 |
+
"grad_norm": 1.4528056383132935,
|
14 |
+
"learning_rate": 4.537892791127542e-05,
|
15 |
+
"loss": 0.3191,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
"step": 500
|
17 |
},
|
18 |
{
|
19 |
+
"epoch": 1.0,
|
20 |
+
"eval_accuracy": 0.8443022505389485,
|
21 |
+
"eval_f1": 0.407486125870823,
|
22 |
+
"eval_loss": 0.47723615169525146,
|
23 |
+
"eval_precision": 0.27250473783954515,
|
24 |
+
"eval_recall": 0.8074403369209172,
|
25 |
+
"eval_runtime": 14.3908,
|
26 |
+
"eval_samples_per_second": 473.218,
|
27 |
+
"eval_steps_per_second": 59.204,
|
28 |
+
"step": 541
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"epoch": 1.8484288354898335,
|
32 |
+
"grad_norm": 1.5587466955184937,
|
33 |
+
"learning_rate": 4.075785582255083e-05,
|
34 |
+
"loss": 0.1619,
|
35 |
"step": 1000
|
36 |
},
|
37 |
{
|
38 |
+
"epoch": 2.0,
|
39 |
+
"eval_accuracy": 0.8552871874442172,
|
40 |
+
"eval_f1": 0.43975123088883133,
|
41 |
+
"eval_loss": 0.4583655595779419,
|
42 |
+
"eval_precision": 0.30406737143881024,
|
43 |
+
"eval_recall": 0.7941038839494619,
|
44 |
+
"eval_runtime": 14.212,
|
45 |
+
"eval_samples_per_second": 479.173,
|
46 |
+
"eval_steps_per_second": 59.949,
|
47 |
+
"step": 1082
|
48 |
+
},
|
49 |
+
{
|
50 |
+
"epoch": 2.7726432532347505,
|
51 |
+
"grad_norm": 0.9616082310676575,
|
52 |
+
"learning_rate": 3.613678373382625e-05,
|
53 |
+
"loss": 0.11,
|
54 |
"step": 1500
|
55 |
},
|
56 |
{
|
57 |
+
"epoch": 3.0,
|
58 |
+
"eval_accuracy": 0.8435470361267112,
|
59 |
+
"eval_f1": 0.4338006724608259,
|
60 |
+
"eval_loss": 0.6447410583496094,
|
61 |
+
"eval_precision": 0.29758899817216466,
|
62 |
+
"eval_recall": 0.7999532054281703,
|
63 |
+
"eval_runtime": 14.3233,
|
64 |
+
"eval_samples_per_second": 475.451,
|
65 |
+
"eval_steps_per_second": 59.484,
|
66 |
+
"step": 1623
|
67 |
+
},
|
68 |
+
{
|
69 |
+
"epoch": 3.6968576709796674,
|
70 |
+
"grad_norm": 1.4018645286560059,
|
71 |
+
"learning_rate": 3.1515711645101665e-05,
|
72 |
+
"loss": 0.0764,
|
73 |
"step": 2000
|
74 |
},
|
75 |
{
|
76 |
+
"epoch": 4.0,
|
77 |
+
"eval_accuracy": 0.8398602166778805,
|
78 |
+
"eval_f1": 0.42338430558177587,
|
79 |
+
"eval_loss": 0.7413247227668762,
|
80 |
+
"eval_precision": 0.2895756219333735,
|
81 |
+
"eval_recall": 0.7870846981750117,
|
82 |
+
"eval_runtime": 14.5396,
|
83 |
+
"eval_samples_per_second": 468.375,
|
84 |
+
"eval_steps_per_second": 58.598,
|
85 |
+
"step": 2164
|
86 |
+
},
|
87 |
+
{
|
88 |
+
"epoch": 4.621072088724584,
|
89 |
+
"grad_norm": 1.0904265642166138,
|
90 |
+
"learning_rate": 2.6894639556377083e-05,
|
91 |
+
"loss": 0.0567,
|
92 |
"step": 2500
|
93 |
},
|
94 |
{
|
95 |
+
"epoch": 5.0,
|
96 |
+
"eval_accuracy": 0.8564886649182308,
|
97 |
+
"eval_f1": 0.45455732567249935,
|
98 |
+
"eval_loss": 0.7005925178527832,
|
99 |
+
"eval_precision": 0.3152508603513856,
|
100 |
+
"eval_recall": 0.8144595226953674,
|
101 |
+
"eval_runtime": 14.2723,
|
102 |
+
"eval_samples_per_second": 477.148,
|
103 |
+
"eval_steps_per_second": 59.696,
|
104 |
+
"step": 2705
|
105 |
+
},
|
106 |
+
{
|
107 |
+
"epoch": 5.545286506469501,
|
108 |
+
"grad_norm": 1.240962028503418,
|
109 |
+
"learning_rate": 2.2273567467652497e-05,
|
110 |
+
"loss": 0.0428,
|
111 |
"step": 3000
|
112 |
},
|
113 |
{
|
114 |
+
"epoch": 6.0,
|
115 |
+
"eval_accuracy": 0.8504057561069384,
|
116 |
+
"eval_f1": 0.44700636942675154,
|
117 |
+
"eval_loss": 0.8111857175827026,
|
118 |
+
"eval_precision": 0.30710659898477155,
|
119 |
+
"eval_recall": 0.8210107627515209,
|
120 |
+
"eval_runtime": 14.3991,
|
121 |
+
"eval_samples_per_second": 472.946,
|
122 |
+
"eval_steps_per_second": 59.17,
|
123 |
+
"step": 3246
|
124 |
+
},
|
125 |
+
{
|
126 |
+
"epoch": 6.469500924214418,
|
127 |
+
"grad_norm": 0.44737720489501953,
|
128 |
+
"learning_rate": 1.7652495378927914e-05,
|
129 |
+
"loss": 0.0332,
|
130 |
"step": 3500
|
131 |
},
|
132 |
{
|
133 |
+
"epoch": 7.0,
|
134 |
+
"eval_accuracy": 0.8532961676301372,
|
135 |
+
"eval_f1": 0.4493518337567586,
|
136 |
+
"eval_loss": 0.904643714427948,
|
137 |
+
"eval_precision": 0.3113658932924077,
|
138 |
+
"eval_recall": 0.8069723912026205,
|
139 |
+
"eval_runtime": 14.3036,
|
140 |
+
"eval_samples_per_second": 476.105,
|
141 |
+
"eval_steps_per_second": 59.566,
|
142 |
+
"step": 3787
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 7.393715341959335,
|
146 |
+
"grad_norm": 0.7116318941116333,
|
147 |
+
"learning_rate": 1.3031423290203328e-05,
|
148 |
+
"loss": 0.0257,
|
149 |
"step": 4000
|
150 |
},
|
151 |
{
|
152 |
+
"epoch": 8.0,
|
153 |
+
"eval_accuracy": 0.8481538440413583,
|
154 |
+
"eval_f1": 0.44435897435897437,
|
155 |
+
"eval_loss": 0.9722912907600403,
|
156 |
+
"eval_precision": 0.30602154335158044,
|
157 |
+
"eval_recall": 0.8109499298081423,
|
158 |
+
"eval_runtime": 14.4222,
|
159 |
+
"eval_samples_per_second": 472.19,
|
160 |
+
"eval_steps_per_second": 59.076,
|
161 |
+
"step": 4328
|
162 |
+
},
|
163 |
+
{
|
164 |
+
"epoch": 8.317929759704251,
|
165 |
+
"grad_norm": 0.9520462155342102,
|
166 |
+
"learning_rate": 8.410351201478742e-06,
|
167 |
+
"loss": 0.022,
|
168 |
"step": 4500
|
169 |
},
|
170 |
+
{
|
171 |
+
"epoch": 9.0,
|
172 |
+
"eval_accuracy": 0.850186057368833,
|
173 |
+
"eval_f1": 0.44668737060041414,
|
174 |
+
"eval_loss": 1.002764105796814,
|
175 |
+
"eval_precision": 0.30871042747272404,
|
176 |
+
"eval_recall": 0.8076743097800655,
|
177 |
+
"eval_runtime": 14.2485,
|
178 |
+
"eval_samples_per_second": 477.944,
|
179 |
+
"eval_steps_per_second": 59.796,
|
180 |
+
"step": 4869
|
181 |
+
},
|
182 |
+
{
|
183 |
+
"epoch": 9.242144177449168,
|
184 |
+
"grad_norm": 0.6707109212875366,
|
185 |
+
"learning_rate": 3.789279112754159e-06,
|
186 |
+
"loss": 0.0181,
|
187 |
+
"step": 5000
|
188 |
+
},
|
189 |
{
|
190 |
"epoch": 10.0,
|
191 |
+
"eval_accuracy": 0.8533304955579661,
|
192 |
+
"eval_f1": 0.45038613797131544,
|
193 |
+
"eval_loss": 1.0022608041763306,
|
194 |
+
"eval_precision": 0.31162999550965426,
|
195 |
+
"eval_recall": 0.8118858212447356,
|
196 |
+
"eval_runtime": 14.4559,
|
197 |
+
"eval_samples_per_second": 471.089,
|
198 |
+
"eval_steps_per_second": 58.938,
|
199 |
+
"step": 5410
|
200 |
},
|
201 |
{
|
202 |
"epoch": 10.0,
|
203 |
+
"step": 5410,
|
204 |
+
"total_flos": 1.7176580067661056e+16,
|
205 |
+
"train_loss": 0.0812657987344287,
|
206 |
+
"train_runtime": 1549.44,
|
207 |
+
"train_samples_per_second": 223.332,
|
208 |
+
"train_steps_per_second": 3.492
|
209 |
}
|
210 |
],
|
211 |
"logging_steps": 500,
|
212 |
+
"max_steps": 5410,
|
213 |
"num_input_tokens_seen": 0,
|
214 |
"num_train_epochs": 10,
|
215 |
"save_steps": 500,
|
|
|
225 |
"attributes": {}
|
226 |
}
|
227 |
},
|
228 |
+
"total_flos": 1.7176580067661056e+16,
|
229 |
"train_batch_size": 32,
|
230 |
"trial_name": null,
|
231 |
"trial_params": null
|