Rodrigo1771 committed on
Commit
3bd3fee
·
verified ·
1 Parent(s): f720220

End of training

Browse files
README.md CHANGED
@@ -3,9 +3,10 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
5
  tags:
 
6
  - generated_from_trainer
7
  datasets:
8
- - combined-train-distemist-dev-85-ner
9
  metrics:
10
  - precision
11
  - recall
@@ -18,24 +19,24 @@ model-index:
18
  name: Token Classification
19
  type: token-classification
20
  dataset:
21
- name: combined-train-distemist-dev-85-ner
22
- type: combined-train-distemist-dev-85-ner
23
  config: CombinedTrainDisTEMISTDevNER
24
  split: validation
25
  args: CombinedTrainDisTEMISTDevNER
26
  metrics:
27
  - name: Precision
28
  type: precision
29
- value: 0.31162999550965426
30
  - name: Recall
31
  type: recall
32
- value: 0.8118858212447356
33
  - name: F1
34
  type: f1
35
- value: 0.45038613797131544
36
  - name: Accuracy
37
  type: accuracy
38
- value: 0.8533304955579661
39
  ---
40
 
41
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -43,13 +44,13 @@ should probably proofread and complete it, then remove this comment. -->
43
 
44
  # output
45
 
46
- This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the combined-train-distemist-dev-85-ner dataset.
47
  It achieves the following results on the evaluation set:
48
- - Loss: 1.0023
49
- - Precision: 0.3116
50
- - Recall: 0.8119
51
- - F1: 0.4504
52
- - Accuracy: 0.8533
53
 
54
  ## Model description
55
 
 
3
  license: apache-2.0
4
  base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
5
  tags:
6
+ - token-classification
7
  - generated_from_trainer
8
  datasets:
9
+ - Rodrigo1771/combined-train-distemist-dev-85-ner
10
  metrics:
11
  - precision
12
  - recall
 
19
  name: Token Classification
20
  type: token-classification
21
  dataset:
22
+ name: Rodrigo1771/combined-train-distemist-dev-85-ner
23
+ type: Rodrigo1771/combined-train-distemist-dev-85-ner
24
  config: CombinedTrainDisTEMISTDevNER
25
  split: validation
26
  args: CombinedTrainDisTEMISTDevNER
27
  metrics:
28
  - name: Precision
29
  type: precision
30
+ value: 0.3152508603513856
31
  - name: Recall
32
  type: recall
33
+ value: 0.8144595226953674
34
  - name: F1
35
  type: f1
36
+ value: 0.45455732567249935
37
  - name: Accuracy
38
  type: accuracy
39
+ value: 0.8564886649182308
40
  ---
41
 
42
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
44
 
45
  # output
46
 
47
+ This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the Rodrigo1771/combined-train-distemist-dev-85-ner dataset.
48
  It achieves the following results on the evaluation set:
49
+ - Loss: 0.7006
50
+ - Precision: 0.3153
51
+ - Recall: 0.8145
52
+ - F1: 0.4546
53
+ - Accuracy: 0.8565
54
 
55
  ## Model description
56
 
all_results.json CHANGED
@@ -1,26 +1,26 @@
1
  {
2
  "epoch": 10.0,
3
- "eval_accuracy": 0.9989426998228679,
4
- "eval_f1": 0.9491525423728814,
5
- "eval_loss": 0.004777050111442804,
6
- "eval_precision": 0.9461187214611873,
7
- "eval_recall": 0.9522058823529411,
8
- "eval_runtime": 13.9476,
9
  "eval_samples": 6810,
10
- "eval_samples_per_second": 488.256,
11
- "eval_steps_per_second": 61.086,
12
- "predict_accuracy": 0.9987478324070453,
13
- "predict_f1": 0.9243073407597828,
14
- "predict_loss": 0.005894536152482033,
15
- "predict_precision": 0.9069506726457399,
16
- "predict_recall": 0.9423412929528246,
17
- "predict_runtime": 28.2179,
18
- "predict_samples_per_second": 517.899,
19
- "predict_steps_per_second": 64.746,
20
- "total_flos": 1.4714840952259542e+16,
21
- "train_loss": 0.002772659832779558,
22
- "train_runtime": 1349.0548,
23
- "train_samples": 29797,
24
- "train_samples_per_second": 220.873,
25
- "train_steps_per_second": 3.454
26
  }
 
1
  {
2
  "epoch": 10.0,
3
+ "eval_accuracy": 0.8564886649182308,
4
+ "eval_f1": 0.45455732567249935,
5
+ "eval_loss": 0.7005925178527832,
6
+ "eval_precision": 0.3152508603513856,
7
+ "eval_recall": 0.8144595226953674,
8
+ "eval_runtime": 14.2934,
9
  "eval_samples": 6810,
10
+ "eval_samples_per_second": 476.445,
11
+ "eval_steps_per_second": 59.608,
12
+ "predict_accuracy": 0.9437946603149774,
13
+ "predict_f1": 0.6365870441364396,
14
+ "predict_loss": 0.22766011953353882,
15
+ "predict_precision": 0.519576379974326,
16
+ "predict_recall": 0.8216188784572444,
17
+ "predict_runtime": 29.1056,
18
+ "predict_samples_per_second": 502.103,
19
+ "predict_steps_per_second": 62.771,
20
+ "total_flos": 1.7176580067661056e+16,
21
+ "train_loss": 0.0812657987344287,
22
+ "train_runtime": 1549.44,
23
+ "train_samples": 34604,
24
+ "train_samples_per_second": 223.332,
25
+ "train_steps_per_second": 3.492
26
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 10.0,
3
- "eval_accuracy": 0.9989426998228679,
4
- "eval_f1": 0.9491525423728814,
5
- "eval_loss": 0.004777050111442804,
6
- "eval_precision": 0.9461187214611873,
7
- "eval_recall": 0.9522058823529411,
8
- "eval_runtime": 13.9476,
9
  "eval_samples": 6810,
10
- "eval_samples_per_second": 488.256,
11
- "eval_steps_per_second": 61.086
12
  }
 
1
  {
2
  "epoch": 10.0,
3
+ "eval_accuracy": 0.8564886649182308,
4
+ "eval_f1": 0.45455732567249935,
5
+ "eval_loss": 0.7005925178527832,
6
+ "eval_precision": 0.3152508603513856,
7
+ "eval_recall": 0.8144595226953674,
8
+ "eval_runtime": 14.2934,
9
  "eval_samples": 6810,
10
+ "eval_samples_per_second": 476.445,
11
+ "eval_steps_per_second": 59.608
12
  }
predict_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "predict_accuracy": 0.9987478324070453,
3
- "predict_f1": 0.9243073407597828,
4
- "predict_loss": 0.005894536152482033,
5
- "predict_precision": 0.9069506726457399,
6
- "predict_recall": 0.9423412929528246,
7
- "predict_runtime": 28.2179,
8
- "predict_samples_per_second": 517.899,
9
- "predict_steps_per_second": 64.746
10
  }
 
1
  {
2
+ "predict_accuracy": 0.9437946603149774,
3
+ "predict_f1": 0.6365870441364396,
4
+ "predict_loss": 0.22766011953353882,
5
+ "predict_precision": 0.519576379974326,
6
+ "predict_recall": 0.8216188784572444,
7
+ "predict_runtime": 29.1056,
8
+ "predict_samples_per_second": 502.103,
9
+ "predict_steps_per_second": 62.771
10
  }
predictions.txt CHANGED
The diff for this file is too large to render. See raw diff
 
tb/events.out.tfevents.1725581040.2a66098fac87.15776.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35dfb83cada8fb681a65313600c2857481bd7212900b1573ca8d6ce9b64470bb
3
+ size 560
train.log CHANGED
@@ -1495,3 +1495,53 @@ Training completed. Do not forget to share your model on huggingface.co/models =
1495
  {'eval_loss': 1.0022608041763306, 'eval_precision': 0.31162999550965426, 'eval_recall': 0.8118858212447356, 'eval_f1': 0.45038613797131544, 'eval_accuracy': 0.8533304955579661, 'eval_runtime': 14.4559, 'eval_samples_per_second': 471.089, 'eval_steps_per_second': 58.938, 'epoch': 10.0}
1496
  {'train_runtime': 1549.44, 'train_samples_per_second': 223.332, 'train_steps_per_second': 3.492, 'train_loss': 0.0812657987344287, 'epoch': 10.0}
1497
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1498
  0%| | 0/852 [00:00<?, ?it/s]
1499
  1%| | 10/852 [00:00<00:09, 92.77it/s]
1500
  2%|▏ | 20/852 [00:00<00:10, 80.16it/s]
1501
  3%|▎ | 29/852 [00:00<00:10, 79.89it/s]
1502
  4%|▍ | 38/852 [00:00<00:10, 80.51it/s]
1503
  6%|▌ | 47/852 [00:00<00:09, 81.99it/s]
1504
  7%|▋ | 56/852 [00:00<00:09, 82.94it/s]
1505
  8%|▊ | 65/852 [00:00<00:09, 82.00it/s]
1506
  9%|▊ | 74/852 [00:00<00:09, 80.66it/s]
1507
  10%|▉ | 83/852 [00:01<00:09, 80.65it/s]
1508
  11%|█ | 92/852 [00:01<00:09, 80.64it/s]
1509
  12%|█▏ | 101/852 [00:01<00:09, 79.84it/s]
1510
  13%|█▎ | 109/852 [00:01<00:09, 79.60it/s]
1511
  14%|█▍ | 118/852 [00:01<00:09, 80.66it/s]
1512
  15%|█▍ | 127/852 [00:01<00:09, 77.73it/s]
1513
  16%|█▌ | 136/852 [00:01<00:09, 78.64it/s]
1514
  17%|█▋ | 144/852 [00:01<00:08, 78.98it/s]
1515
  18%|█▊ | 152/852 [00:01<00:09, 77.66it/s]
1516
  19%|█▉ | 161/852 [00:02<00:08, 79.31it/s]
1517
  20%|█▉ | 169/852 [00:02<00:08, 79.22it/s]
1518
  21%|██ | 178/852 [00:02<00:08, 79.74it/s]
1519
  22%|██▏ | 187/852 [00:02<00:08, 80.55it/s]
1520
  23%|██▎ | 196/852 [00:02<00:08, 80.55it/s]
1521
  24%|██▍ | 205/852 [00:02<00:08, 80.53it/s]
1522
  25%|██▌ | 214/852 [00:02<00:08, 78.05it/s]
1523
  26%|██▌ | 223/852 [00:02<00:07, 79.41it/s]
1524
  27%|██▋ | 232/852 [00:02<00:07, 80.51it/s]
1525
  28%|██▊ | 241/852 [00:03<00:07, 77.55it/s]
1526
  29%|██▉ | 250/852 [00:03<00:07, 79.18it/s]
1527
  30%|███ | 259/852 [00:03<00:07, 80.41it/s]
1528
  31%|███▏ | 268/852 [00:03<00:07, 80.34it/s]
1529
  33%|███▎ | 277/852 [00:03<00:07, 81.00it/s]
1530
  34%|███▎ | 286/852 [00:03<00:06, 81.59it/s]
1531
  35%|███▍ | 295/852 [00:03<00:06, 80.31it/s]
1532
  36%|███▌ | 304/852 [00:03<00:06, 81.47it/s]
1533
  37%|███▋ | 313/852 [00:03<00:06, 79.79it/s]
1534
  38%|███▊ | 322/852 [00:04<00:06, 81.33it/s]
1535
  39%|███▉ | 331/852 [00:04<00:06, 80.75it/s]
1536
  40%|███▉ | 340/852 [00:04<00:06, 80.70it/s]
1537
  41%|████ | 349/852 [00:04<00:06, 80.37it/s]
1538
  42%|████▏ | 358/852 [00:04<00:06, 78.68it/s]
1539
  43%|████▎ | 367/852 [00:04<00:06, 79.85it/s]
1540
  44%|████▍ | 376/852 [00:04<00:05, 80.53it/s]
1541
  45%|████▌ | 385/852 [00:04<00:05, 80.46it/s]
1542
  46%|████▌ | 394/852 [00:04<00:05, 81.26it/s]
1543
  47%|████▋ | 403/852 [00:05<00:05, 81.11it/s]
1544
  48%|████▊ | 412/852 [00:05<00:05, 79.03it/s]
1545
  49%|████▉ | 421/852 [00:05<00:05, 80.48it/s]
1546
  50%|█████ | 430/852 [00:05<00:05, 79.74it/s]
1547
  52%|█████▏ | 439/852 [00:05<00:05, 81.17it/s]
1548
  53%|█████▎ | 448/852 [00:05<00:04, 81.44it/s]
1549
  54%|█████▎ | 457/852 [00:05<00:04, 82.03it/s]
1550
  55%|█████▍ | 466/852 [00:05<00:04, 79.25it/s]
1551
  56%|█████▌ | 474/852 [00:05<00:05, 75.40it/s]
1552
  57%|█████▋ | 482/852 [00:06<00:04, 76.12it/s]
1553
  58%|█████▊ | 490/852 [00:06<00:04, 76.55it/s]
1554
  59%|█████▊ | 499/852 [00:06<00:04, 78.88it/s]
1555
  60%|█████▉ | 507/852 [00:06<00:04, 78.87it/s]
1556
  61%|██████ | 516/852 [00:06<00:04, 80.91it/s]
1557
  62%|██████▏ | 525/852 [00:06<00:04, 79.57it/s]
1558
  63%|██████▎ | 534/852 [00:06<00:03, 79.97it/s]
1559
  64%|██████▎ | 543/852 [00:06<00:03, 81.17it/s]
1560
  65%|██████▍ | 552/852 [00:06<00:03, 79.32it/s]
1561
  66%|██████▌ | 561/852 [00:07<00:03, 79.73it/s]
1562
  67%|██████▋ | 570/852 [00:07<00:03, 80.17it/s]
1563
  68%|██████▊ | 579/852 [00:07<00:03, 79.01it/s]
1564
  69%|██████▉ | 587/852 [00:07<00:03, 78.03it/s]
1565
  70%|██████▉ | 596/852 [00:07<00:03, 78.78it/s]
1566
  71%|███████ | 604/852 [00:07<00:03, 78.44it/s]
1567
  72%|███████▏ | 612/852 [00:07<00:03, 77.03it/s]
1568
  73%|███████▎ | 620/852 [00:07<00:02, 77.43it/s]
1569
  74%|███████▎ | 628/852 [00:07<00:02, 77.06it/s]
1570
  75%|███████▍ | 636/852 [00:07<00:02, 77.78it/s]
1571
  76%|███████▌ | 644/852 [00:08<00:02, 75.78it/s]
1572
  77%|███████▋ | 653/852 [00:08<00:02, 77.88it/s]
1573
  78%|███████▊ | 662/852 [00:08<00:02, 78.58it/s]
1574
  79%|███████▉ | 671/852 [00:08<00:02, 78.80it/s]
1575
  80%|███████▉ | 680/852 [00:08<00:02, 79.13it/s]
1576
  81%|████████ | 689/852 [00:08<00:02, 80.27it/s]
1577
  82%|████████▏ | 698/852 [00:08<00:01, 80.46it/s]
1578
  83%|████████▎ | 707/852 [00:08<00:01, 80.49it/s]
1579
  84%|████████▍ | 716/852 [00:08<00:01, 80.53it/s]
1580
  85%|████████▌ | 725/852 [00:09<00:01, 81.23it/s]
1581
  86%|████████▌ | 734/852 [00:09<00:01, 82.34it/s]
1582
  87%|████████▋ | 743/852 [00:09<00:01, 82.16it/s]
1583
  88%|████████▊ | 752/852 [00:09<00:01, 82.54it/s]
1584
  89%|████████▉ | 761/852 [00:09<00:01, 83.78it/s]
1585
  90%|█████████ | 770/852 [00:09<00:00, 82.29it/s]
1586
  91%|█████████▏| 779/852 [00:09<00:00, 81.45it/s]
1587
  92%|█████████▏| 788/852 [00:09<00:00, 80.57it/s]
1588
  94%|█████████▎| 797/852 [00:09<00:00, 80.93it/s]
1589
  95%|█████████▍| 806/852 [00:10<00:00, 82.19it/s]
1590
  96%|█████████▌| 815/852 [00:10<00:00, 81.02it/s]
1591
  97%|█████████▋| 824/852 [00:10<00:00, 81.64it/s]
1592
  98%|█████████▊| 833/852 [00:10<00:00, 82.31it/s]
1593
  99%|█████████▉| 842/852 [00:10<00:00, 80.76it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1594
  0%| | 0/1827 [00:00<?, ?it/s]
1595
  1%| | 10/1827 [00:00<00:19, 91.06it/s]
1596
  1%| | 20/1827 [00:00<00:22, 79.50it/s]
1597
  2%|▏ | 29/1827 [00:00<00:21, 81.87it/s]
1598
  2%|▏ | 38/1827 [00:00<00:22, 80.70it/s]
1599
  3%|▎ | 47/1827 [00:00<00:21, 81.06it/s]
1600
  3%|▎ | 56/1827 [00:00<00:21, 80.92it/s]
1601
  4%|▎ | 65/1827 [00:00<00:22, 78.97it/s]
1602
  4%|▍ | 74/1827 [00:00<00:21, 79.80it/s]
1603
  5%|▍ | 83/1827 [00:01<00:21, 80.85it/s]
1604
  5%|▌ | 92/1827 [00:01<00:21, 80.71it/s]
1605
  6%|▌ | 101/1827 [00:01<00:20, 82.27it/s]
1606
  6%|▌ | 110/1827 [00:01<00:20, 82.50it/s]
1607
  7%|▋ | 119/1827 [00:01<00:20, 81.39it/s]
1608
  7%|▋ | 128/1827 [00:01<00:21, 80.71it/s]
1609
  7%|▋ | 137/1827 [00:01<00:20, 81.92it/s]
1610
  8%|▊ | 146/1827 [00:01<00:20, 82.51it/s]
1611
  8%|▊ | 155/1827 [00:01<00:20, 82.36it/s]
1612
  9%|▉ | 164/1827 [00:02<00:20, 81.09it/s]
1613
  9%|▉ | 173/1827 [00:02<00:20, 82.25it/s]
1614
  10%|▉ | 182/1827 [00:02<00:19, 83.50it/s]
1615
  10%|█ | 191/1827 [00:02<00:19, 83.60it/s]
1616
  11%|█ | 200/1827 [00:02<00:19, 82.02it/s]
1617
  11%|█▏ | 209/1827 [00:02<00:19, 81.91it/s]
1618
  12%|█▏ | 218/1827 [00:02<00:19, 80.50it/s]
1619
  12%|█▏ | 227/1827 [00:02<00:19, 81.39it/s]
1620
  13%|█▎ | 236/1827 [00:02<00:20, 78.84it/s]
1621
  13%|█▎ | 245/1827 [00:03<00:19, 80.03it/s]
1622
  14%|█▍ | 254/1827 [00:03<00:19, 80.43it/s]
1623
  14%|█▍ | 263/1827 [00:03<00:19, 80.02it/s]
1624
  15%|█▍ | 272/1827 [00:03<00:19, 81.42it/s]
1625
  15%|█▌ | 281/1827 [00:03<00:18, 82.68it/s]
1626
  16%|█▌ | 290/1827 [00:03<00:18, 82.21it/s]
1627
  16%|█▋ | 299/1827 [00:03<00:18, 82.61it/s]
1628
  17%|█▋ | 308/1827 [00:03<00:18, 82.37it/s]
1629
  17%|█▋ | 317/1827 [00:03<00:18, 82.45it/s]
1630
  18%|█▊ | 326/1827 [00:03<00:17, 83.97it/s]
1631
  18%|█▊ | 335/1827 [00:04<00:17, 83.77it/s]
1632
  19%|█▉ | 344/1827 [00:04<00:17, 84.88it/s]
1633
  19%|█▉ | 353/1827 [00:04<00:18, 79.76it/s]
1634
  20%|█▉ | 362/1827 [00:04<00:18, 80.32it/s]
1635
  20%|██ | 371/1827 [00:04<00:18, 80.44it/s]
1636
  21%|██ | 380/1827 [00:04<00:17, 80.41it/s]
1637
  21%|██▏ | 389/1827 [00:04<00:18, 79.52it/s]
1638
  22%|██▏ | 397/1827 [00:04<00:18, 78.70it/s]
1639
  22%|██▏ | 406/1827 [00:04<00:17, 79.69it/s]
1640
  23%|██▎ | 414/1827 [00:05<00:18, 77.35it/s]
1641
  23%|██▎ | 423/1827 [00:05<00:17, 78.47it/s]
1642
  24%|██▎ | 431/1827 [00:05<00:17, 77.94it/s]
1643
  24%|██▍ | 439/1827 [00:05<00:17, 77.85it/s]
1644
  24%|██▍ | 447/1827 [00:05<00:17, 77.54it/s]
1645
  25%|██▍ | 456/1827 [00:05<00:17, 79.01it/s]
1646
  25%|██▌ | 465/1827 [00:05<00:16, 80.47it/s]
1647
  26%|██▌ | 474/1827 [00:05<00:17, 78.97it/s]
1648
  26%|██▋ | 482/1827 [00:05<00:17, 77.54it/s]
1649
  27%|██▋ | 490/1827 [00:06<00:17, 76.65it/s]
1650
  27%|██▋ | 499/1827 [00:06<00:16, 78.31it/s]
1651
  28%|██▊ | 508/1827 [00:06<00:16, 79.34it/s]
1652
  28%|██▊ | 517/1827 [00:06<00:16, 79.61it/s]
1653
  29%|██▉ | 526/1827 [00:06<00:16, 80.80it/s]
1654
  29%|██▉ | 535/1827 [00:06<00:16, 80.00it/s]
1655
  30%|██▉ | 544/1827 [00:06<00:15, 80.92it/s]
1656
  30%|███ | 553/1827 [00:06<00:15, 80.34it/s]
1657
  31%|███ | 562/1827 [00:06<00:15, 81.20it/s]
1658
  31%|███▏ | 571/1827 [00:07<00:15, 83.07it/s]
1659
  32%|███▏ | 580/1827 [00:07<00:14, 83.38it/s]
1660
  32%|███▏ | 589/1827 [00:07<00:14, 83.03it/s]
1661
  33%|███▎ | 598/1827 [00:07<00:14, 82.25it/s]
1662
  33%|███▎ | 607/1827 [00:07<00:14, 81.98it/s]
1663
  34%|███▎ | 616/1827 [00:07<00:14, 81.52it/s]
1664
  34%|███▍ | 625/1827 [00:07<00:14, 82.30it/s]
1665
  35%|███▍ | 634/1827 [00:07<00:14, 82.87it/s]
1666
  35%|███▌ | 643/1827 [00:07<00:14, 83.83it/s]
1667
  36%|███▌ | 652/1827 [00:08<00:15, 78.28it/s]
1668
  36%|███▌ | 661/1827 [00:08<00:14, 79.08it/s]
1669
  37%|███▋ | 670/1827 [00:08<00:14, 80.28it/s]
1670
  37%|███▋ | 679/1827 [00:08<00:14, 80.78it/s]
1671
  38%|███▊ | 688/1827 [00:08<00:13, 82.11it/s]
1672
  38%|███▊ | 697/1827 [00:08<00:13, 83.76it/s]
1673
  39%|███▊ | 706/1827 [00:08<00:13, 83.15it/s]
1674
  39%|███▉ | 715/1827 [00:08<00:13, 84.66it/s]
1675
  40%|███▉ | 724/1827 [00:08<00:12, 85.34it/s]
1676
  40%|████ | 733/1827 [00:09<00:12, 85.46it/s]
1677
  41%|████ | 742/1827 [00:09<00:12, 85.34it/s]
1678
  41%|████ | 751/1827 [00:09<00:12, 86.45it/s]
1679
  42%|████▏ | 760/1827 [00:09<00:12, 85.05it/s]
1680
  42%|████▏ | 769/1827 [00:09<00:12, 86.03it/s]
1681
  43%|████▎ | 778/1827 [00:09<00:12, 84.73it/s]
1682
  43%|████▎ | 787/1827 [00:09<00:12, 81.97it/s]
1683
  44%|████▎ | 796/1827 [00:09<00:12, 83.05it/s]
1684
  44%|████▍ | 805/1827 [00:09<00:12, 81.61it/s]
1685
  45%|████▍ | 814/1827 [00:09<00:12, 82.53it/s]
1686
  45%|████▌ | 823/1827 [00:10<00:12, 82.99it/s]
1687
  46%|████▌ | 832/1827 [00:10<00:12, 82.38it/s]
1688
  46%|████▌ | 841/1827 [00:10<00:11, 82.96it/s]
1689
  47%|████▋ | 850/1827 [00:10<00:11, 83.72it/s]
1690
  47%|████▋ | 859/1827 [00:10<00:11, 84.83it/s]
1691
  48%|████▊ | 868/1827 [00:10<00:11, 84.11it/s]
1692
  48%|████▊ | 877/1827 [00:10<00:11, 83.69it/s]
1693
  48%|████▊ | 886/1827 [00:10<00:11, 84.15it/s]
1694
  49%|████▉ | 895/1827 [00:10<00:11, 83.37it/s]
1695
  49%|████▉ | 904/1827 [00:11<00:10, 84.07it/s]
1696
  50%|████▉ | 913/1827 [00:11<00:10, 84.82it/s]
1697
  50%|█████ | 922/1827 [00:11<00:10, 84.51it/s]
1698
  51%|█████ | 931/1827 [00:11<00:10, 81.89it/s]
1699
  51%|█████▏ | 940/1827 [00:11<00:10, 82.43it/s]
1700
  52%|█████▏ | 949/1827 [00:11<00:10, 81.02it/s]
1701
  52%|█████▏ | 958/1827 [00:11<00:10, 81.56it/s]
1702
  53%|█████▎ | 967/1827 [00:11<00:10, 83.24it/s]
1703
  53%|█████▎ | 976/1827 [00:11<00:10, 82.41it/s]
1704
  54%|█████▍ | 985/1827 [00:12<00:10, 83.96it/s]
1705
  54%|█████▍ | 994/1827 [00:12<00:09, 84.30it/s]
1706
  55%|█████▍ | 1003/1827 [00:12<00:09, 83.87it/s]
1707
  55%|█████▌ | 1012/1827 [00:12<00:09, 83.40it/s]
1708
  56%|█████▌ | 1021/1827 [00:12<00:09, 84.27it/s]
1709
  56%|█████▋ | 1030/1827 [00:12<00:09, 84.53it/s]
1710
  57%|█████▋ | 1039/1827 [00:12<00:09, 82.78it/s]
1711
  57%|█████▋ | 1048/1827 [00:12<00:09, 83.18it/s]
1712
  58%|█████▊ | 1057/1827 [00:12<00:09, 83.19it/s]
1713
  58%|█████▊ | 1066/1827 [00:13<00:09, 84.07it/s]
1714
  59%|█████▉ | 1075/1827 [00:13<00:08, 84.64it/s]
1715
  59%|█████▉ | 1084/1827 [00:13<00:08, 85.01it/s]
1716
  60%|█████▉ | 1093/1827 [00:13<00:08, 85.36it/s]
1717
  60%|██████ | 1102/1827 [00:13<00:08, 84.17it/s]
1718
  61%|██████ | 1111/1827 [00:13<00:08, 82.68it/s]
1719
  61%|██████▏ | 1120/1827 [00:13<00:08, 81.76it/s]
1720
  62%|██████▏ | 1129/1827 [00:13<00:08, 81.92it/s]
1721
  62%|██████▏ | 1138/1827 [00:13<00:08, 81.83it/s]
1722
  63%|██████▎ | 1147/1827 [00:13<00:08, 82.31it/s]
1723
  63%|██████▎ | 1156/1827 [00:14<00:08, 82.02it/s]
1724
  64%|██████▍ | 1165/1827 [00:14<00:08, 79.48it/s]
1725
  64%|██████▍ | 1174/1827 [00:14<00:08, 80.60it/s]
1726
  65%|██████▍ | 1183/1827 [00:14<00:08, 78.91it/s]
1727
  65%|██████▌ | 1192/1827 [00:14<00:07, 81.02it/s]
1728
  66%|██████▌ | 1201/1827 [00:14<00:07, 80.79it/s]
1729
  66%|██████▌ | 1210/1827 [00:14<00:07, 81.06it/s]
1730
  67%|██████▋ | 1219/1827 [00:14<00:07, 79.89it/s]
1731
  67%|██████▋ | 1228/1827 [00:14<00:07, 81.29it/s]
1732
  68%|██████▊ | 1237/1827 [00:15<00:07, 82.50it/s]
1733
  68%|██████▊ | 1246/1827 [00:15<00:07, 82.71it/s]
1734
  69%|██████▊ | 1255/1827 [00:15<00:07, 80.01it/s]
1735
  69%|██████▉ | 1264/1827 [00:15<00:07, 80.07it/s]
1736
  70%|██████▉ | 1273/1827 [00:15<00:06, 81.19it/s]
1737
  70%|███████ | 1282/1827 [00:15<00:06, 82.47it/s]
1738
  71%|███████ | 1291/1827 [00:15<00:06, 83.35it/s]
1739
  71%|███████ | 1300/1827 [00:15<00:06, 83.34it/s]
1740
  72%|███████▏ | 1309/1827 [00:15<00:06, 83.88it/s]
1741
  72%|███████▏ | 1318/1827 [00:16<00:05, 84.84it/s]
1742
  73%|███████▎ | 1327/1827 [00:16<00:05, 83.90it/s]
1743
  73%|███████▎ | 1336/1827 [00:16<00:05, 84.12it/s]
1744
  74%|███████▎ | 1345/1827 [00:16<00:05, 83.22it/s]
1745
  74%|███████▍ | 1354/1827 [00:16<00:05, 82.63it/s]
1746
  75%|███████▍ | 1363/1827 [00:16<00:05, 82.32it/s]
1747
  75%|███████▌ | 1372/1827 [00:16<00:05, 81.87it/s]
1748
  76%|███████▌ | 1381/1827 [00:16<00:05, 82.35it/s]
1749
  76%|███████▌ | 1390/1827 [00:16<00:05, 81.43it/s]
1750
  77%|███████▋ | 1399/1827 [00:17<00:05, 82.08it/s]
1751
  77%|███████▋ | 1408/1827 [00:17<00:05, 82.67it/s]
1752
  78%|███████▊ | 1417/1827 [00:17<00:04, 82.84it/s]
1753
  78%|███████▊ | 1426/1827 [00:17<00:04, 82.22it/s]
1754
  79%|███████▊ | 1435/1827 [00:17<00:04, 81.18it/s]
1755
  79%|███████▉ | 1444/1827 [00:17<00:04, 78.91it/s]
1756
  80%|███████▉ | 1453/1827 [00:17<00:04, 80.47it/s]
1757
  80%|████████ | 1462/1827 [00:17<00:04, 79.66it/s]
1758
  80%|████████ | 1470/1827 [00:17<00:04, 78.26it/s]
1759
  81%|████████ | 1478/1827 [00:18<00:04, 78.65it/s]
1760
  81%|████████▏ | 1487/1827 [00:18<00:04, 79.78it/s]
1761
  82%|████████▏ | 1495/1827 [00:18<00:04, 77.95it/s]
1762
  82%|████████▏ | 1503/1827 [00:18<00:04, 77.75it/s]
1763
  83%|████████▎ | 1512/1827 [00:18<00:03, 79.71it/s]
1764
  83%|████████▎ | 1521/1827 [00:18<00:03, 80.57it/s]
1765
  84%|████████▎ | 1530/1827 [00:18<00:03, 80.96it/s]
1766
  84%|████████▍ | 1539/1827 [00:18<00:03, 81.58it/s]
1767
  85%|████████▍ | 1548/1827 [00:18<00:03, 81.13it/s]
1768
  85%|████████▌ | 1557/1827 [00:19<00:03, 81.30it/s]
1769
  86%|████████▌ | 1566/1827 [00:19<00:03, 81.60it/s]
1770
  86%|████████▌ | 1575/1827 [00:19<00:03, 81.67it/s]
1771
  87%|████████▋ | 1584/1827 [00:19<00:02, 82.53it/s]
1772
  87%|████████▋ | 1593/1827 [00:19<00:02, 82.02it/s]
1773
  88%|████████▊ | 1602/1827 [00:19<00:02, 82.22it/s]
1774
  88%|████████▊ | 1611/1827 [00:19<00:02, 82.92it/s]
1775
  89%|████████▊ | 1620/1827 [00:19<00:02, 79.31it/s]
1776
  89%|████████▉ | 1628/1827 [00:19<00:02, 76.38it/s]
1777
  90%|████████▉ | 1637/1827 [00:20<00:02, 78.37it/s]
1778
  90%|█████████ | 1646/1827 [00:20<00:02, 79.98it/s]
1779
  91%|█████████ | 1655/1827 [00:20<00:02, 81.60it/s]
1780
  91%|█████████ | 1664/1827 [00:20<00:02, 81.06it/s]
1781
  92%|█████████▏| 1673/1827 [00:20<00:01, 81.37it/s]
1782
  92%|█████████▏| 1682/1827 [00:20<00:01, 82.25it/s]
1783
  93%|█████████▎| 1691/1827 [00:20<00:01, 83.16it/s]
1784
  93%|█████████▎| 1700/1827 [00:20<00:01, 81.57it/s]
1785
  94%|█████████▎| 1709/1827 [00:20<00:01, 82.04it/s]
1786
  94%|█████████▍| 1718/1827 [00:21<00:01, 83.23it/s]
1787
  95%|█████████▍| 1727/1827 [00:21<00:01, 80.36it/s]
1788
  95%|█████████▌| 1736/1827 [00:21<00:01, 81.04it/s]
1789
  96%|█████████▌| 1745/1827 [00:21<00:01, 81.89it/s]
1790
  96%|█████████▌| 1754/1827 [00:21<00:00, 82.74it/s]
1791
  96%|█████████▋| 1763/1827 [00:21<00:00, 82.25it/s]
1792
  97%|█████████▋| 1772/1827 [00:21<00:00, 82.64it/s]
1793
  97%|█████████▋| 1781/1827 [00:21<00:00, 83.22it/s]
1794
  98%|█████████▊| 1790/1827 [00:21<00:00, 83.23it/s]
1795
  98%|█████████▊| 1799/1827 [00:22<00:00, 80.14it/s]
1796
  99%|█████████▉| 1808/1827 [00:22<00:00, 81.33it/s]
1797
  99%|█████████▉| 1817/1827 [00:22<00:00, 81.79it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1495
  {'eval_loss': 1.0022608041763306, 'eval_precision': 0.31162999550965426, 'eval_recall': 0.8118858212447356, 'eval_f1': 0.45038613797131544, 'eval_accuracy': 0.8533304955579661, 'eval_runtime': 14.4559, 'eval_samples_per_second': 471.089, 'eval_steps_per_second': 58.938, 'epoch': 10.0}
1496
  {'train_runtime': 1549.44, 'train_samples_per_second': 223.332, 'train_steps_per_second': 3.492, 'train_loss': 0.0812657987344287, 'epoch': 10.0}
1497
 
1498
+ ***** train metrics *****
1499
+ epoch = 10.0
1500
+ total_flos = 15996936GF
1501
+ train_loss = 0.0813
1502
+ train_runtime = 0:25:49.44
1503
+ train_samples = 34604
1504
+ train_samples_per_second = 223.332
1505
+ train_steps_per_second = 3.492
1506
+ 09/06/2024 00:03:46 - INFO - __main__ - *** Evaluate ***
1507
+ [INFO|trainer.py:811] 2024-09-06 00:03:46,664 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: id, ner_tags, tokens. If id, ner_tags, tokens are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
1508
+ [INFO|trainer.py:3819] 2024-09-06 00:03:46,667 >>
1509
+ ***** Running Evaluation *****
1510
+ [INFO|trainer.py:3821] 2024-09-06 00:03:46,667 >> Num examples = 6810
1511
+ [INFO|trainer.py:3824] 2024-09-06 00:03:46,667 >> Batch size = 8
1512
+
1513
  0%| | 0/852 [00:00<?, ?it/s]
1514
  1%| | 10/852 [00:00<00:09, 92.77it/s]
1515
  2%|▏ | 20/852 [00:00<00:10, 80.16it/s]
1516
  3%|▎ | 29/852 [00:00<00:10, 79.89it/s]
1517
  4%|▍ | 38/852 [00:00<00:10, 80.51it/s]
1518
  6%|▌ | 47/852 [00:00<00:09, 81.99it/s]
1519
  7%|▋ | 56/852 [00:00<00:09, 82.94it/s]
1520
  8%|▊ | 65/852 [00:00<00:09, 82.00it/s]
1521
  9%|▊ | 74/852 [00:00<00:09, 80.66it/s]
1522
  10%|▉ | 83/852 [00:01<00:09, 80.65it/s]
1523
  11%|█ | 92/852 [00:01<00:09, 80.64it/s]
1524
  12%|█▏ | 101/852 [00:01<00:09, 79.84it/s]
1525
  13%|█▎ | 109/852 [00:01<00:09, 79.60it/s]
1526
  14%|█▍ | 118/852 [00:01<00:09, 80.66it/s]
1527
  15%|█▍ | 127/852 [00:01<00:09, 77.73it/s]
1528
  16%|█▌ | 136/852 [00:01<00:09, 78.64it/s]
1529
  17%|█▋ | 144/852 [00:01<00:08, 78.98it/s]
1530
  18%|█▊ | 152/852 [00:01<00:09, 77.66it/s]
1531
  19%|█▉ | 161/852 [00:02<00:08, 79.31it/s]
1532
  20%|█▉ | 169/852 [00:02<00:08, 79.22it/s]
1533
  21%|██ | 178/852 [00:02<00:08, 79.74it/s]
1534
  22%|██▏ | 187/852 [00:02<00:08, 80.55it/s]
1535
  23%|██▎ | 196/852 [00:02<00:08, 80.55it/s]
1536
  24%|██▍ | 205/852 [00:02<00:08, 80.53it/s]
1537
  25%|██▌ | 214/852 [00:02<00:08, 78.05it/s]
1538
  26%|██▌ | 223/852 [00:02<00:07, 79.41it/s]
1539
  27%|██▋ | 232/852 [00:02<00:07, 80.51it/s]
1540
  28%|██▊ | 241/852 [00:03<00:07, 77.55it/s]
1541
  29%|██▉ | 250/852 [00:03<00:07, 79.18it/s]
1542
  30%|███ | 259/852 [00:03<00:07, 80.41it/s]
1543
  31%|███▏ | 268/852 [00:03<00:07, 80.34it/s]
1544
  33%|███▎ | 277/852 [00:03<00:07, 81.00it/s]
1545
  34%|███▎ | 286/852 [00:03<00:06, 81.59it/s]
1546
  35%|███▍ | 295/852 [00:03<00:06, 80.31it/s]
1547
  36%|███▌ | 304/852 [00:03<00:06, 81.47it/s]
1548
  37%|███▋ | 313/852 [00:03<00:06, 79.79it/s]
1549
  38%|███▊ | 322/852 [00:04<00:06, 81.33it/s]
1550
  39%|███▉ | 331/852 [00:04<00:06, 80.75it/s]
1551
  40%|███▉ | 340/852 [00:04<00:06, 80.70it/s]
1552
  41%|████ | 349/852 [00:04<00:06, 80.37it/s]
1553
  42%|████▏ | 358/852 [00:04<00:06, 78.68it/s]
1554
  43%|████▎ | 367/852 [00:04<00:06, 79.85it/s]
1555
  44%|████▍ | 376/852 [00:04<00:05, 80.53it/s]
1556
  45%|████▌ | 385/852 [00:04<00:05, 80.46it/s]
1557
  46%|████▌ | 394/852 [00:04<00:05, 81.26it/s]
1558
  47%|████▋ | 403/852 [00:05<00:05, 81.11it/s]
1559
  48%|████▊ | 412/852 [00:05<00:05, 79.03it/s]
1560
  49%|████▉ | 421/852 [00:05<00:05, 80.48it/s]
1561
  50%|█████ | 430/852 [00:05<00:05, 79.74it/s]
1562
  52%|█████▏ | 439/852 [00:05<00:05, 81.17it/s]
1563
  53%|█████▎ | 448/852 [00:05<00:04, 81.44it/s]
1564
  54%|█████▎ | 457/852 [00:05<00:04, 82.03it/s]
1565
  55%|█████▍ | 466/852 [00:05<00:04, 79.25it/s]
1566
  56%|█████▌ | 474/852 [00:05<00:05, 75.40it/s]
1567
  57%|█████▋ | 482/852 [00:06<00:04, 76.12it/s]
1568
  58%|█████▊ | 490/852 [00:06<00:04, 76.55it/s]
1569
  59%|█████▊ | 499/852 [00:06<00:04, 78.88it/s]
1570
  60%|█████▉ | 507/852 [00:06<00:04, 78.87it/s]
1571
  61%|██████ | 516/852 [00:06<00:04, 80.91it/s]
1572
  62%|██████▏ | 525/852 [00:06<00:04, 79.57it/s]
1573
  63%|██████▎ | 534/852 [00:06<00:03, 79.97it/s]
1574
  64%|██████▎ | 543/852 [00:06<00:03, 81.17it/s]
1575
  65%|██████▍ | 552/852 [00:06<00:03, 79.32it/s]
1576
  66%|██████▌ | 561/852 [00:07<00:03, 79.73it/s]
1577
  67%|██████▋ | 570/852 [00:07<00:03, 80.17it/s]
1578
  68%|██████▊ | 579/852 [00:07<00:03, 79.01it/s]
1579
  69%|██████▉ | 587/852 [00:07<00:03, 78.03it/s]
1580
  70%|██████▉ | 596/852 [00:07<00:03, 78.78it/s]
1581
  71%|███████ | 604/852 [00:07<00:03, 78.44it/s]
1582
  72%|███████▏ | 612/852 [00:07<00:03, 77.03it/s]
1583
  73%|███████▎ | 620/852 [00:07<00:02, 77.43it/s]
1584
  74%|███████▎ | 628/852 [00:07<00:02, 77.06it/s]
1585
  75%|███████▍ | 636/852 [00:07<00:02, 77.78it/s]
1586
  76%|███████▌ | 644/852 [00:08<00:02, 75.78it/s]
1587
  77%|███████▋ | 653/852 [00:08<00:02, 77.88it/s]
1588
  78%|███████▊ | 662/852 [00:08<00:02, 78.58it/s]
1589
  79%|███████▉ | 671/852 [00:08<00:02, 78.80it/s]
1590
  80%|███████▉ | 680/852 [00:08<00:02, 79.13it/s]
1591
  81%|████████ | 689/852 [00:08<00:02, 80.27it/s]
1592
  82%|████████▏ | 698/852 [00:08<00:01, 80.46it/s]
1593
  83%|████████▎ | 707/852 [00:08<00:01, 80.49it/s]
1594
  84%|████████▍ | 716/852 [00:08<00:01, 80.53it/s]
1595
  85%|████████▌ | 725/852 [00:09<00:01, 81.23it/s]
1596
  86%|████████▌ | 734/852 [00:09<00:01, 82.34it/s]
1597
  87%|████████▋ | 743/852 [00:09<00:01, 82.16it/s]
1598
  88%|████████▊ | 752/852 [00:09<00:01, 82.54it/s]
1599
  89%|████████▉ | 761/852 [00:09<00:01, 83.78it/s]
1600
  90%|█████████ | 770/852 [00:09<00:00, 82.29it/s]
1601
  91%|█████████▏| 779/852 [00:09<00:00, 81.45it/s]
1602
  92%|█████████▏| 788/852 [00:09<00:00, 80.57it/s]
1603
  94%|█████████▎| 797/852 [00:09<00:00, 80.93it/s]
1604
  95%|█████████▍| 806/852 [00:10<00:00, 82.19it/s]
1605
  96%|█████████▌| 815/852 [00:10<00:00, 81.02it/s]
1606
  97%|█████████▋| 824/852 [00:10<00:00, 81.64it/s]
1607
  98%|█████████▊| 833/852 [00:10<00:00, 82.31it/s]
1608
  99%|█████████▉| 842/852 [00:10<00:00, 80.76it/s]
1609
+ _warn_prf(average, modifier, msg_start, len(result))
1610
+
1611
+ ***** eval metrics *****
1612
+ epoch = 10.0
1613
+ eval_accuracy = 0.8565
1614
+ eval_f1 = 0.4546
1615
+ eval_loss = 0.7006
1616
+ eval_precision = 0.3153
1617
+ eval_recall = 0.8145
1618
+ eval_runtime = 0:00:14.29
1619
+ eval_samples = 6810
1620
+ eval_samples_per_second = 476.445
1621
+ eval_steps_per_second = 59.608
1622
+ 09/06/2024 00:04:00 - INFO - __main__ - *** Predict ***
1623
+ [INFO|trainer.py:811] 2024-09-06 00:04:00,968 >> The following columns in the test set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: id, ner_tags, tokens. If id, ner_tags, tokens are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
1624
+ [INFO|trainer.py:3819] 2024-09-06 00:04:00,971 >>
1625
+ ***** Running Prediction *****
1626
+ [INFO|trainer.py:3821] 2024-09-06 00:04:00,971 >> Num examples = 14614
1627
+ [INFO|trainer.py:3824] 2024-09-06 00:04:00,971 >> Batch size = 8
1628
+
1629
  0%| | 0/1827 [00:00<?, ?it/s]
1630
  1%| | 10/1827 [00:00<00:19, 91.06it/s]
1631
  1%| | 20/1827 [00:00<00:22, 79.50it/s]
1632
  2%|▏ | 29/1827 [00:00<00:21, 81.87it/s]
1633
  2%|▏ | 38/1827 [00:00<00:22, 80.70it/s]
1634
  3%|▎ | 47/1827 [00:00<00:21, 81.06it/s]
1635
  3%|▎ | 56/1827 [00:00<00:21, 80.92it/s]
1636
  4%|▎ | 65/1827 [00:00<00:22, 78.97it/s]
1637
  4%|▍ | 74/1827 [00:00<00:21, 79.80it/s]
1638
  5%|▍ | 83/1827 [00:01<00:21, 80.85it/s]
1639
  5%|▌ | 92/1827 [00:01<00:21, 80.71it/s]
1640
  6%|▌ | 101/1827 [00:01<00:20, 82.27it/s]
1641
  6%|▌ | 110/1827 [00:01<00:20, 82.50it/s]
1642
  7%|▋ | 119/1827 [00:01<00:20, 81.39it/s]
1643
  7%|▋ | 128/1827 [00:01<00:21, 80.71it/s]
1644
  7%|▋ | 137/1827 [00:01<00:20, 81.92it/s]
1645
  8%|▊ | 146/1827 [00:01<00:20, 82.51it/s]
1646
  8%|▊ | 155/1827 [00:01<00:20, 82.36it/s]
1647
  9%|▉ | 164/1827 [00:02<00:20, 81.09it/s]
1648
  9%|▉ | 173/1827 [00:02<00:20, 82.25it/s]
1649
  10%|▉ | 182/1827 [00:02<00:19, 83.50it/s]
1650
  10%|█ | 191/1827 [00:02<00:19, 83.60it/s]
1651
  11%|█ | 200/1827 [00:02<00:19, 82.02it/s]
1652
  11%|█▏ | 209/1827 [00:02<00:19, 81.91it/s]
1653
  12%|█▏ | 218/1827 [00:02<00:19, 80.50it/s]
1654
  12%|█▏ | 227/1827 [00:02<00:19, 81.39it/s]
1655
  13%|█▎ | 236/1827 [00:02<00:20, 78.84it/s]
1656
  13%|█▎ | 245/1827 [00:03<00:19, 80.03it/s]
1657
  14%|█▍ | 254/1827 [00:03<00:19, 80.43it/s]
1658
  14%|█▍ | 263/1827 [00:03<00:19, 80.02it/s]
1659
  15%|█▍ | 272/1827 [00:03<00:19, 81.42it/s]
1660
  15%|█▌ | 281/1827 [00:03<00:18, 82.68it/s]
1661
  16%|█▌ | 290/1827 [00:03<00:18, 82.21it/s]
1662
  16%|█▋ | 299/1827 [00:03<00:18, 82.61it/s]
1663
  17%|█▋ | 308/1827 [00:03<00:18, 82.37it/s]
1664
  17%|█▋ | 317/1827 [00:03<00:18, 82.45it/s]
1665
  18%|█▊ | 326/1827 [00:03<00:17, 83.97it/s]
1666
  18%|█▊ | 335/1827 [00:04<00:17, 83.77it/s]
1667
  19%|█▉ | 344/1827 [00:04<00:17, 84.88it/s]
1668
  19%|█▉ | 353/1827 [00:04<00:18, 79.76it/s]
1669
  20%|█▉ | 362/1827 [00:04<00:18, 80.32it/s]
1670
  20%|██ | 371/1827 [00:04<00:18, 80.44it/s]
1671
  21%|██ | 380/1827 [00:04<00:17, 80.41it/s]
1672
  21%|██▏ | 389/1827 [00:04<00:18, 79.52it/s]
1673
  22%|██▏ | 397/1827 [00:04<00:18, 78.70it/s]
1674
  22%|██▏ | 406/1827 [00:04<00:17, 79.69it/s]
1675
  23%|██▎ | 414/1827 [00:05<00:18, 77.35it/s]
1676
  23%|██▎ | 423/1827 [00:05<00:17, 78.47it/s]
1677
  24%|██▎ | 431/1827 [00:05<00:17, 77.94it/s]
1678
  24%|██▍ | 439/1827 [00:05<00:17, 77.85it/s]
1679
  24%|██▍ | 447/1827 [00:05<00:17, 77.54it/s]
1680
  25%|██▍ | 456/1827 [00:05<00:17, 79.01it/s]
1681
  25%|██▌ | 465/1827 [00:05<00:16, 80.47it/s]
1682
  26%|██▌ | 474/1827 [00:05<00:17, 78.97it/s]
1683
  26%|██▋ | 482/1827 [00:05<00:17, 77.54it/s]
1684
  27%|██▋ | 490/1827 [00:06<00:17, 76.65it/s]
1685
  27%|██▋ | 499/1827 [00:06<00:16, 78.31it/s]
1686
  28%|██▊ | 508/1827 [00:06<00:16, 79.34it/s]
1687
  28%|██▊ | 517/1827 [00:06<00:16, 79.61it/s]
1688
  29%|██▉ | 526/1827 [00:06<00:16, 80.80it/s]
1689
  29%|██▉ | 535/1827 [00:06<00:16, 80.00it/s]
1690
  30%|██▉ | 544/1827 [00:06<00:15, 80.92it/s]
1691
  30%|███ | 553/1827 [00:06<00:15, 80.34it/s]
1692
  31%|███ | 562/1827 [00:06<00:15, 81.20it/s]
1693
  31%|███▏ | 571/1827 [00:07<00:15, 83.07it/s]
1694
  32%|███▏ | 580/1827 [00:07<00:14, 83.38it/s]
1695
  32%|███▏ | 589/1827 [00:07<00:14, 83.03it/s]
1696
  33%|███▎ | 598/1827 [00:07<00:14, 82.25it/s]
1697
  33%|███▎ | 607/1827 [00:07<00:14, 81.98it/s]
1698
  34%|███▎ | 616/1827 [00:07<00:14, 81.52it/s]
1699
  34%|███▍ | 625/1827 [00:07<00:14, 82.30it/s]
1700
  35%|███▍ | 634/1827 [00:07<00:14, 82.87it/s]
1701
  35%|███▌ | 643/1827 [00:07<00:14, 83.83it/s]
1702
  36%|███▌ | 652/1827 [00:08<00:15, 78.28it/s]
1703
  36%|███▌ | 661/1827 [00:08<00:14, 79.08it/s]
1704
  37%|███▋ | 670/1827 [00:08<00:14, 80.28it/s]
1705
  37%|███▋ | 679/1827 [00:08<00:14, 80.78it/s]
1706
  38%|███▊ | 688/1827 [00:08<00:13, 82.11it/s]
1707
  38%|███▊ | 697/1827 [00:08<00:13, 83.76it/s]
1708
  39%|███▊ | 706/1827 [00:08<00:13, 83.15it/s]
1709
  39%|███▉ | 715/1827 [00:08<00:13, 84.66it/s]
1710
  40%|███▉ | 724/1827 [00:08<00:12, 85.34it/s]
1711
  40%|████ | 733/1827 [00:09<00:12, 85.46it/s]
1712
  41%|████ | 742/1827 [00:09<00:12, 85.34it/s]
1713
  41%|████ | 751/1827 [00:09<00:12, 86.45it/s]
1714
  42%|████▏ | 760/1827 [00:09<00:12, 85.05it/s]
1715
  42%|████▏ | 769/1827 [00:09<00:12, 86.03it/s]
1716
  43%|████▎ | 778/1827 [00:09<00:12, 84.73it/s]
1717
  43%|████▎ | 787/1827 [00:09<00:12, 81.97it/s]
1718
  44%|████▎ | 796/1827 [00:09<00:12, 83.05it/s]
1719
  44%|████▍ | 805/1827 [00:09<00:12, 81.61it/s]
1720
  45%|████▍ | 814/1827 [00:09<00:12, 82.53it/s]
1721
  45%|████▌ | 823/1827 [00:10<00:12, 82.99it/s]
1722
  46%|████▌ | 832/1827 [00:10<00:12, 82.38it/s]
1723
  46%|████▌ | 841/1827 [00:10<00:11, 82.96it/s]
1724
  47%|████▋ | 850/1827 [00:10<00:11, 83.72it/s]
1725
  47%|████▋ | 859/1827 [00:10<00:11, 84.83it/s]
1726
  48%|████▊ | 868/1827 [00:10<00:11, 84.11it/s]
1727
  48%|████▊ | 877/1827 [00:10<00:11, 83.69it/s]
1728
  48%|████▊ | 886/1827 [00:10<00:11, 84.15it/s]
1729
  49%|████▉ | 895/1827 [00:10<00:11, 83.37it/s]
1730
  49%|████▉ | 904/1827 [00:11<00:10, 84.07it/s]
1731
  50%|████▉ | 913/1827 [00:11<00:10, 84.82it/s]
1732
  50%|█████ | 922/1827 [00:11<00:10, 84.51it/s]
1733
  51%|█████ | 931/1827 [00:11<00:10, 81.89it/s]
1734
  51%|█████▏ | 940/1827 [00:11<00:10, 82.43it/s]
1735
  52%|█████▏ | 949/1827 [00:11<00:10, 81.02it/s]
1736
  52%|█████▏ | 958/1827 [00:11<00:10, 81.56it/s]
1737
  53%|█████▎ | 967/1827 [00:11<00:10, 83.24it/s]
1738
  53%|█████▎ | 976/1827 [00:11<00:10, 82.41it/s]
1739
  54%|█████▍ | 985/1827 [00:12<00:10, 83.96it/s]
1740
  54%|█████▍ | 994/1827 [00:12<00:09, 84.30it/s]
1741
  55%|█████▍ | 1003/1827 [00:12<00:09, 83.87it/s]
1742
  55%|█████▌ | 1012/1827 [00:12<00:09, 83.40it/s]
1743
  56%|█████▌ | 1021/1827 [00:12<00:09, 84.27it/s]
1744
  56%|█████▋ | 1030/1827 [00:12<00:09, 84.53it/s]
1745
  57%|█████▋ | 1039/1827 [00:12<00:09, 82.78it/s]
1746
  57%|█████▋ | 1048/1827 [00:12<00:09, 83.18it/s]
1747
  58%|█████▊ | 1057/1827 [00:12<00:09, 83.19it/s]
1748
  58%|█████▊ | 1066/1827 [00:13<00:09, 84.07it/s]
1749
  59%|█████▉ | 1075/1827 [00:13<00:08, 84.64it/s]
1750
  59%|█████▉ | 1084/1827 [00:13<00:08, 85.01it/s]
1751
  60%|█████▉ | 1093/1827 [00:13<00:08, 85.36it/s]
1752
  60%|██████ | 1102/1827 [00:13<00:08, 84.17it/s]
1753
  61%|██████ | 1111/1827 [00:13<00:08, 82.68it/s]
1754
  61%|██████▏ | 1120/1827 [00:13<00:08, 81.76it/s]
1755
  62%|██████▏ | 1129/1827 [00:13<00:08, 81.92it/s]
1756
  62%|██████▏ | 1138/1827 [00:13<00:08, 81.83it/s]
1757
  63%|██████▎ | 1147/1827 [00:13<00:08, 82.31it/s]
1758
  63%|██████▎ | 1156/1827 [00:14<00:08, 82.02it/s]
1759
  64%|██████▍ | 1165/1827 [00:14<00:08, 79.48it/s]
1760
  64%|██████▍ | 1174/1827 [00:14<00:08, 80.60it/s]
1761
  65%|██████▍ | 1183/1827 [00:14<00:08, 78.91it/s]
1762
  65%|██████▌ | 1192/1827 [00:14<00:07, 81.02it/s]
1763
  66%|██████▌ | 1201/1827 [00:14<00:07, 80.79it/s]
1764
  66%|██████▌ | 1210/1827 [00:14<00:07, 81.06it/s]
1765
  67%|██████▋ | 1219/1827 [00:14<00:07, 79.89it/s]
1766
  67%|██████▋ | 1228/1827 [00:14<00:07, 81.29it/s]
1767
  68%|██████▊ | 1237/1827 [00:15<00:07, 82.50it/s]
1768
  68%|██████▊ | 1246/1827 [00:15<00:07, 82.71it/s]
1769
  69%|██████▊ | 1255/1827 [00:15<00:07, 80.01it/s]
1770
  69%|██████▉ | 1264/1827 [00:15<00:07, 80.07it/s]
1771
  70%|██████▉ | 1273/1827 [00:15<00:06, 81.19it/s]
1772
  70%|███████ | 1282/1827 [00:15<00:06, 82.47it/s]
1773
  71%|███████ | 1291/1827 [00:15<00:06, 83.35it/s]
1774
  71%|███████ | 1300/1827 [00:15<00:06, 83.34it/s]
1775
  72%|███████▏ | 1309/1827 [00:15<00:06, 83.88it/s]
1776
  72%|███████▏ | 1318/1827 [00:16<00:05, 84.84it/s]
1777
  73%|███████▎ | 1327/1827 [00:16<00:05, 83.90it/s]
1778
  73%|███████▎ | 1336/1827 [00:16<00:05, 84.12it/s]
1779
  74%|███████▎ | 1345/1827 [00:16<00:05, 83.22it/s]
1780
  74%|███████▍ | 1354/1827 [00:16<00:05, 82.63it/s]
1781
  75%|███████▍ | 1363/1827 [00:16<00:05, 82.32it/s]
1782
  75%|███████▌ | 1372/1827 [00:16<00:05, 81.87it/s]
1783
  76%|███████▌ | 1381/1827 [00:16<00:05, 82.35it/s]
1784
  76%|███████▌ | 1390/1827 [00:16<00:05, 81.43it/s]
1785
  77%|███████▋ | 1399/1827 [00:17<00:05, 82.08it/s]
1786
  77%|███████▋ | 1408/1827 [00:17<00:05, 82.67it/s]
1787
  78%|███████▊ | 1417/1827 [00:17<00:04, 82.84it/s]
1788
  78%|███████▊ | 1426/1827 [00:17<00:04, 82.22it/s]
1789
  79%|███████▊ | 1435/1827 [00:17<00:04, 81.18it/s]
1790
  79%|███████▉ | 1444/1827 [00:17<00:04, 78.91it/s]
1791
  80%|███████▉ | 1453/1827 [00:17<00:04, 80.47it/s]
1792
  80%|████████ | 1462/1827 [00:17<00:04, 79.66it/s]
1793
  80%|████████ | 1470/1827 [00:17<00:04, 78.26it/s]
1794
  81%|████████ | 1478/1827 [00:18<00:04, 78.65it/s]
1795
  81%|████████▏ | 1487/1827 [00:18<00:04, 79.78it/s]
1796
  82%|████████▏ | 1495/1827 [00:18<00:04, 77.95it/s]
1797
  82%|████████▏ | 1503/1827 [00:18<00:04, 77.75it/s]
1798
  83%|████████▎ | 1512/1827 [00:18<00:03, 79.71it/s]
1799
  83%|████████▎ | 1521/1827 [00:18<00:03, 80.57it/s]
1800
  84%|████████▎ | 1530/1827 [00:18<00:03, 80.96it/s]
1801
  84%|████████▍ | 1539/1827 [00:18<00:03, 81.58it/s]
1802
  85%|████████▍ | 1548/1827 [00:18<00:03, 81.13it/s]
1803
  85%|████████▌ | 1557/1827 [00:19<00:03, 81.30it/s]
1804
  86%|████████▌ | 1566/1827 [00:19<00:03, 81.60it/s]
1805
  86%|████████▌ | 1575/1827 [00:19<00:03, 81.67it/s]
1806
  87%|████████▋ | 1584/1827 [00:19<00:02, 82.53it/s]
1807
  87%|████████▋ | 1593/1827 [00:19<00:02, 82.02it/s]
1808
  88%|████████▊ | 1602/1827 [00:19<00:02, 82.22it/s]
1809
  88%|████████▊ | 1611/1827 [00:19<00:02, 82.92it/s]
1810
  89%|████████▊ | 1620/1827 [00:19<00:02, 79.31it/s]
1811
  89%|████████▉ | 1628/1827 [00:19<00:02, 76.38it/s]
1812
  90%|████████▉ | 1637/1827 [00:20<00:02, 78.37it/s]
1813
  90%|█████████ | 1646/1827 [00:20<00:02, 79.98it/s]
1814
  91%|█████████ | 1655/1827 [00:20<00:02, 81.60it/s]
1815
  91%|█████████ | 1664/1827 [00:20<00:02, 81.06it/s]
1816
  92%|█████████▏| 1673/1827 [00:20<00:01, 81.37it/s]
1817
  92%|█████████▏| 1682/1827 [00:20<00:01, 82.25it/s]
1818
  93%|█████████▎| 1691/1827 [00:20<00:01, 83.16it/s]
1819
  93%|█████████▎| 1700/1827 [00:20<00:01, 81.57it/s]
1820
  94%|█████████▎| 1709/1827 [00:20<00:01, 82.04it/s]
1821
  94%|█████████▍| 1718/1827 [00:21<00:01, 83.23it/s]
1822
  95%|█████████▍| 1727/1827 [00:21<00:01, 80.36it/s]
1823
  95%|█████████▌| 1736/1827 [00:21<00:01, 81.04it/s]
1824
  96%|█████████▌| 1745/1827 [00:21<00:01, 81.89it/s]
1825
  96%|█████████▌| 1754/1827 [00:21<00:00, 82.74it/s]
1826
  96%|█████████▋| 1763/1827 [00:21<00:00, 82.25it/s]
1827
  97%|█████████▋| 1772/1827 [00:21<00:00, 82.64it/s]
1828
  97%|█████████▋| 1781/1827 [00:21<00:00, 83.22it/s]
1829
  98%|█████████▊| 1790/1827 [00:21<00:00, 83.23it/s]
1830
  98%|█████████▊| 1799/1827 [00:22<00:00, 80.14it/s]
1831
  99%|█████████▉| 1808/1827 [00:22<00:00, 81.33it/s]
1832
  99%|█████████▉| 1817/1827 [00:22<00:00, 81.79it/s]
1833
+ [INFO|trainer.py:3503] 2024-09-06 00:04:30,726 >> Saving model checkpoint to /content/dissertation/scripts/ner/output
1834
+ [INFO|configuration_utils.py:472] 2024-09-06 00:04:30,727 >> Configuration saved in /content/dissertation/scripts/ner/output/config.json
1835
+ [INFO|modeling_utils.py:2799] 2024-09-06 00:04:32,122 >> Model weights saved in /content/dissertation/scripts/ner/output/model.safetensors
1836
+ [INFO|tokenization_utils_base.py:2684] 2024-09-06 00:04:32,123 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
1837
+ [INFO|tokenization_utils_base.py:2693] 2024-09-06 00:04:32,123 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
1838
+ ***** predict metrics *****
1839
+ predict_accuracy = 0.9438
1840
+ predict_f1 = 0.6366
1841
+ predict_loss = 0.2277
1842
+ predict_precision = 0.5196
1843
+ predict_recall = 0.8216
1844
+ predict_runtime = 0:00:29.10
1845
+ predict_samples_per_second = 502.103
1846
+ predict_steps_per_second = 62.771
1847
+
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 10.0,
3
- "total_flos": 1.4714840952259542e+16,
4
- "train_loss": 0.002772659832779558,
5
- "train_runtime": 1349.0548,
6
- "train_samples": 29797,
7
- "train_samples_per_second": 220.873,
8
- "train_steps_per_second": 3.454
9
  }
 
1
  {
2
  "epoch": 10.0,
3
+ "total_flos": 1.7176580067661056e+16,
4
+ "train_loss": 0.0812657987344287,
5
+ "train_runtime": 1549.44,
6
+ "train_samples": 34604,
7
+ "train_samples_per_second": 223.332,
8
+ "train_steps_per_second": 3.492
9
  }
trainer_state.json CHANGED
@@ -1,208 +1,215 @@
1
  {
2
- "best_metric": 0.9491525423728814,
3
- "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-2330",
4
  "epoch": 10.0,
5
  "eval_steps": 500,
6
- "global_step": 4660,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 1.0,
13
- "eval_accuracy": 0.9989495654084337,
14
- "eval_f1": 0.9351598173515981,
15
- "eval_loss": 0.0030670168343931437,
16
- "eval_precision": 0.9292196007259528,
17
- "eval_recall": 0.9411764705882353,
18
- "eval_runtime": 13.9946,
19
- "eval_samples_per_second": 486.615,
20
- "eval_steps_per_second": 60.88,
21
- "step": 466
22
- },
23
- {
24
- "epoch": 1.0729613733905579,
25
- "grad_norm": 0.07174628973007202,
26
- "learning_rate": 4.4635193133047216e-05,
27
- "loss": 0.0199,
28
  "step": 500
29
  },
30
  {
31
- "epoch": 2.0,
32
- "eval_accuracy": 0.9989358342373021,
33
- "eval_f1": 0.9386834986474301,
34
- "eval_loss": 0.0030621723271906376,
35
- "eval_precision": 0.9212389380530973,
36
- "eval_recall": 0.9568014705882353,
37
- "eval_runtime": 14.0045,
38
- "eval_samples_per_second": 486.272,
39
- "eval_steps_per_second": 60.838,
40
- "step": 932
41
- },
42
- {
43
- "epoch": 2.1459227467811157,
44
- "grad_norm": 0.13906870782375336,
45
- "learning_rate": 3.927038626609442e-05,
46
- "loss": 0.0026,
47
  "step": 1000
48
  },
49
  {
50
- "epoch": 3.0,
51
- "eval_accuracy": 0.9989152374806047,
52
- "eval_f1": 0.9360919540229885,
53
- "eval_loss": 0.004003152716904879,
54
- "eval_precision": 0.9365225390984361,
55
- "eval_recall": 0.9356617647058824,
56
- "eval_runtime": 13.9451,
57
- "eval_samples_per_second": 488.343,
58
- "eval_steps_per_second": 61.097,
59
- "step": 1398
60
- },
61
- {
62
- "epoch": 3.218884120171674,
63
- "grad_norm": 0.14111244678497314,
64
- "learning_rate": 3.3905579399141636e-05,
65
- "loss": 0.0011,
66
  "step": 1500
67
  },
68
  {
69
- "epoch": 4.0,
70
- "eval_accuracy": 0.9987230010847625,
71
- "eval_f1": 0.9308584686774942,
72
- "eval_loss": 0.005216046702116728,
73
- "eval_precision": 0.9400187441424555,
74
- "eval_recall": 0.921875,
75
- "eval_runtime": 14.2384,
76
- "eval_samples_per_second": 478.283,
77
- "eval_steps_per_second": 59.838,
78
- "step": 1864
79
- },
80
- {
81
- "epoch": 4.291845493562231,
82
- "grad_norm": 0.09000600874423981,
83
- "learning_rate": 2.8540772532188842e-05,
84
- "loss": 0.001,
85
  "step": 2000
86
  },
87
  {
88
- "epoch": 5.0,
89
- "eval_accuracy": 0.9989426998228679,
90
- "eval_f1": 0.9491525423728814,
91
- "eval_loss": 0.004777050111442804,
92
- "eval_precision": 0.9461187214611873,
93
- "eval_recall": 0.9522058823529411,
94
- "eval_runtime": 13.9397,
95
- "eval_samples_per_second": 488.533,
96
- "eval_steps_per_second": 61.12,
97
- "step": 2330
98
- },
99
- {
100
- "epoch": 5.364806866952789,
101
- "grad_norm": 0.0045097870752215385,
102
- "learning_rate": 2.3175965665236052e-05,
103
- "loss": 0.0005,
104
  "step": 2500
105
  },
106
  {
107
- "epoch": 6.0,
108
- "eval_accuracy": 0.9988809095527758,
109
- "eval_f1": 0.9448244414044688,
110
- "eval_loss": 0.004644877277314663,
111
- "eval_precision": 0.9375565610859729,
112
- "eval_recall": 0.9522058823529411,
113
- "eval_runtime": 14.0259,
114
- "eval_samples_per_second": 485.531,
115
- "eval_steps_per_second": 60.745,
116
- "step": 2796
117
- },
118
- {
119
- "epoch": 6.437768240343348,
120
- "grad_norm": 0.029634617269039154,
121
- "learning_rate": 1.7811158798283262e-05,
122
- "loss": 0.0004,
123
  "step": 3000
124
  },
125
  {
126
- "epoch": 7.0,
127
- "eval_accuracy": 0.9989564309939994,
128
- "eval_f1": 0.9446460980036298,
129
- "eval_loss": 0.0049773636274039745,
130
- "eval_precision": 0.9327956989247311,
131
- "eval_recall": 0.9568014705882353,
132
- "eval_runtime": 13.9217,
133
- "eval_samples_per_second": 489.166,
134
- "eval_steps_per_second": 61.2,
135
- "step": 3262
136
- },
137
- {
138
- "epoch": 7.510729613733906,
139
- "grad_norm": 0.003798937890678644,
140
- "learning_rate": 1.2446351931330473e-05,
141
- "loss": 0.0002,
142
  "step": 3500
143
  },
144
  {
145
- "epoch": 8.0,
146
- "eval_accuracy": 0.9989221030661705,
147
- "eval_f1": 0.9435520881138136,
148
- "eval_loss": 0.005484889727085829,
149
- "eval_precision": 0.9422548120989918,
150
- "eval_recall": 0.9448529411764706,
151
- "eval_runtime": 13.8923,
152
- "eval_samples_per_second": 490.2,
153
- "eval_steps_per_second": 61.329,
154
- "step": 3728
155
- },
156
- {
157
- "epoch": 8.583690987124463,
158
- "grad_norm": 0.0004711664514616132,
159
- "learning_rate": 7.0815450643776825e-06,
160
- "loss": 0.0001,
161
  "step": 4000
162
  },
163
  {
164
- "epoch": 9.0,
165
- "eval_accuracy": 0.9989426998228679,
166
- "eval_f1": 0.9441903019213176,
167
- "eval_loss": 0.0057435426861047745,
168
- "eval_precision": 0.9398907103825137,
169
- "eval_recall": 0.9485294117647058,
170
- "eval_runtime": 13.9755,
171
- "eval_samples_per_second": 487.281,
172
- "eval_steps_per_second": 60.964,
173
- "step": 4194
174
- },
175
- {
176
- "epoch": 9.656652360515022,
177
- "grad_norm": 0.005946693476289511,
178
- "learning_rate": 1.7167381974248929e-06,
179
- "loss": 0.0001,
180
  "step": 4500
181
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  {
183
  "epoch": 10.0,
184
- "eval_accuracy": 0.9989083718950389,
185
- "eval_f1": 0.9416058394160585,
186
- "eval_loss": 0.005793666001409292,
187
- "eval_precision": 0.9347826086956522,
188
- "eval_recall": 0.9485294117647058,
189
- "eval_runtime": 14.4595,
190
- "eval_samples_per_second": 470.971,
191
- "eval_steps_per_second": 58.923,
192
- "step": 4660
193
  },
194
  {
195
  "epoch": 10.0,
196
- "step": 4660,
197
- "total_flos": 1.4714840952259542e+16,
198
- "train_loss": 0.002772659832779558,
199
- "train_runtime": 1349.0548,
200
- "train_samples_per_second": 220.873,
201
- "train_steps_per_second": 3.454
202
  }
203
  ],
204
  "logging_steps": 500,
205
- "max_steps": 4660,
206
  "num_input_tokens_seen": 0,
207
  "num_train_epochs": 10,
208
  "save_steps": 500,
@@ -218,7 +225,7 @@
218
  "attributes": {}
219
  }
220
  },
221
- "total_flos": 1.4714840952259542e+16,
222
  "train_batch_size": 32,
223
  "trial_name": null,
224
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.45455732567249935,
3
+ "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-2705",
4
  "epoch": 10.0,
5
  "eval_steps": 500,
6
+ "global_step": 5410,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.9242144177449169,
13
+ "grad_norm": 1.4528056383132935,
14
+ "learning_rate": 4.537892791127542e-05,
15
+ "loss": 0.3191,
 
 
 
 
 
 
 
 
 
 
 
 
16
  "step": 500
17
  },
18
  {
19
+ "epoch": 1.0,
20
+ "eval_accuracy": 0.8443022505389485,
21
+ "eval_f1": 0.407486125870823,
22
+ "eval_loss": 0.47723615169525146,
23
+ "eval_precision": 0.27250473783954515,
24
+ "eval_recall": 0.8074403369209172,
25
+ "eval_runtime": 14.3908,
26
+ "eval_samples_per_second": 473.218,
27
+ "eval_steps_per_second": 59.204,
28
+ "step": 541
29
+ },
30
+ {
31
+ "epoch": 1.8484288354898335,
32
+ "grad_norm": 1.5587466955184937,
33
+ "learning_rate": 4.075785582255083e-05,
34
+ "loss": 0.1619,
35
  "step": 1000
36
  },
37
  {
38
+ "epoch": 2.0,
39
+ "eval_accuracy": 0.8552871874442172,
40
+ "eval_f1": 0.43975123088883133,
41
+ "eval_loss": 0.4583655595779419,
42
+ "eval_precision": 0.30406737143881024,
43
+ "eval_recall": 0.7941038839494619,
44
+ "eval_runtime": 14.212,
45
+ "eval_samples_per_second": 479.173,
46
+ "eval_steps_per_second": 59.949,
47
+ "step": 1082
48
+ },
49
+ {
50
+ "epoch": 2.7726432532347505,
51
+ "grad_norm": 0.9616082310676575,
52
+ "learning_rate": 3.613678373382625e-05,
53
+ "loss": 0.11,
54
  "step": 1500
55
  },
56
  {
57
+ "epoch": 3.0,
58
+ "eval_accuracy": 0.8435470361267112,
59
+ "eval_f1": 0.4338006724608259,
60
+ "eval_loss": 0.6447410583496094,
61
+ "eval_precision": 0.29758899817216466,
62
+ "eval_recall": 0.7999532054281703,
63
+ "eval_runtime": 14.3233,
64
+ "eval_samples_per_second": 475.451,
65
+ "eval_steps_per_second": 59.484,
66
+ "step": 1623
67
+ },
68
+ {
69
+ "epoch": 3.6968576709796674,
70
+ "grad_norm": 1.4018645286560059,
71
+ "learning_rate": 3.1515711645101665e-05,
72
+ "loss": 0.0764,
73
  "step": 2000
74
  },
75
  {
76
+ "epoch": 4.0,
77
+ "eval_accuracy": 0.8398602166778805,
78
+ "eval_f1": 0.42338430558177587,
79
+ "eval_loss": 0.7413247227668762,
80
+ "eval_precision": 0.2895756219333735,
81
+ "eval_recall": 0.7870846981750117,
82
+ "eval_runtime": 14.5396,
83
+ "eval_samples_per_second": 468.375,
84
+ "eval_steps_per_second": 58.598,
85
+ "step": 2164
86
+ },
87
+ {
88
+ "epoch": 4.621072088724584,
89
+ "grad_norm": 1.0904265642166138,
90
+ "learning_rate": 2.6894639556377083e-05,
91
+ "loss": 0.0567,
92
  "step": 2500
93
  },
94
  {
95
+ "epoch": 5.0,
96
+ "eval_accuracy": 0.8564886649182308,
97
+ "eval_f1": 0.45455732567249935,
98
+ "eval_loss": 0.7005925178527832,
99
+ "eval_precision": 0.3152508603513856,
100
+ "eval_recall": 0.8144595226953674,
101
+ "eval_runtime": 14.2723,
102
+ "eval_samples_per_second": 477.148,
103
+ "eval_steps_per_second": 59.696,
104
+ "step": 2705
105
+ },
106
+ {
107
+ "epoch": 5.545286506469501,
108
+ "grad_norm": 1.240962028503418,
109
+ "learning_rate": 2.2273567467652497e-05,
110
+ "loss": 0.0428,
111
  "step": 3000
112
  },
113
  {
114
+ "epoch": 6.0,
115
+ "eval_accuracy": 0.8504057561069384,
116
+ "eval_f1": 0.44700636942675154,
117
+ "eval_loss": 0.8111857175827026,
118
+ "eval_precision": 0.30710659898477155,
119
+ "eval_recall": 0.8210107627515209,
120
+ "eval_runtime": 14.3991,
121
+ "eval_samples_per_second": 472.946,
122
+ "eval_steps_per_second": 59.17,
123
+ "step": 3246
124
+ },
125
+ {
126
+ "epoch": 6.469500924214418,
127
+ "grad_norm": 0.44737720489501953,
128
+ "learning_rate": 1.7652495378927914e-05,
129
+ "loss": 0.0332,
130
  "step": 3500
131
  },
132
  {
133
+ "epoch": 7.0,
134
+ "eval_accuracy": 0.8532961676301372,
135
+ "eval_f1": 0.4493518337567586,
136
+ "eval_loss": 0.904643714427948,
137
+ "eval_precision": 0.3113658932924077,
138
+ "eval_recall": 0.8069723912026205,
139
+ "eval_runtime": 14.3036,
140
+ "eval_samples_per_second": 476.105,
141
+ "eval_steps_per_second": 59.566,
142
+ "step": 3787
143
+ },
144
+ {
145
+ "epoch": 7.393715341959335,
146
+ "grad_norm": 0.7116318941116333,
147
+ "learning_rate": 1.3031423290203328e-05,
148
+ "loss": 0.0257,
149
  "step": 4000
150
  },
151
  {
152
+ "epoch": 8.0,
153
+ "eval_accuracy": 0.8481538440413583,
154
+ "eval_f1": 0.44435897435897437,
155
+ "eval_loss": 0.9722912907600403,
156
+ "eval_precision": 0.30602154335158044,
157
+ "eval_recall": 0.8109499298081423,
158
+ "eval_runtime": 14.4222,
159
+ "eval_samples_per_second": 472.19,
160
+ "eval_steps_per_second": 59.076,
161
+ "step": 4328
162
+ },
163
+ {
164
+ "epoch": 8.317929759704251,
165
+ "grad_norm": 0.9520462155342102,
166
+ "learning_rate": 8.410351201478742e-06,
167
+ "loss": 0.022,
168
  "step": 4500
169
  },
170
+ {
171
+ "epoch": 9.0,
172
+ "eval_accuracy": 0.850186057368833,
173
+ "eval_f1": 0.44668737060041414,
174
+ "eval_loss": 1.002764105796814,
175
+ "eval_precision": 0.30871042747272404,
176
+ "eval_recall": 0.8076743097800655,
177
+ "eval_runtime": 14.2485,
178
+ "eval_samples_per_second": 477.944,
179
+ "eval_steps_per_second": 59.796,
180
+ "step": 4869
181
+ },
182
+ {
183
+ "epoch": 9.242144177449168,
184
+ "grad_norm": 0.6707109212875366,
185
+ "learning_rate": 3.789279112754159e-06,
186
+ "loss": 0.0181,
187
+ "step": 5000
188
+ },
189
  {
190
  "epoch": 10.0,
191
+ "eval_accuracy": 0.8533304955579661,
192
+ "eval_f1": 0.45038613797131544,
193
+ "eval_loss": 1.0022608041763306,
194
+ "eval_precision": 0.31162999550965426,
195
+ "eval_recall": 0.8118858212447356,
196
+ "eval_runtime": 14.4559,
197
+ "eval_samples_per_second": 471.089,
198
+ "eval_steps_per_second": 58.938,
199
+ "step": 5410
200
  },
201
  {
202
  "epoch": 10.0,
203
+ "step": 5410,
204
+ "total_flos": 1.7176580067661056e+16,
205
+ "train_loss": 0.0812657987344287,
206
+ "train_runtime": 1549.44,
207
+ "train_samples_per_second": 223.332,
208
+ "train_steps_per_second": 3.492
209
  }
210
  ],
211
  "logging_steps": 500,
212
+ "max_steps": 5410,
213
  "num_input_tokens_seen": 0,
214
  "num_train_epochs": 10,
215
  "save_steps": 500,
 
225
  "attributes": {}
226
  }
227
  },
228
+ "total_flos": 1.7176580067661056e+16,
229
  "train_batch_size": 32,
230
  "trial_name": null,
231
  "trial_params": null