alyzbane committed on
Commit
117d285
·
verified ·
1 Parent(s): 90b8e8f

End of training

Browse files
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ classification_report.png filter=lfs diff=lfs merge=lfs -text
37
+ evaluation/clf_bar.png filter=lfs diff=lfs merge=lfs -text
38
+ evaluation/confusion_matrix.png filter=lfs diff=lfs merge=lfs -text
39
+ train_and_eval.png filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: apache-2.0
4
+ base_model: microsoft/resnet-50
5
+ tags:
6
+ - generated_from_trainer
7
+ metrics:
8
+ - precision
9
+ - recall
10
+ - f1
11
+ - accuracy
12
+ model-index:
13
+ - name: 2025-02-05-21-58-41-resnet-50
14
+ results: []
15
+ ---
16
+
17
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
18
+ should probably proofread and complete it, then remove this comment. -->
19
+
20
+ # 2025-02-05-21-58-41-resnet-50
21
+
22
+ This model is a fine-tuned version of [microsoft/resnet-50](https://huggingface.co/microsoft/resnet-50) on an unknown dataset with 13 image classes (see `id2label` in `config.json`).
23
+ It achieves the following results on the evaluation set (best checkpoint by validation loss: epoch 7, step 721):
24
+ - Loss: 0.0762
25
+ - Precision: 0.9810
26
+ - Recall: 0.9805
27
+ - F1: 0.9804
28
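+ - Accuracy: 0.9766 (appears to be macro-averaged per-class recall, i.e. balanced accuracy: on the test split this metric equals the classification report's macro-avg recall; plain accuracy is reported as Top1 Accuracy)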
29
+ - Top1 Accuracy: 0.9805
30
+ - Error Rate: 0.0234
31
+
32
+ ## Model description
33
+
34
+ More information needed
35
+
36
+ ## Intended uses & limitations
37
+
38
+ More information needed
39
+
40
+ ## Training and evaluation data
41
+
42
+ More information needed
43
+
44
+ ## Training procedure
45
+
46
+ ### Training hyperparameters
47
+
48
+ The following hyperparameters were used during training:
49
+ - learning_rate: 0.0002
50
+ - train_batch_size: 32
51
+ - eval_batch_size: 32
52
+ - seed: 3407
53
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
54
+ - lr_scheduler_type: linear
55
+ - lr_scheduler_warmup_ratio: 0.1
56
+ - num_epochs: 10 (configured maximum; early stopping with patience 2 ended training after epoch 9)
57
+
58
+ ### Training results
59
+
60
+ | Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1 | Accuracy | Top1 Accuracy | Error Rate |
61
+ |:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|:-------------:|:----------:|
62
+ | 2.4636 | 1.0 | 103 | 2.1548 | 0.6867 | 0.6293 | 0.5929 | 0.5824 | 0.6293 | 0.4176 |
63
+ | 1.3967 | 2.0 | 206 | 0.5586 | 0.8893 | 0.8780 | 0.8770 | 0.8743 | 0.8780 | 0.1257 |
64
+ | 0.4328 | 3.0 | 309 | 0.2100 | 0.9565 | 0.9512 | 0.9518 | 0.9524 | 0.9512 | 0.0476 |
65
+ | 0.2544 | 4.0 | 412 | 0.1414 | 0.9628 | 0.9610 | 0.9613 | 0.9588 | 0.9610 | 0.0412 |
66
+ | 0.171 | 5.0 | 515 | 0.1127 | 0.9690 | 0.9683 | 0.9683 | 0.9638 | 0.9683 | 0.0362 |
67
+ | 0.1556 | 6.0 | 618 | 0.0976 | 0.9715 | 0.9707 | 0.9706 | 0.9681 | 0.9707 | 0.0319 |
68
+ | 0.118 | 7.0 | 721 | 0.0762 | 0.9810 | 0.9805 | 0.9804 | 0.9766 | 0.9805 | 0.0234 |
69
+ | 0.1142 | 8.0 | 824 | 0.0853 | 0.9809 | 0.9805 | 0.9804 | 0.9813 | 0.9805 | 0.0187 |
70
+ | 0.0978 | 9.0 | 927 | 0.0798 | 0.9808 | 0.9805 | 0.9803 | 0.9788 | 0.9805 | 0.0212 |
71
+
72
+
73
+ ### Framework versions
74
+
75
+ - Transformers 4.45.2
76
+ - Pytorch 2.5.1+cu121
77
+ - Datasets 3.2.0
78
+ - Tokenizers 0.20.3
all_results.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.0,
3
+ "eval_accuracy": 0.9765634013174265,
4
+ "eval_error_rate": 0.02343659868257353,
5
+ "eval_f1": 0.9804014205399844,
6
+ "eval_loss": 0.07615971565246582,
7
+ "eval_precision": 0.9810152386446259,
8
+ "eval_recall": 0.9804878048780488,
9
+ "eval_runtime": 8.7366,
10
+ "eval_samples_per_second": 46.929,
11
+ "eval_steps_per_second": 1.488,
12
+ "eval_top1_accuracy": 0.9804878048780488,
13
+ "test_accuracy": 0.9736256821907545,
14
+ "test_error_rate": 0.026374317809245484,
15
+ "test_f1": 0.975524936170449,
16
+ "test_loss": 0.07458387315273285,
17
+ "test_precision": 0.9770910372814001,
18
+ "test_recall": 0.975609756097561,
19
+ "test_runtime": 9.7731,
20
+ "test_samples_per_second": 41.952,
21
+ "test_steps_per_second": 1.33,
22
+ "test_top1_accuracy": 0.975609756097561,
23
+ "total_flos": 6.268961276565443e+17,
24
+ "train_loss": 0.578233896850125,
25
+ "train_runtime": 713.8479,
26
+ "train_samples_per_second": 45.906,
27
+ "train_steps_per_second": 1.443
28
+ }
classification_report.csv ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,precision,recall,f1-score,support
2
+ Acacia,0.9090909090909091,1.0,0.9523809523809523,30.0
3
+ Coconut,1.0,0.9444444444444444,0.9714285714285714,36.0
4
+ Dau,1.0,0.9354838709677419,0.9666666666666667,31.0
5
+ Dita,0.9512195121951219,1.0,0.975,39.0
6
+ Ilang-ilang,0.9333333333333333,1.0,0.9655172413793104,28.0
7
+ Macarthur,1.0,1.0,1.0,34.0
8
+ Mango,1.0,0.9545454545454546,0.9767441860465116,44.0
9
+ Mulawin,1.0,0.8571428571428571,0.9230769230769231,21.0
10
+ Narra,0.9333333333333333,0.9655172413793104,0.9491525423728814,29.0
11
+ Palmera,1.0,1.0,1.0,39.0
12
+ Royal Palm,1.0,1.0,1.0,24.0
13
+ Santol,1.0,1.0,1.0,29.0
14
+ Tabebuia,0.9629629629629629,1.0,0.9811320754716981,26.0
15
+ accuracy,0.975609756097561,0.975609756097561,0.975609756097561,0.975609756097561
16
+ macro avg,0.97614923468582,0.9736256821907545,0.9739307045248858,410.0
17
+ weighted avg,0.9770910372814001,0.975609756097561,0.975524936170449,410.0
classification_report.png ADDED

Git LFS Details

  • SHA256: abdebc95b93f31887a72f4292c42b0ca540ece4fc9449d5c384cf415865682d8
  • Pointer size: 131 Bytes
  • Size of remote file: 351 kB
config.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/resnet-50",
3
+ "architectures": [
4
+ "ResNetForImageClassification"
5
+ ],
6
+ "depths": [
7
+ 3,
8
+ 4,
9
+ 6,
10
+ 3
11
+ ],
12
+ "downsample_in_bottleneck": false,
13
+ "downsample_in_first_stage": false,
14
+ "embedding_size": 64,
15
+ "hidden_act": "relu",
16
+ "hidden_sizes": [
17
+ 256,
18
+ 512,
19
+ 1024,
20
+ 2048
21
+ ],
22
+ "id2label": {
23
+ "0": "Acacia",
24
+ "1": "Coconut",
25
+ "2": "Dau",
26
+ "3": "Dita",
27
+ "4": "Ilang-ilang",
28
+ "5": "Macarthur",
29
+ "6": "Mango",
30
+ "7": "Mulawin",
31
+ "8": "Narra",
32
+ "9": "Palmera",
33
+ "10": "Royal Palm",
34
+ "11": "Santol",
35
+ "12": "Tabebuia"
36
+ },
37
+ "label2id": {
38
+ "Acacia": 0,
39
+ "Coconut": 1,
40
+ "Dau": 2,
41
+ "Dita": 3,
42
+ "Ilang-ilang": 4,
43
+ "Macarthur": 5,
44
+ "Mango": 6,
45
+ "Mulawin": 7,
46
+ "Narra": 8,
47
+ "Palmera": 9,
48
+ "Royal Palm": 10,
49
+ "Santol": 11,
50
+ "Tabebuia": 12
51
+ },
52
+ "layer_type": "bottleneck",
53
+ "model_type": "resnet",
54
+ "num_channels": 3,
55
+ "out_features": [
56
+ "stage4"
57
+ ],
58
+ "out_indices": [
59
+ 4
60
+ ],
61
+ "stage_names": [
62
+ "stem",
63
+ "stage1",
64
+ "stage2",
65
+ "stage3",
66
+ "stage4"
67
+ ],
68
+ "torch_dtype": "float32",
69
+ "transformers_version": "4.45.2"
70
+ }
eval_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.0,
3
+ "eval_accuracy": 0.9765634013174265,
4
+ "eval_error_rate": 0.02343659868257353,
5
+ "eval_f1": 0.9804014205399844,
6
+ "eval_loss": 0.07615971565246582,
7
+ "eval_precision": 0.9810152386446259,
8
+ "eval_recall": 0.9804878048780488,
9
+ "eval_runtime": 8.7366,
10
+ "eval_samples_per_second": 46.929,
11
+ "eval_steps_per_second": 1.488,
12
+ "eval_top1_accuracy": 0.9804878048780488
13
+ }
evaluation/classification_report.csv ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,precision,recall,f1-score,support
2
+ Acacia,0.9090909090909091,1.0,0.9523809523809523,30.0
3
+ Coconut,1.0,0.9444444444444444,0.9714285714285714,36.0
4
+ Dau,1.0,0.9354838709677419,0.9666666666666667,31.0
5
+ Dita,0.9512195121951219,1.0,0.975,39.0
6
+ Ilang-ilang,0.9333333333333333,1.0,0.9655172413793104,28.0
7
+ Macarthur,1.0,1.0,1.0,34.0
8
+ Mango,1.0,0.9545454545454546,0.9767441860465116,44.0
9
+ Mulawin,1.0,0.8571428571428571,0.9230769230769231,21.0
10
+ Narra,0.9333333333333333,0.9655172413793104,0.9491525423728814,29.0
11
+ Palmera,1.0,1.0,1.0,39.0
12
+ Royal Palm,1.0,1.0,1.0,24.0
13
+ Santol,1.0,1.0,1.0,29.0
14
+ Tabebuia,0.9629629629629629,1.0,0.9811320754716981,26.0
15
+ accuracy,0.975609756097561,0.975609756097561,0.975609756097561,0.975609756097561
16
+ macro avg,0.97614923468582,0.9736256821907545,0.9739307045248858,410.0
17
+ weighted avg,0.9770910372814001,0.975609756097561,0.975524936170449,410.0
evaluation/clf_bar.png ADDED

Git LFS Details

  • SHA256: 52055907052322a0d5eda3ce213ee1869ddb6c0449877f3f5bad9046b2e1b881
  • Pointer size: 131 Bytes
  • Size of remote file: 257 kB
evaluation/confusion_matrix.csv ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,Acacia,Coconut,Dau,Dita,Ilang-ilang,Macarthur,Mango,Mulawin,Narra,Palmera,Royal Palm,Santol,Tabebuia
2
+ Acacia,30,0,0,0,0,0,0,0,0,0,0,0,0
3
+ Coconut,1,34,0,1,0,0,0,0,0,0,0,0,0
4
+ Dau,0,0,29,0,2,0,0,0,0,0,0,0,0
5
+ Dita,0,0,0,39,0,0,0,0,0,0,0,0,0
6
+ Ilang-ilang,0,0,0,0,28,0,0,0,0,0,0,0,0
7
+ Macarthur,0,0,0,0,0,34,0,0,0,0,0,0,0
8
+ Mango,0,0,0,0,0,0,42,0,2,0,0,0,0
9
+ Mulawin,2,0,0,1,0,0,0,18,0,0,0,0,0
10
+ Narra,0,0,0,0,0,0,0,0,28,0,0,0,1
11
+ Palmera,0,0,0,0,0,0,0,0,0,39,0,0,0
12
+ Royal Palm,0,0,0,0,0,0,0,0,0,0,24,0,0
13
+ Santol,0,0,0,0,0,0,0,0,0,0,0,29,0
14
+ Tabebuia,0,0,0,0,0,0,0,0,0,0,0,0,26
evaluation/confusion_matrix.png ADDED

Git LFS Details

  • SHA256: 1f0936aad5f66233558dd7a195514aca450b1ef1c383dd616e1e5d2c2774d90f
  • Pointer size: 131 Bytes
  • Size of remote file: 224 kB
evaluation/results.log ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-02-05 22:13:23,203 - INFO - plot_confusion_matrix - Confusion Matrix:
2
+ [[30 0 0 0 0 0 0 0 0 0 0 0 0]
3
+ [ 1 34 0 1 0 0 0 0 0 0 0 0 0]
4
+ [ 0 0 29 0 2 0 0 0 0 0 0 0 0]
5
+ [ 0 0 0 39 0 0 0 0 0 0 0 0 0]
6
+ [ 0 0 0 0 28 0 0 0 0 0 0 0 0]
7
+ [ 0 0 0 0 0 34 0 0 0 0 0 0 0]
8
+ [ 0 0 0 0 0 0 42 0 2 0 0 0 0]
9
+ [ 2 0 0 1 0 0 0 18 0 0 0 0 0]
10
+ [ 0 0 0 0 0 0 0 0 28 0 0 0 1]
11
+ [ 0 0 0 0 0 0 0 0 0 39 0 0 0]
12
+ [ 0 0 0 0 0 0 0 0 0 0 24 0 0]
13
+ [ 0 0 0 0 0 0 0 0 0 0 0 29 0]
14
+ [ 0 0 0 0 0 0 0 0 0 0 0 0 26]]
15
+ 2025-02-05 22:13:23,764 - INFO - plot_confusion_matrix - Confusion matrix saved to 2025-02-05-21-58-41-resnet-50/evaluation/confusion_matrix.png
16
+ 2025-02-05 22:13:23,922 - INFO - plot_confusion_matrix - Confusion matrix report saved to 2025-02-05-21-58-41-resnet-50/evaluation/confusion_matrix.csv
17
+ 2025-02-05 22:13:28,873 - INFO - classification_report_bar - Classification report saved to 2025-02-05-21-58-41-resnet-50/evaluation/classification_report.csv
18
+ 2025-02-05 22:13:29,374 - INFO - classification_report_bar - Classification report bar chart saved to 2025-02-05-21-58-41-resnet-50/evaluation/clf_bar.png
19
+ 2025-02-05 22:13:29,375 - INFO - classification_report_bar - Overall Accuracy: 0.976
20
+ 2025-02-05 22:13:32,675 - INFO - plot_classification_report_heatmap - Classification report heatmap saved to 2025-02-05-21-58-41-resnet-50/classification_report.png
21
+ 2025-02-05 22:13:32,796 - INFO - plot_classification_report_heatmap - Classification report saved to 2025-02-05-21-58-41-resnet-50/classification_report.csv
22
+ 2025-02-05 22:13:40,000 - INFO - plot_results - Training metrics saved to 2025-02-05-21-58-41-resnet-50/training_metrics.csv
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5292c1c7eb947a6efd76ae4eab8cbac9c75dc73a9f37c08baad88f638ea1a04c
3
+ size 94393148
preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_pct": 0.875,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.485,
8
+ 0.456,
9
+ 0.406
10
+ ],
11
+ "image_processor_type": "ConvNextImageProcessor",
12
+ "image_std": [
13
+ 0.229,
14
+ 0.224,
15
+ 0.225
16
+ ],
17
+ "resample": 3,
18
+ "rescale_factor": 0.00392156862745098,
19
+ "size": {
20
+ "shortest_edge": 224
21
+ }
22
+ }
test_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test_accuracy": 0.9736256821907545,
3
+ "test_error_rate": 0.026374317809245484,
4
+ "test_f1": 0.975524936170449,
5
+ "test_loss": 0.07458387315273285,
6
+ "test_precision": 0.9770910372814001,
7
+ "test_recall": 0.975609756097561,
8
+ "test_runtime": 9.7731,
9
+ "test_samples_per_second": 41.952,
10
+ "test_steps_per_second": 1.33,
11
+ "test_top1_accuracy": 0.975609756097561
12
+ }
train_and_eval.png ADDED

Git LFS Details

  • SHA256: 06a8db10c9c5b96187e22cdc0c6ad07b9044b3eb0cfd9580bdc942c5efa585dc
  • Pointer size: 131 Bytes
  • Size of remote file: 301 kB
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.0,
3
+ "total_flos": 6.268961276565443e+17,
4
+ "train_loss": 0.578233896850125,
5
+ "train_runtime": 713.8479,
6
+ "train_samples_per_second": 45.906,
7
+ "train_steps_per_second": 1.443
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.07615971565246582,
3
+ "best_model_checkpoint": "2025-02-05-21-58-41-resnet-50/checkpoint-721",
4
+ "epoch": 9.0,
5
+ "eval_steps": 500,
6
+ "global_step": 927,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "train_accuracy": 0.28471162648764115
14
+ },
15
+ {
16
+ "epoch": 1.0,
17
+ "grad_norm": 3.584263563156128,
18
+ "learning_rate": 0.0002,
19
+ "loss": 2.4636,
20
+ "step": 103
21
+ },
22
+ {
23
+ "epoch": 1.0,
24
+ "eval_accuracy": 0.5823726629787873,
25
+ "eval_error_rate": 0.4176273370212127,
26
+ "eval_f1": 0.5929270791957494,
27
+ "eval_loss": 2.154754877090454,
28
+ "eval_precision": 0.6866708444643892,
29
+ "eval_recall": 0.6292682926829268,
30
+ "eval_runtime": 10.0935,
31
+ "eval_samples_per_second": 40.62,
32
+ "eval_steps_per_second": 1.288,
33
+ "eval_top1_accuracy": 0.6292682926829268,
34
+ "step": 103
35
+ },
36
+ {
37
+ "epoch": 2.0,
38
+ "train_accuracy": 0.720368863574722
39
+ },
40
+ {
41
+ "epoch": 2.0,
42
+ "grad_norm": 3.2492337226867676,
43
+ "learning_rate": 0.00017777777777777779,
44
+ "loss": 1.3967,
45
+ "step": 206
46
+ },
47
+ {
48
+ "epoch": 2.0,
49
+ "eval_accuracy": 0.8743437160779054,
50
+ "eval_error_rate": 0.1256562839220946,
51
+ "eval_f1": 0.8769823548484003,
52
+ "eval_loss": 0.5585801601409912,
53
+ "eval_precision": 0.8893317170713972,
54
+ "eval_recall": 0.8780487804878049,
55
+ "eval_runtime": 8.1523,
56
+ "eval_samples_per_second": 50.292,
57
+ "eval_steps_per_second": 1.595,
58
+ "eval_top1_accuracy": 0.8780487804878049,
59
+ "step": 206
60
+ },
61
+ {
62
+ "epoch": 3.0,
63
+ "train_accuracy": 0.8866286954163276
64
+ },
65
+ {
66
+ "epoch": 3.0,
67
+ "grad_norm": 3.393784761428833,
68
+ "learning_rate": 0.00015555555555555556,
69
+ "loss": 0.4328,
70
+ "step": 309
71
+ },
72
+ {
73
+ "epoch": 3.0,
74
+ "eval_accuracy": 0.9524324562338874,
75
+ "eval_error_rate": 0.04756754376611261,
76
+ "eval_f1": 0.9518375743563098,
77
+ "eval_loss": 0.20998375117778778,
78
+ "eval_precision": 0.9565172518406057,
79
+ "eval_recall": 0.9512195121951219,
80
+ "eval_runtime": 8.4452,
81
+ "eval_samples_per_second": 48.548,
82
+ "eval_steps_per_second": 1.539,
83
+ "eval_top1_accuracy": 0.9512195121951219,
84
+ "step": 309
85
+ },
86
+ {
87
+ "epoch": 4.0,
88
+ "train_accuracy": 0.92324382967182
89
+ },
90
+ {
91
+ "epoch": 4.0,
92
+ "grad_norm": 8.674211502075195,
93
+ "learning_rate": 0.00013333333333333334,
94
+ "loss": 0.2544,
95
+ "step": 412
96
+ },
97
+ {
98
+ "epoch": 4.0,
99
+ "eval_accuracy": 0.958823798940078,
100
+ "eval_error_rate": 0.041176201059922035,
101
+ "eval_f1": 0.9612761071488952,
102
+ "eval_loss": 0.1413789838552475,
103
+ "eval_precision": 0.9627985812350435,
104
+ "eval_recall": 0.9609756097560975,
105
+ "eval_runtime": 9.5611,
106
+ "eval_samples_per_second": 42.882,
107
+ "eval_steps_per_second": 1.36,
108
+ "eval_top1_accuracy": 0.9609756097560975,
109
+ "step": 412
110
+ },
111
+ {
112
+ "epoch": 5.0,
113
+ "train_accuracy": 0.9541632763764578
114
+ },
115
+ {
116
+ "epoch": 5.0,
117
+ "grad_norm": 5.053682804107666,
118
+ "learning_rate": 0.00011111111111111112,
119
+ "loss": 0.171,
120
+ "step": 515
121
+ },
122
+ {
123
+ "epoch": 5.0,
124
+ "eval_accuracy": 0.9638319094938057,
125
+ "eval_error_rate": 0.036168090506194295,
126
+ "eval_f1": 0.9682546818665395,
127
+ "eval_loss": 0.11268524825572968,
128
+ "eval_precision": 0.9690166293737246,
129
+ "eval_recall": 0.9682926829268292,
130
+ "eval_runtime": 9.0863,
131
+ "eval_samples_per_second": 45.123,
132
+ "eval_steps_per_second": 1.431,
133
+ "eval_top1_accuracy": 0.9682926829268292,
134
+ "step": 515
135
+ },
136
+ {
137
+ "epoch": 6.0,
138
+ "train_accuracy": 0.9582316246270681
139
+ },
140
+ {
141
+ "epoch": 6.0,
142
+ "grad_norm": 5.191859245300293,
143
+ "learning_rate": 8.888888888888889e-05,
144
+ "loss": 0.1556,
145
+ "step": 618
146
+ },
147
+ {
148
+ "epoch": 6.0,
149
+ "eval_accuracy": 0.9680975082361487,
150
+ "eval_error_rate": 0.03190249176385129,
151
+ "eval_f1": 0.9706467840988272,
152
+ "eval_loss": 0.09764808416366577,
153
+ "eval_precision": 0.9715023474110591,
154
+ "eval_recall": 0.9707317073170731,
155
+ "eval_runtime": 7.867,
156
+ "eval_samples_per_second": 52.116,
157
+ "eval_steps_per_second": 1.652,
158
+ "eval_top1_accuracy": 0.9707317073170731,
159
+ "step": 618
160
+ },
161
+ {
162
+ "epoch": 7.0,
163
+ "train_accuracy": 0.9693517765120694
164
+ },
165
+ {
166
+ "epoch": 7.0,
167
+ "grad_norm": 13.192668914794922,
168
+ "learning_rate": 6.666666666666667e-05,
169
+ "loss": 0.118,
170
+ "step": 721
171
+ },
172
+ {
173
+ "epoch": 7.0,
174
+ "eval_accuracy": 0.9765634013174265,
175
+ "eval_error_rate": 0.02343659868257353,
176
+ "eval_f1": 0.9804014205399844,
177
+ "eval_loss": 0.07615971565246582,
178
+ "eval_precision": 0.9810152386446259,
179
+ "eval_recall": 0.9804878048780488,
180
+ "eval_runtime": 10.4663,
181
+ "eval_samples_per_second": 39.173,
182
+ "eval_steps_per_second": 1.242,
183
+ "eval_top1_accuracy": 0.9804878048780488,
184
+ "step": 721
185
+ },
186
+ {
187
+ "epoch": 8.0,
188
+ "train_accuracy": 0.9712503390290209
189
+ },
190
+ {
191
+ "epoch": 8.0,
192
+ "grad_norm": 2.4042465686798096,
193
+ "learning_rate": 4.4444444444444447e-05,
194
+ "loss": 0.1142,
195
+ "step": 824
196
+ },
197
+ {
198
+ "epoch": 8.0,
199
+ "eval_accuracy": 0.9812897795903163,
200
+ "eval_error_rate": 0.01871022040968373,
201
+ "eval_f1": 0.9803571227304607,
202
+ "eval_loss": 0.08533048629760742,
203
+ "eval_precision": 0.9809065609961366,
204
+ "eval_recall": 0.9804878048780488,
205
+ "eval_runtime": 9.5505,
206
+ "eval_samples_per_second": 42.93,
207
+ "eval_steps_per_second": 1.361,
208
+ "eval_top1_accuracy": 0.9804878048780488,
209
+ "step": 824
210
+ },
211
+ {
212
+ "epoch": 9.0,
213
+ "train_accuracy": 0.9717927854624356
214
+ },
215
+ {
216
+ "epoch": 9.0,
217
+ "grad_norm": 8.842650413513184,
218
+ "learning_rate": 2.2222222222222223e-05,
219
+ "loss": 0.0978,
220
+ "step": 927
221
+ },
222
+ {
223
+ "epoch": 9.0,
224
+ "eval_accuracy": 0.9788130896136262,
225
+ "eval_error_rate": 0.02118691038637377,
226
+ "eval_f1": 0.9803389295415358,
227
+ "eval_loss": 0.07979268580675125,
228
+ "eval_precision": 0.980763088973181,
229
+ "eval_recall": 0.9804878048780488,
230
+ "eval_runtime": 8.3701,
231
+ "eval_samples_per_second": 48.984,
232
+ "eval_steps_per_second": 1.553,
233
+ "eval_top1_accuracy": 0.9804878048780488,
234
+ "step": 927
235
+ },
236
+ {
237
+ "epoch": 9.0,
238
+ "step": 927,
239
+ "total_flos": 6.268961276565443e+17,
240
+ "train_loss": 0.578233896850125,
241
+ "train_runtime": 713.8479,
242
+ "train_samples_per_second": 45.906,
243
+ "train_steps_per_second": 1.443
244
+ }
245
+ ],
246
+ "logging_steps": 500,
247
+ "max_steps": 1030,
248
+ "num_input_tokens_seen": 0,
249
+ "num_train_epochs": 10,
250
+ "save_steps": 500,
251
+ "stateful_callbacks": {
252
+ "EarlyStoppingCallback": {
253
+ "args": {
254
+ "early_stopping_patience": 2,
255
+ "early_stopping_threshold": 0.0
256
+ },
257
+ "attributes": {
258
+ "early_stopping_patience_counter": 2
259
+ }
260
+ },
261
+ "TrainerControl": {
262
+ "args": {
263
+ "should_epoch_stop": false,
264
+ "should_evaluate": false,
265
+ "should_log": false,
266
+ "should_save": true,
267
+ "should_training_stop": true
268
+ },
269
+ "attributes": {}
270
+ }
271
+ },
272
+ "total_flos": 6.268961276565443e+17,
273
+ "train_batch_size": 32,
274
+ "trial_name": null,
275
+ "trial_params": null
276
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74811b4d1d753ccb9394877c84bb953b0f78c54022cd3a1792ce50e5bb5449aa
3
+ size 5176
training_metrics.csv ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Epoch,Train Loss,Eval Loss,Train Accuracy,Eval Accuracy
2
+ 1,2.4636,2.154754877090454,0.28471162648764115,0.5823726629787873
3
+ 2,1.3967,0.5585801601409912,0.720368863574722,0.8743437160779054
4
+ 3,0.4328,0.20998375117778778,0.8866286954163276,0.9524324562338874
5
+ 4,0.2544,0.1413789838552475,0.92324382967182,0.958823798940078
6
+ 5,0.171,0.11268524825572968,0.9541632763764578,0.9638319094938057
7
+ 6,0.1556,0.09764808416366577,0.9582316246270681,0.9680975082361487
8
+ 7,0.118,0.07615971565246582,0.9693517765120694,0.9765634013174265
9
+ 8,0.1142,0.08533048629760742,0.9712503390290209,0.9812897795903163
10
+ 9,0.0978,0.07979268580675125,0.9717927854624356,0.9788130896136262