xuancoblab2023 commited on
Commit
a0e6abd
·
verified ·
1 Parent(s): a7780bd

Training in progress, epoch 1

Browse files
Files changed (47) hide show
  1. logs/events.out.tfevents.1709719487.a2333cf40ebd.21010.62 +2 -2
  2. logs/events.out.tfevents.1709720645.a2333cf40ebd.21010.63 +3 -0
  3. model.safetensors +1 -1
  4. run-5/checkpoint-402/config.json +34 -0
  5. run-5/checkpoint-402/model.safetensors +3 -0
  6. run-5/checkpoint-402/optimizer.pt +3 -0
  7. run-5/checkpoint-402/rng_state.pth +3 -0
  8. run-5/checkpoint-402/scheduler.pt +3 -0
  9. run-5/checkpoint-402/special_tokens_map.json +7 -0
  10. run-5/checkpoint-402/tokenizer.json +0 -0
  11. run-5/checkpoint-402/tokenizer_config.json +57 -0
  12. run-5/checkpoint-402/trainer_state.json +141 -0
  13. run-5/checkpoint-402/training_args.bin +3 -0
  14. run-5/checkpoint-402/vocab.txt +0 -0
  15. run-5/checkpoint-670/config.json +34 -0
  16. run-5/checkpoint-670/model.safetensors +3 -0
  17. run-5/checkpoint-670/optimizer.pt +3 -0
  18. run-5/checkpoint-670/rng_state.pth +3 -0
  19. run-5/checkpoint-670/scheduler.pt +3 -0
  20. run-5/checkpoint-670/special_tokens_map.json +7 -0
  21. run-5/checkpoint-670/tokenizer.json +0 -0
  22. run-5/checkpoint-670/tokenizer_config.json +57 -0
  23. run-5/checkpoint-670/trainer_state.json +217 -0
  24. run-5/checkpoint-670/training_args.bin +3 -0
  25. run-5/checkpoint-670/vocab.txt +0 -0
  26. run-6/checkpoint-93/config.json +34 -0
  27. run-6/checkpoint-93/model.safetensors +3 -0
  28. run-6/checkpoint-93/optimizer.pt +3 -0
  29. run-6/checkpoint-93/rng_state.pth +3 -0
  30. run-6/checkpoint-93/scheduler.pt +3 -0
  31. run-6/checkpoint-93/special_tokens_map.json +7 -0
  32. run-6/checkpoint-93/tokenizer.json +0 -0
  33. run-6/checkpoint-93/tokenizer_config.json +57 -0
  34. run-6/checkpoint-93/trainer_state.json +46 -0
  35. run-6/checkpoint-93/training_args.bin +3 -0
  36. run-6/checkpoint-93/vocab.txt +0 -0
  37. run-8/checkpoint-96/model.safetensors +1 -1
  38. run-8/checkpoint-96/optimizer.pt +1 -1
  39. run-8/checkpoint-96/scheduler.pt +1 -1
  40. run-8/checkpoint-96/trainer_state.json +18 -18
  41. run-8/checkpoint-96/training_args.bin +1 -1
  42. run-9/checkpoint-96/model.safetensors +1 -1
  43. run-9/checkpoint-96/optimizer.pt +1 -1
  44. run-9/checkpoint-96/scheduler.pt +1 -1
  45. run-9/checkpoint-96/trainer_state.json +18 -18
  46. run-9/checkpoint-96/training_args.bin +1 -1
  47. training_args.bin +1 -1
logs/events.out.tfevents.1709719487.a2333cf40ebd.21010.62 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c93f9efdd5269965a100fe7d527345579ba6b7b7aa96b7030bd3af44979c547
3
- size 5316
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdbeac9d614ebb1fb53a9ebcc8f06ed9e51ae33afe4884fa169ab6bcd86b5bdf
3
+ size 11817
logs/events.out.tfevents.1709720645.a2333cf40ebd.21010.63 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b791daf0e1b0ed68b1c0e62303e2d85234d8a88de6027c2c007c71d9e1278dcb
3
+ size 5314
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b81abfa32ad5173708e5a7c5daeb714ab88b27f2272cc5f9512ce4882d1dceff
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:091f91bf8e06bfea46fc6caf25daa4a0d13a1d310b2b8d902cc3a58d34b2d3b4
3
  size 17549312
run-5/checkpoint-402/config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 128,
11
+ "id2label": {
12
+ "0": "negative",
13
+ "1": "positive"
14
+ },
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 512,
17
+ "label2id": {
18
+ "negative": "0",
19
+ "positive": "1"
20
+ },
21
+ "layer_norm_eps": 1e-12,
22
+ "max_position_embeddings": 512,
23
+ "model_type": "bert",
24
+ "num_attention_heads": 2,
25
+ "num_hidden_layers": 2,
26
+ "pad_token_id": 0,
27
+ "position_embedding_type": "absolute",
28
+ "problem_type": "single_label_classification",
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.38.2",
31
+ "type_vocab_size": 2,
32
+ "use_cache": true,
33
+ "vocab_size": 30522
34
+ }
run-5/checkpoint-402/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64a0fd11605c626e05c5c7fcd9020be8ae1860c723723c9983d472cdcbd40c9c
3
+ size 17549312
run-5/checkpoint-402/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f00cdfd1c148cb9c87455a788842f729619b2b3c60d9c8105d9cf71b1233638b
3
+ size 35122746
run-5/checkpoint-402/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b8f6147105b5cb171058af11810256ae8ec293f62dafd322114780fb990c4a0
3
+ size 14054
run-5/checkpoint-402/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4bc4bb9a98d0edebaa2dcf33f9df0b7c6a37ebb34cd3c52ca4d87395ed1e1e2
3
+ size 1064
run-5/checkpoint-402/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-5/checkpoint-402/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-5/checkpoint-402/tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "never_split": null,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "strip_accents": null,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "BertTokenizer",
56
+ "unk_token": "[UNK]"
57
+ }
run-5/checkpoint-402/trainer_state.json ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.541095890410959,
3
+ "best_model_checkpoint": "tiny-bert-sst2-distilled/run-5/checkpoint-402",
4
+ "epoch": 6.0,
5
+ "eval_steps": 500,
6
+ "global_step": 402,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "grad_norm": 0.22246789932250977,
14
+ "learning_rate": 1.5679734473172616e-05,
15
+ "loss": 0.2458,
16
+ "step": 67
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_accuracy": 0.5,
21
+ "eval_f1": 0.0,
22
+ "eval_loss": 0.1889893114566803,
23
+ "eval_precision": 0.0,
24
+ "eval_recall": 0.0,
25
+ "eval_runtime": 28.3905,
26
+ "eval_samples_per_second": 35.998,
27
+ "eval_steps_per_second": 1.127,
28
+ "step": 67
29
+ },
30
+ {
31
+ "epoch": 2.0,
32
+ "grad_norm": 0.2654605507850647,
33
+ "learning_rate": 1.3937541753931215e-05,
34
+ "loss": 0.1861,
35
+ "step": 134
36
+ },
37
+ {
38
+ "epoch": 2.0,
39
+ "eval_accuracy": 0.5,
40
+ "eval_f1": 0.0,
41
+ "eval_loss": 0.17826829850673676,
42
+ "eval_precision": 0.0,
43
+ "eval_recall": 0.0,
44
+ "eval_runtime": 29.9866,
45
+ "eval_samples_per_second": 34.082,
46
+ "eval_steps_per_second": 1.067,
47
+ "step": 134
48
+ },
49
+ {
50
+ "epoch": 3.0,
51
+ "grad_norm": 0.32397571206092834,
52
+ "learning_rate": 1.2195349034689811e-05,
53
+ "loss": 0.1759,
54
+ "step": 201
55
+ },
56
+ {
57
+ "epoch": 3.0,
58
+ "eval_accuracy": 0.5107632093933464,
59
+ "eval_f1": 0.07063197026022304,
60
+ "eval_loss": 0.16691070795059204,
61
+ "eval_precision": 0.7037037037037037,
62
+ "eval_recall": 0.03718199608610567,
63
+ "eval_runtime": 28.5351,
64
+ "eval_samples_per_second": 35.816,
65
+ "eval_steps_per_second": 1.121,
66
+ "step": 201
67
+ },
68
+ {
69
+ "epoch": 4.0,
70
+ "grad_norm": 0.6136437058448792,
71
+ "learning_rate": 1.045315631544841e-05,
72
+ "loss": 0.168,
73
+ "step": 268
74
+ },
75
+ {
76
+ "epoch": 4.0,
77
+ "eval_accuracy": 0.5362035225048923,
78
+ "eval_f1": 0.18556701030927833,
79
+ "eval_loss": 0.160459965467453,
80
+ "eval_precision": 0.7605633802816901,
81
+ "eval_recall": 0.10567514677103718,
82
+ "eval_runtime": 28.6402,
83
+ "eval_samples_per_second": 35.684,
84
+ "eval_steps_per_second": 1.117,
85
+ "step": 268
86
+ },
87
+ {
88
+ "epoch": 5.0,
89
+ "grad_norm": 0.3216162919998169,
90
+ "learning_rate": 8.710963596207009e-06,
91
+ "loss": 0.1647,
92
+ "step": 335
93
+ },
94
+ {
95
+ "epoch": 5.0,
96
+ "eval_accuracy": 0.5401174168297456,
97
+ "eval_f1": 0.20068027210884357,
98
+ "eval_loss": 0.1576094627380371,
99
+ "eval_precision": 0.7662337662337663,
100
+ "eval_recall": 0.11545988258317025,
101
+ "eval_runtime": 28.3765,
102
+ "eval_samples_per_second": 36.016,
103
+ "eval_steps_per_second": 1.128,
104
+ "step": 335
105
+ },
106
+ {
107
+ "epoch": 6.0,
108
+ "grad_norm": 0.45282211899757385,
109
+ "learning_rate": 6.968770876965607e-06,
110
+ "loss": 0.1625,
111
+ "step": 402
112
+ },
113
+ {
114
+ "epoch": 6.0,
115
+ "eval_accuracy": 0.541095890410959,
116
+ "eval_f1": 0.20373514431239387,
117
+ "eval_loss": 0.15556302666664124,
118
+ "eval_precision": 0.7692307692307693,
119
+ "eval_recall": 0.11741682974559686,
120
+ "eval_runtime": 28.8112,
121
+ "eval_samples_per_second": 35.472,
122
+ "eval_steps_per_second": 1.111,
123
+ "step": 402
124
+ }
125
+ ],
126
+ "logging_steps": 500,
127
+ "max_steps": 670,
128
+ "num_input_tokens_seen": 0,
129
+ "num_train_epochs": 10,
130
+ "save_steps": 500,
131
+ "total_flos": 1414171183680.0,
132
+ "train_batch_size": 46,
133
+ "trial_name": null,
134
+ "trial_params": {
135
+ "alpha": 0.19075569878013487,
136
+ "learning_rate": 1.7421927192414017e-05,
137
+ "num_train_epochs": 10,
138
+ "per_device_train_batch_size": 46,
139
+ "temperature": 24
140
+ }
141
+ }
run-5/checkpoint-402/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:557dc8a5a76d4b478175e4b8394a3e21bc66975595c37bce97ba59c7190b899c
3
+ size 4920
run-5/checkpoint-402/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-5/checkpoint-670/config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 128,
11
+ "id2label": {
12
+ "0": "negative",
13
+ "1": "positive"
14
+ },
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 512,
17
+ "label2id": {
18
+ "negative": "0",
19
+ "positive": "1"
20
+ },
21
+ "layer_norm_eps": 1e-12,
22
+ "max_position_embeddings": 512,
23
+ "model_type": "bert",
24
+ "num_attention_heads": 2,
25
+ "num_hidden_layers": 2,
26
+ "pad_token_id": 0,
27
+ "position_embedding_type": "absolute",
28
+ "problem_type": "single_label_classification",
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.38.2",
31
+ "type_vocab_size": 2,
32
+ "use_cache": true,
33
+ "vocab_size": 30522
34
+ }
run-5/checkpoint-670/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:575ca98bf296b080a72c0d6dd52be9200676461b3e7e7d27907d42ff324aa3ab
3
+ size 17549312
run-5/checkpoint-670/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b13e9195b025f7da017e1830d435fe8b7b8500a6e3509ac023d7f59503d1f1a4
3
+ size 35122746
run-5/checkpoint-670/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b9508485dbdb133df6b713b38533dd5eb9442565adc1f6b4e0a9cb742ecd19a
3
+ size 14054
run-5/checkpoint-670/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e1b91ab299d310fcb602a86ce4eef9cd66c51133a0e148af12d39b0ce6cae24
3
+ size 1064
run-5/checkpoint-670/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-5/checkpoint-670/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-5/checkpoint-670/tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "never_split": null,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "strip_accents": null,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "BertTokenizer",
56
+ "unk_token": "[UNK]"
57
+ }
run-5/checkpoint-670/trainer_state.json ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.541095890410959,
3
+ "best_model_checkpoint": "tiny-bert-sst2-distilled/run-5/checkpoint-402",
4
+ "epoch": 10.0,
5
+ "eval_steps": 500,
6
+ "global_step": 670,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "grad_norm": 0.22246789932250977,
14
+ "learning_rate": 1.5679734473172616e-05,
15
+ "loss": 0.2458,
16
+ "step": 67
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_accuracy": 0.5,
21
+ "eval_f1": 0.0,
22
+ "eval_loss": 0.1889893114566803,
23
+ "eval_precision": 0.0,
24
+ "eval_recall": 0.0,
25
+ "eval_runtime": 28.3905,
26
+ "eval_samples_per_second": 35.998,
27
+ "eval_steps_per_second": 1.127,
28
+ "step": 67
29
+ },
30
+ {
31
+ "epoch": 2.0,
32
+ "grad_norm": 0.2654605507850647,
33
+ "learning_rate": 1.3937541753931215e-05,
34
+ "loss": 0.1861,
35
+ "step": 134
36
+ },
37
+ {
38
+ "epoch": 2.0,
39
+ "eval_accuracy": 0.5,
40
+ "eval_f1": 0.0,
41
+ "eval_loss": 0.17826829850673676,
42
+ "eval_precision": 0.0,
43
+ "eval_recall": 0.0,
44
+ "eval_runtime": 29.9866,
45
+ "eval_samples_per_second": 34.082,
46
+ "eval_steps_per_second": 1.067,
47
+ "step": 134
48
+ },
49
+ {
50
+ "epoch": 3.0,
51
+ "grad_norm": 0.32397571206092834,
52
+ "learning_rate": 1.2195349034689811e-05,
53
+ "loss": 0.1759,
54
+ "step": 201
55
+ },
56
+ {
57
+ "epoch": 3.0,
58
+ "eval_accuracy": 0.5107632093933464,
59
+ "eval_f1": 0.07063197026022304,
60
+ "eval_loss": 0.16691070795059204,
61
+ "eval_precision": 0.7037037037037037,
62
+ "eval_recall": 0.03718199608610567,
63
+ "eval_runtime": 28.5351,
64
+ "eval_samples_per_second": 35.816,
65
+ "eval_steps_per_second": 1.121,
66
+ "step": 201
67
+ },
68
+ {
69
+ "epoch": 4.0,
70
+ "grad_norm": 0.6136437058448792,
71
+ "learning_rate": 1.045315631544841e-05,
72
+ "loss": 0.168,
73
+ "step": 268
74
+ },
75
+ {
76
+ "epoch": 4.0,
77
+ "eval_accuracy": 0.5362035225048923,
78
+ "eval_f1": 0.18556701030927833,
79
+ "eval_loss": 0.160459965467453,
80
+ "eval_precision": 0.7605633802816901,
81
+ "eval_recall": 0.10567514677103718,
82
+ "eval_runtime": 28.6402,
83
+ "eval_samples_per_second": 35.684,
84
+ "eval_steps_per_second": 1.117,
85
+ "step": 268
86
+ },
87
+ {
88
+ "epoch": 5.0,
89
+ "grad_norm": 0.3216162919998169,
90
+ "learning_rate": 8.710963596207009e-06,
91
+ "loss": 0.1647,
92
+ "step": 335
93
+ },
94
+ {
95
+ "epoch": 5.0,
96
+ "eval_accuracy": 0.5401174168297456,
97
+ "eval_f1": 0.20068027210884357,
98
+ "eval_loss": 0.1576094627380371,
99
+ "eval_precision": 0.7662337662337663,
100
+ "eval_recall": 0.11545988258317025,
101
+ "eval_runtime": 28.3765,
102
+ "eval_samples_per_second": 36.016,
103
+ "eval_steps_per_second": 1.128,
104
+ "step": 335
105
+ },
106
+ {
107
+ "epoch": 6.0,
108
+ "grad_norm": 0.45282211899757385,
109
+ "learning_rate": 6.968770876965607e-06,
110
+ "loss": 0.1625,
111
+ "step": 402
112
+ },
113
+ {
114
+ "epoch": 6.0,
115
+ "eval_accuracy": 0.541095890410959,
116
+ "eval_f1": 0.20373514431239387,
117
+ "eval_loss": 0.15556302666664124,
118
+ "eval_precision": 0.7692307692307693,
119
+ "eval_recall": 0.11741682974559686,
120
+ "eval_runtime": 28.8112,
121
+ "eval_samples_per_second": 35.472,
122
+ "eval_steps_per_second": 1.111,
123
+ "step": 402
124
+ },
125
+ {
126
+ "epoch": 7.0,
127
+ "grad_norm": 0.3418940007686615,
128
+ "learning_rate": 5.226578157724205e-06,
129
+ "loss": 0.1611,
130
+ "step": 469
131
+ },
132
+ {
133
+ "epoch": 7.0,
134
+ "eval_accuracy": 0.541095890410959,
135
+ "eval_f1": 0.20373514431239387,
136
+ "eval_loss": 0.15463578701019287,
137
+ "eval_precision": 0.7692307692307693,
138
+ "eval_recall": 0.11741682974559686,
139
+ "eval_runtime": 28.8835,
140
+ "eval_samples_per_second": 35.384,
141
+ "eval_steps_per_second": 1.108,
142
+ "step": 469
143
+ },
144
+ {
145
+ "epoch": 8.0,
146
+ "grad_norm": 0.3419853448867798,
147
+ "learning_rate": 3.4843854384828036e-06,
148
+ "loss": 0.1596,
149
+ "step": 536
150
+ },
151
+ {
152
+ "epoch": 8.0,
153
+ "eval_accuracy": 0.541095890410959,
154
+ "eval_f1": 0.20373514431239387,
155
+ "eval_loss": 0.15391579270362854,
156
+ "eval_precision": 0.7692307692307693,
157
+ "eval_recall": 0.11741682974559686,
158
+ "eval_runtime": 28.6467,
159
+ "eval_samples_per_second": 35.676,
160
+ "eval_steps_per_second": 1.117,
161
+ "step": 536
162
+ },
163
+ {
164
+ "epoch": 9.0,
165
+ "grad_norm": 0.3018151521682739,
166
+ "learning_rate": 1.7421927192414018e-06,
167
+ "loss": 0.1589,
168
+ "step": 603
169
+ },
170
+ {
171
+ "epoch": 9.0,
172
+ "eval_accuracy": 0.541095890410959,
173
+ "eval_f1": 0.20373514431239387,
174
+ "eval_loss": 0.15353241562843323,
175
+ "eval_precision": 0.7692307692307693,
176
+ "eval_recall": 0.11741682974559686,
177
+ "eval_runtime": 28.7082,
178
+ "eval_samples_per_second": 35.6,
179
+ "eval_steps_per_second": 1.115,
180
+ "step": 603
181
+ },
182
+ {
183
+ "epoch": 10.0,
184
+ "grad_norm": 0.4547845721244812,
185
+ "learning_rate": 0.0,
186
+ "loss": 0.1592,
187
+ "step": 670
188
+ },
189
+ {
190
+ "epoch": 10.0,
191
+ "eval_accuracy": 0.541095890410959,
192
+ "eval_f1": 0.20373514431239387,
193
+ "eval_loss": 0.15339058637619019,
194
+ "eval_precision": 0.7692307692307693,
195
+ "eval_recall": 0.11741682974559686,
196
+ "eval_runtime": 28.3962,
197
+ "eval_samples_per_second": 35.991,
198
+ "eval_steps_per_second": 1.127,
199
+ "step": 670
200
+ }
201
+ ],
202
+ "logging_steps": 500,
203
+ "max_steps": 670,
204
+ "num_input_tokens_seen": 0,
205
+ "num_train_epochs": 10,
206
+ "save_steps": 500,
207
+ "total_flos": 2356951972800.0,
208
+ "train_batch_size": 46,
209
+ "trial_name": null,
210
+ "trial_params": {
211
+ "alpha": 0.19075569878013487,
212
+ "learning_rate": 1.7421927192414017e-05,
213
+ "num_train_epochs": 10,
214
+ "per_device_train_batch_size": 46,
215
+ "temperature": 24
216
+ }
217
+ }
run-5/checkpoint-670/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:557dc8a5a76d4b478175e4b8394a3e21bc66975595c37bce97ba59c7190b899c
3
+ size 4920
run-5/checkpoint-670/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-6/checkpoint-93/config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 128,
11
+ "id2label": {
12
+ "0": "negative",
13
+ "1": "positive"
14
+ },
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 512,
17
+ "label2id": {
18
+ "negative": "0",
19
+ "positive": "1"
20
+ },
21
+ "layer_norm_eps": 1e-12,
22
+ "max_position_embeddings": 512,
23
+ "model_type": "bert",
24
+ "num_attention_heads": 2,
25
+ "num_hidden_layers": 2,
26
+ "pad_token_id": 0,
27
+ "position_embedding_type": "absolute",
28
+ "problem_type": "single_label_classification",
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.38.2",
31
+ "type_vocab_size": 2,
32
+ "use_cache": true,
33
+ "vocab_size": 30522
34
+ }
run-6/checkpoint-93/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:091f91bf8e06bfea46fc6caf25daa4a0d13a1d310b2b8d902cc3a58d34b2d3b4
3
+ size 17549312
run-6/checkpoint-93/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c4dc4e6a4705d6a4aa925ed18fcba0c5107d6b189857077c1e8b1548e5eb876
3
+ size 35122746
run-6/checkpoint-93/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13b8e331f3380aab23e9eb3e617f6b97a18048bd47a3ea8f14cf82dde08be706
3
+ size 14054
run-6/checkpoint-93/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f80568e1169ca26ecc896bfbd4e7e169354eb0bf7282ec144987a58ee6ae0650
3
+ size 1064
run-6/checkpoint-93/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-6/checkpoint-93/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-6/checkpoint-93/tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "never_split": null,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "strip_accents": null,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "BertTokenizer",
56
+ "unk_token": "[UNK]"
57
+ }
run-6/checkpoint-93/trainer_state.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.5,
3
+ "best_model_checkpoint": "tiny-bert-sst2-distilled/run-6/checkpoint-93",
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 93,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "grad_norm": 0.4286664128303528,
14
+ "learning_rate": 1.945409831472016e-05,
15
+ "loss": 0.4806,
16
+ "step": 93
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_accuracy": 0.5,
21
+ "eval_f1": 0.0,
22
+ "eval_loss": 0.4703535735607147,
23
+ "eval_precision": 0.0,
24
+ "eval_recall": 0.0,
25
+ "eval_runtime": 28.42,
26
+ "eval_samples_per_second": 35.961,
27
+ "eval_steps_per_second": 1.126,
28
+ "step": 93
29
+ }
30
+ ],
31
+ "logging_steps": 500,
32
+ "max_steps": 837,
33
+ "num_input_tokens_seen": 0,
34
+ "num_train_epochs": 9,
35
+ "save_steps": 500,
36
+ "total_flos": 235695197280.0,
37
+ "train_batch_size": 33,
38
+ "trial_name": null,
39
+ "trial_params": {
40
+ "alpha": 0.6122687021783514,
41
+ "learning_rate": 2.188586060406018e-05,
42
+ "num_train_epochs": 9,
43
+ "per_device_train_batch_size": 33,
44
+ "temperature": 14
45
+ }
46
+ }
run-6/checkpoint-93/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e40b6510f54e1dab2cd156ebde5daec51272f5467eda7ac498712f745cbb3237
3
+ size 4920
run-6/checkpoint-93/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-8/checkpoint-96/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:415159172ccea99175bbf0b16be3b73dce96c89652c91701080b51ac71fb25ea
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84bec5042b87086a45dca7b3c56d04cf79d83ed1b52a762adc677152de33272c
3
  size 17549312
run-8/checkpoint-96/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac5e0f8916248b91ce3c39f2a229a973b917d0137dfcefc765b085f79f699292
3
  size 35122746
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1da6291d75080430d6c4ab323e31c11d497e573a325f8f4db8e787327b6178a6
3
  size 35122746
run-8/checkpoint-96/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a987235109284c8b908892c9fe6cf9e664fed2bf7d9115e29e411d71c61a9b90
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47d32b52b4cbac032624e075d8b37b40470a77d3ab11ecb407b7b2017aefcbcc
3
  size 1064
run-8/checkpoint-96/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.7602739726027398,
3
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-8/checkpoint-96",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
@@ -10,36 +10,36 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 3.2297720909118652,
14
- "learning_rate": 0.0003356185870926135,
15
- "loss": 0.5529,
16
  "step": 96
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.7602739726027398,
21
- "eval_f1": 0.7973531844499586,
22
- "eval_loss": 0.4773445129394531,
23
- "eval_precision": 0.6905444126074498,
24
- "eval_recall": 0.9432485322896281,
25
- "eval_runtime": 28.1534,
26
- "eval_samples_per_second": 36.301,
27
- "eval_steps_per_second": 1.137,
28
  "step": 96
29
  }
30
  ],
31
  "logging_steps": 500,
32
- "max_steps": 576,
33
  "num_input_tokens_seen": 0,
34
- "num_train_epochs": 6,
35
  "save_steps": 500,
36
  "total_flos": 235695197280.0,
37
  "train_batch_size": 32,
38
  "trial_name": null,
39
  "trial_params": {
40
- "alpha": 0.8957226795607325,
41
- "learning_rate": 0.0004027423045111362,
42
- "num_train_epochs": 6,
43
- "temperature": 3
44
  }
45
  }
 
1
  {
2
+ "best_metric": 0.5,
3
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-8/checkpoint-96",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 0.41845789551734924,
14
+ "learning_rate": 1.3576479969391517e-05,
15
+ "loss": 0.1501,
16
  "step": 96
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.5,
21
+ "eval_f1": 0.0,
22
+ "eval_loss": 0.0910131111741066,
23
+ "eval_precision": 0.0,
24
+ "eval_recall": 0.0,
25
+ "eval_runtime": 28.1809,
26
+ "eval_samples_per_second": 36.266,
27
+ "eval_steps_per_second": 1.136,
28
  "step": 96
29
  }
30
  ],
31
  "logging_steps": 500,
32
+ "max_steps": 480,
33
  "num_input_tokens_seen": 0,
34
+ "num_train_epochs": 5,
35
  "save_steps": 500,
36
  "total_flos": 235695197280.0,
37
  "train_batch_size": 32,
38
  "trial_name": null,
39
  "trial_params": {
40
+ "alpha": 0.06528729720326021,
41
+ "learning_rate": 1.6970599961739396e-05,
42
+ "num_train_epochs": 5,
43
+ "temperature": 4
44
  }
45
  }
run-8/checkpoint-96/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c62ca979c251b24426f83ee19cf958429aed20a5e41a6549bd0708ed8286fb21
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:669b7645cf6dd2271363aa4a300b453105055ad2059ef2460a17a64f32148151
3
  size 4920
run-9/checkpoint-96/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:239dd84c1acbf3926d1b2ffcc0c8dbbb872ee0abbb6b9b92cf4944b98a755de2
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d8d6934d852376a5bc13fbfb17016d4c3adcd7b7bdd42a9aab8b9b5d9f62a1b
3
  size 17549312
run-9/checkpoint-96/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb67d3ee336b55baf61cb096cd593c5a926ac525753c86191a6d55862e3338d9
3
  size 35122746
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45b565c370bd07ca4b61cdb8cbc27c9334a05f68332f9e1d6be3633edc32ad6c
3
  size 35122746
run-9/checkpoint-96/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e91027d86c4ea0d43b412d03e2ef80532fe534e4a3c1c0000c6005738343a2bd
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50d2214d797b0d557f68148a57b0b4d53efab48f681a8a8f578594a80924ae16
3
  size 1064
run-9/checkpoint-96/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.7514677103718199,
3
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-9/checkpoint-96",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
@@ -10,36 +10,36 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 2.0101799964904785,
14
- "learning_rate": 5.9842495749639135e-05,
15
- "loss": 0.6452,
16
  "step": 96
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.7514677103718199,
21
- "eval_f1": 0.7806563039723661,
22
- "eval_loss": 0.5494381785392761,
23
- "eval_precision": 0.6986089644513137,
24
- "eval_recall": 0.8845401174168297,
25
- "eval_runtime": 28.605,
26
- "eval_samples_per_second": 35.728,
27
- "eval_steps_per_second": 1.119,
28
  "step": 96
29
  }
30
  ],
31
  "logging_steps": 500,
32
- "max_steps": 384,
33
  "num_input_tokens_seen": 0,
34
- "num_train_epochs": 4,
35
  "save_steps": 500,
36
  "total_flos": 235695197280.0,
37
  "train_batch_size": 32,
38
  "trial_name": null,
39
  "trial_params": {
40
- "alpha": 0.9656943950307342,
41
- "learning_rate": 7.978999433285218e-05,
42
- "num_train_epochs": 4,
43
- "temperature": 18
44
  }
45
  }
 
1
  {
2
+ "best_metric": 0.50293542074364,
3
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-9/checkpoint-96",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 0.7869714498519897,
14
+ "learning_rate": 1.3073475217173054e-05,
15
+ "loss": 0.6637,
16
  "step": 96
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.50293542074364,
21
+ "eval_f1": 0.03422053231939163,
22
+ "eval_loss": 0.6569339036941528,
23
+ "eval_precision": 0.6,
24
+ "eval_recall": 0.01761252446183953,
25
+ "eval_runtime": 28.4278,
26
+ "eval_samples_per_second": 35.951,
27
+ "eval_steps_per_second": 1.126,
28
  "step": 96
29
  }
30
  ],
31
  "logging_steps": 500,
32
+ "max_steps": 768,
33
  "num_input_tokens_seen": 0,
34
+ "num_train_epochs": 8,
35
  "save_steps": 500,
36
  "total_flos": 235695197280.0,
37
  "train_batch_size": 32,
38
  "trial_name": null,
39
  "trial_params": {
40
+ "alpha": 0.9372925528810219,
41
+ "learning_rate": 1.4941114533912061e-05,
42
+ "num_train_epochs": 8,
43
+ "temperature": 17
44
  }
45
  }
run-9/checkpoint-96/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb87382f1b3aca8f77e985c5e3b04a5f9ce84af6f6fa934f965157269a10afa9
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c88ab003cb8d443d320a7d2a50b7663500e4bdf0605d3f2691a06db8acbcf72
3
  size 4920
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb87382f1b3aca8f77e985c5e3b04a5f9ce84af6f6fa934f965157269a10afa9
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e40b6510f54e1dab2cd156ebde5daec51272f5467eda7ac498712f745cbb3237
3
  size 4920