xuancoblab2023 committed on
Commit
b5be5cb
·
verified ·
1 Parent(s): dee3c35

Training in progress, epoch 1

Browse files
config.json CHANGED
@@ -15,8 +15,8 @@
15
  "initializer_range": 0.02,
16
  "intermediate_size": 512,
17
  "label2id": {
18
- "negative": 0,
19
- "positive": 1
20
  },
21
  "layer_norm_eps": 1e-12,
22
  "max_position_embeddings": 512,
 
15
  "initializer_range": 0.02,
16
  "intermediate_size": 512,
17
  "label2id": {
18
+ "negative": "0",
19
+ "positive": "1"
20
  },
21
  "layer_norm_eps": 1e-12,
22
  "max_position_embeddings": 512,
logs/events.out.tfevents.1711296858.8cc2eef2edb7.2942.2 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2639c1c7eed6ff11aac2694a7d4bb784db95973d8617cf620672b55879543225
3
- size 6137
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eec5f8a9d179cd75412d3f476893b961e415378f7736f0264bbfd87b7d5a61d3
3
+ size 11601
logs/events.out.tfevents.1711297031.8cc2eef2edb7.2942.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5671c871dec21b814e238027bc460e37eeca98777d4d78926a461e91685a1999
3
+ size 5406
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21c84e6f83ea178d5ec19c03e2313d64e2ee2968539fb044cd51b74d740b3c4f
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8535679c03461b298018d74a95c8d714c32a4f38defdc9aadd1837a148497803
3
  size 17549312
run-2/checkpoint-2673/config.json CHANGED
@@ -27,7 +27,7 @@
27
  "position_embedding_type": "absolute",
28
  "problem_type": "single_label_classification",
29
  "torch_dtype": "float32",
30
- "transformers_version": "4.38.2",
31
  "type_vocab_size": 2,
32
  "use_cache": true,
33
  "vocab_size": 30522
 
27
  "position_embedding_type": "absolute",
28
  "problem_type": "single_label_classification",
29
  "torch_dtype": "float32",
30
+ "transformers_version": "4.39.1",
31
  "type_vocab_size": 2,
32
  "use_cache": true,
33
  "vocab_size": 30522
run-2/checkpoint-2673/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24d3b3773a2cb5eb4fe5d0510281ec4afc468d94f153ea1c4e42557acc72d031
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02cb61417b7bc4627bef9aead38201284ebffcb7218d1cd7f8e50100028fec79
3
  size 17549312
run-2/checkpoint-2673/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aaaf77f832395c735d2ca2829839fa8b3de3e358a4b7cfb575a266ac248c48c0
3
- size 35122746
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01ae6ef254146e91c7ec1fd3d28b8863e057d4dc9c23f3165352d58d53bb29ef
3
+ size 35123898
run-2/checkpoint-2673/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb49527a73de8c97b0805cc04f8d3e06cb2a963ae0f111d98648316d291a7702
3
- size 14054
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a4a1b62f050c820b6eb360863e5fa90a75f0572b605f7ec9339d28261a796fd
3
+ size 14308
run-2/checkpoint-2673/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d88a949c40227921ebbc201965ce805890fc5d6557c00c377fb1db010a9e5d7
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecc18bc8ae20249ae705a82acbac424d3d36024accd1565a75c4986b5d70555c
3
  size 1064
run-2/checkpoint-2673/tokenizer.json CHANGED
@@ -2,7 +2,7 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 33,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 31,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
run-2/checkpoint-2673/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.5207920792079208,
3
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-2/checkpoint-594",
4
  "epoch": 9.0,
5
  "eval_steps": 500,
@@ -10,146 +10,182 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 0.46941760182380676,
14
- "learning_rate": 1.0066413527942762e-05,
15
- "loss": 0.2155,
16
  "step": 297
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.497029702970297,
21
- "eval_loss": 0.18417517840862274,
22
- "eval_runtime": 14.6034,
23
- "eval_samples_per_second": 34.581,
24
- "eval_steps_per_second": 1.096,
 
 
 
 
25
  "step": 297
26
  },
27
  {
28
  "epoch": 2.0,
29
- "grad_norm": 0.3071568012237549,
30
- "learning_rate": 8.808111836949918e-06,
31
- "loss": 0.1831,
32
  "step": 594
33
  },
34
  {
35
  "epoch": 2.0,
36
- "eval_accuracy": 0.5207920792079208,
37
- "eval_loss": 0.17807520925998688,
38
- "eval_runtime": 14.6044,
39
- "eval_samples_per_second": 34.579,
40
- "eval_steps_per_second": 1.096,
 
 
 
 
41
  "step": 594
42
  },
43
  {
44
  "epoch": 3.0,
45
- "grad_norm": 0.35216400027275085,
46
- "learning_rate": 7.549810145957073e-06,
47
- "loss": 0.1804,
48
  "step": 891
49
  },
50
  {
51
  "epoch": 3.0,
52
- "eval_accuracy": 0.5207920792079208,
53
- "eval_loss": 0.17647753655910492,
54
- "eval_runtime": 15.4786,
55
- "eval_samples_per_second": 32.626,
56
- "eval_steps_per_second": 1.034,
 
 
 
 
57
  "step": 891
58
  },
59
  {
60
  "epoch": 4.0,
61
- "grad_norm": 0.26479315757751465,
62
- "learning_rate": 6.291508454964228e-06,
63
- "loss": 0.179,
64
  "step": 1188
65
  },
66
  {
67
  "epoch": 4.0,
68
- "eval_accuracy": 0.5168316831683168,
69
- "eval_loss": 0.1757788509130478,
70
- "eval_runtime": 13.9632,
71
- "eval_samples_per_second": 36.166,
72
- "eval_steps_per_second": 1.146,
 
 
 
 
73
  "step": 1188
74
  },
75
  {
76
  "epoch": 5.0,
77
- "grad_norm": 0.33599618077278137,
78
- "learning_rate": 5.033206763971381e-06,
79
- "loss": 0.1781,
80
  "step": 1485
81
  },
82
  {
83
  "epoch": 5.0,
84
- "eval_accuracy": 0.5168316831683168,
85
- "eval_loss": 0.17515264451503754,
86
- "eval_runtime": 14.2891,
87
- "eval_samples_per_second": 35.342,
88
- "eval_steps_per_second": 1.12,
 
 
 
 
89
  "step": 1485
90
  },
91
  {
92
  "epoch": 6.0,
93
- "grad_norm": 0.21752449870109558,
94
- "learning_rate": 3.7749050729785363e-06,
95
- "loss": 0.1781,
96
  "step": 1782
97
  },
98
  {
99
  "epoch": 6.0,
100
- "eval_accuracy": 0.5168316831683168,
101
- "eval_loss": 0.17485138773918152,
102
- "eval_runtime": 13.9491,
103
- "eval_samples_per_second": 36.203,
104
- "eval_steps_per_second": 1.147,
 
 
 
 
105
  "step": 1782
106
  },
107
  {
108
  "epoch": 7.0,
109
- "grad_norm": 0.2491326630115509,
110
- "learning_rate": 2.5166033819856906e-06,
111
- "loss": 0.1777,
112
  "step": 2079
113
  },
114
  {
115
  "epoch": 7.0,
116
- "eval_accuracy": 0.5168316831683168,
117
- "eval_loss": 0.17470771074295044,
118
- "eval_runtime": 14.4075,
119
- "eval_samples_per_second": 35.051,
120
- "eval_steps_per_second": 1.111,
 
 
 
 
121
  "step": 2079
122
  },
123
  {
124
  "epoch": 8.0,
125
- "grad_norm": 0.5971789956092834,
126
- "learning_rate": 1.2583016909928453e-06,
127
- "loss": 0.1776,
128
  "step": 2376
129
  },
130
  {
131
  "epoch": 8.0,
132
- "eval_accuracy": 0.5168316831683168,
133
- "eval_loss": 0.17442195117473602,
134
- "eval_runtime": 14.8484,
135
- "eval_samples_per_second": 34.011,
136
- "eval_steps_per_second": 1.078,
 
 
 
 
137
  "step": 2376
138
  },
139
  {
140
  "epoch": 9.0,
141
- "grad_norm": 0.2817859947681427,
142
  "learning_rate": 0.0,
143
- "loss": 0.1773,
144
  "step": 2673
145
  },
146
  {
147
  "epoch": 9.0,
148
- "eval_accuracy": 0.5168316831683168,
149
- "eval_loss": 0.17444071173667908,
150
- "eval_runtime": 13.7553,
151
- "eval_samples_per_second": 36.713,
152
- "eval_steps_per_second": 1.163,
 
 
 
 
153
  "step": 2673
154
  }
155
  ],
@@ -158,13 +194,13 @@
158
  "num_input_tokens_seen": 0,
159
  "num_train_epochs": 9,
160
  "save_steps": 500,
161
- "total_flos": 7000589548260.0,
162
  "train_batch_size": 32,
163
  "trial_name": null,
164
  "trial_params": {
165
- "alpha": 0.20650329892275032,
166
- "learning_rate": 1.1324715218935609e-05,
167
  "num_train_epochs": 9,
168
- "temperature": 10
169
  }
170
  }
 
1
  {
2
+ "best_metric": 0.5128712871287129,
3
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-2/checkpoint-594",
4
  "epoch": 9.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 0.6258772015571594,
14
+ "learning_rate": 1.171336966673164e-05,
15
+ "loss": 0.303,
16
  "step": 297
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.4910891089108911,
21
+ "eval_f1": 0.0,
22
+ "eval_loss": 0.28316354751586914,
23
+ "eval_mcc": -0.09980217586956908,
24
+ "eval_precision": 0.0,
25
+ "eval_recall": 0.0,
26
+ "eval_runtime": 0.9281,
27
+ "eval_samples_per_second": 544.113,
28
+ "eval_steps_per_second": 17.239,
29
  "step": 297
30
  },
31
  {
32
  "epoch": 2.0,
33
+ "grad_norm": 0.44756296277046204,
34
+ "learning_rate": 1.0249198458390185e-05,
35
+ "loss": 0.2807,
36
  "step": 594
37
  },
38
  {
39
  "epoch": 2.0,
40
+ "eval_accuracy": 0.5128712871287129,
41
+ "eval_f1": 0.13986013986013984,
42
+ "eval_loss": 0.2766987085342407,
43
+ "eval_mcc": 0.047945551909639166,
44
+ "eval_precision": 0.5882352941176471,
45
+ "eval_recall": 0.07936507936507936,
46
+ "eval_runtime": 0.9353,
47
+ "eval_samples_per_second": 539.951,
48
+ "eval_steps_per_second": 17.107,
49
  "step": 594
50
  },
51
  {
52
  "epoch": 3.0,
53
+ "grad_norm": 0.431320458650589,
54
+ "learning_rate": 8.785027250048729e-06,
55
+ "loss": 0.2776,
56
  "step": 891
57
  },
58
  {
59
  "epoch": 3.0,
60
+ "eval_accuracy": 0.5128712871287129,
61
+ "eval_f1": 0.14583333333333331,
62
+ "eval_loss": 0.27507784962654114,
63
+ "eval_mcc": 0.046724423554504804,
64
+ "eval_precision": 0.5833333333333334,
65
+ "eval_recall": 0.08333333333333333,
66
+ "eval_runtime": 0.9247,
67
+ "eval_samples_per_second": 546.126,
68
+ "eval_steps_per_second": 17.303,
69
  "step": 891
70
  },
71
  {
72
  "epoch": 4.0,
73
+ "grad_norm": 0.3838063180446625,
74
+ "learning_rate": 7.320856041707276e-06,
75
+ "loss": 0.2774,
76
  "step": 1188
77
  },
78
  {
79
  "epoch": 4.0,
80
+ "eval_accuracy": 0.5128712871287129,
81
+ "eval_f1": 0.14583333333333331,
82
+ "eval_loss": 0.273956835269928,
83
+ "eval_mcc": 0.046724423554504804,
84
+ "eval_precision": 0.5833333333333334,
85
+ "eval_recall": 0.08333333333333333,
86
+ "eval_runtime": 0.9281,
87
+ "eval_samples_per_second": 544.106,
88
+ "eval_steps_per_second": 17.239,
89
  "step": 1188
90
  },
91
  {
92
  "epoch": 5.0,
93
+ "grad_norm": 0.5975275039672852,
94
+ "learning_rate": 5.85668483336582e-06,
95
+ "loss": 0.2766,
96
  "step": 1485
97
  },
98
  {
99
  "epoch": 5.0,
100
+ "eval_accuracy": 0.5108910891089109,
101
+ "eval_f1": 0.13937282229965156,
102
+ "eval_loss": 0.27307695150375366,
103
+ "eval_mcc": 0.039524509362779174,
104
+ "eval_precision": 0.5714285714285714,
105
+ "eval_recall": 0.07936507936507936,
106
+ "eval_runtime": 0.9415,
107
+ "eval_samples_per_second": 536.374,
108
+ "eval_steps_per_second": 16.994,
109
  "step": 1485
110
  },
111
  {
112
  "epoch": 6.0,
113
+ "grad_norm": 0.4973074495792389,
114
+ "learning_rate": 4.3925136250243645e-06,
115
+ "loss": 0.2759,
116
  "step": 1782
117
  },
118
  {
119
  "epoch": 6.0,
120
+ "eval_accuracy": 0.5069306930693069,
121
+ "eval_f1": 0.1384083044982699,
122
+ "eval_loss": 0.27315467596054077,
123
+ "eval_mcc": 0.023354867176075732,
124
+ "eval_precision": 0.5405405405405406,
125
+ "eval_recall": 0.07936507936507936,
126
+ "eval_runtime": 0.9268,
127
+ "eval_samples_per_second": 544.884,
128
+ "eval_steps_per_second": 17.264,
129
  "step": 1782
130
  },
131
  {
132
  "epoch": 7.0,
133
+ "grad_norm": 0.440789133310318,
134
+ "learning_rate": 2.92834241668291e-06,
135
+ "loss": 0.2755,
136
  "step": 2079
137
  },
138
  {
139
  "epoch": 7.0,
140
+ "eval_accuracy": 0.5108910891089109,
141
+ "eval_f1": 0.13937282229965156,
142
+ "eval_loss": 0.27257436513900757,
143
+ "eval_mcc": 0.039524509362779174,
144
+ "eval_precision": 0.5714285714285714,
145
+ "eval_recall": 0.07936507936507936,
146
+ "eval_runtime": 0.9289,
147
+ "eval_samples_per_second": 543.634,
148
+ "eval_steps_per_second": 17.224,
149
  "step": 2079
150
  },
151
  {
152
  "epoch": 8.0,
153
+ "grad_norm": 0.6993103623390198,
154
+ "learning_rate": 1.464171208341455e-06,
155
+ "loss": 0.2751,
156
  "step": 2376
157
  },
158
  {
159
  "epoch": 8.0,
160
+ "eval_accuracy": 0.5089108910891089,
161
+ "eval_f1": 0.1267605633802817,
162
+ "eval_loss": 0.27230069041252136,
163
+ "eval_mcc": 0.03302792352019969,
164
+ "eval_precision": 0.5625,
165
+ "eval_recall": 0.07142857142857142,
166
+ "eval_runtime": 0.9319,
167
+ "eval_samples_per_second": 541.932,
168
+ "eval_steps_per_second": 17.17,
169
  "step": 2376
170
  },
171
  {
172
  "epoch": 9.0,
173
+ "grad_norm": 0.5155681371688843,
174
  "learning_rate": 0.0,
175
+ "loss": 0.2752,
176
  "step": 2673
177
  },
178
  {
179
  "epoch": 9.0,
180
+ "eval_accuracy": 0.5069306930693069,
181
+ "eval_f1": 0.1263157894736842,
182
+ "eval_loss": 0.2723012864589691,
183
+ "eval_mcc": 0.024561362814430283,
184
+ "eval_precision": 0.5454545454545454,
185
+ "eval_recall": 0.07142857142857142,
186
+ "eval_runtime": 0.9301,
187
+ "eval_samples_per_second": 542.98,
188
+ "eval_steps_per_second": 17.203,
189
  "step": 2673
190
  }
191
  ],
 
194
  "num_input_tokens_seen": 0,
195
  "num_train_epochs": 9,
196
  "save_steps": 500,
197
+ "total_flos": 6576311393820.0,
198
  "train_batch_size": 32,
199
  "trial_name": null,
200
  "trial_params": {
201
+ "alpha": 0.33782488262757904,
202
+ "learning_rate": 1.3177540875073095e-05,
203
  "num_train_epochs": 9,
204
+ "temperature": 26
205
  }
206
  }
run-2/checkpoint-2673/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:53af5913dd7a7ac18c4af82b6aa52534a7091d8ef8c41f1056aa7fa989430b5d
3
- size 4984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28b3023982056159c033a3a1a25c2d159010d9ff247770a44c7d7d6d0e3bba5b
3
+ size 4920
run-3/checkpoint-297/config.json CHANGED
@@ -27,7 +27,7 @@
27
  "position_embedding_type": "absolute",
28
  "problem_type": "single_label_classification",
29
  "torch_dtype": "float32",
30
- "transformers_version": "4.38.2",
31
  "type_vocab_size": 2,
32
  "use_cache": true,
33
  "vocab_size": 30522
 
27
  "position_embedding_type": "absolute",
28
  "problem_type": "single_label_classification",
29
  "torch_dtype": "float32",
30
+ "transformers_version": "4.39.1",
31
  "type_vocab_size": 2,
32
  "use_cache": true,
33
  "vocab_size": 30522
run-3/checkpoint-297/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76220d7fcfb19b8bac2ae99022cae216a90b830bf00b3012d8b613a63d0bf947
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8535679c03461b298018d74a95c8d714c32a4f38defdc9aadd1837a148497803
3
  size 17549312
run-3/checkpoint-297/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2774d92a34e603acd644175167e2e31366f8af947ecea6c85f0689d20f0ae0b8
3
- size 35122746
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60be5d5c92b1bb86ec6b7a46b5ac62e4e96cd5078a9b61abbbb3234637c75f50
3
+ size 35123898
run-3/checkpoint-297/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f414017d19e8a66d09e6a16c0bca909eff6c9e5541f54da3f0dba2607378e04d
3
- size 14054
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5f5e396c574b9b2ecc40d3f9e87d628a8f4be69caef81905a47fcf8895067e6
3
+ size 14308
run-3/checkpoint-297/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7dc0e11b8c40cbed76dc3148c9dd20b9b6423efc82e18995f762ffefe8f96c5f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9270fba2490052dbb63d40feaa1bf95c7d144c1550805c5447392f630cb53251
3
  size 1064
run-3/checkpoint-297/tokenizer.json CHANGED
@@ -2,7 +2,7 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 33,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 31,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
run-3/checkpoint-297/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.5128712871287129,
3
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-3/checkpoint-297",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
@@ -10,18 +10,22 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 0.9984288811683655,
14
- "learning_rate": 9.995670829688553e-05,
15
- "loss": 0.5706,
16
  "step": 297
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.5128712871287129,
21
- "eval_loss": 0.5624967217445374,
22
- "eval_runtime": 13.6049,
23
- "eval_samples_per_second": 37.119,
24
- "eval_steps_per_second": 1.176,
 
 
 
 
25
  "step": 297
26
  }
27
  ],
@@ -30,13 +34,13 @@
30
  "num_input_tokens_seen": 0,
31
  "num_train_epochs": 3,
32
  "save_steps": 500,
33
- "total_flos": 777843283140.0,
34
  "train_batch_size": 32,
35
  "trial_name": null,
36
  "trial_params": {
37
- "alpha": 0.7785816803005383,
38
- "learning_rate": 0.0001499350624453283,
39
  "num_train_epochs": 3,
40
- "temperature": 16
41
  }
42
  }
 
1
  {
2
+ "best_metric": 0.5168316831683168,
3
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-3/checkpoint-297",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 0.8371890187263489,
14
+ "learning_rate": 0.000508582557644255,
15
+ "loss": 0.309,
16
  "step": 297
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.5168316831683168,
21
+ "eval_f1": 0.1643835616438356,
22
+ "eval_loss": 0.30045250058174133,
23
+ "eval_mcc": 0.059239742807176775,
24
+ "eval_precision": 0.6,
25
+ "eval_recall": 0.09523809523809523,
26
+ "eval_runtime": 0.9336,
27
+ "eval_samples_per_second": 540.929,
28
+ "eval_steps_per_second": 17.138,
29
  "step": 297
30
  }
31
  ],
 
34
  "num_input_tokens_seen": 0,
35
  "num_train_epochs": 3,
36
  "save_steps": 500,
37
+ "total_flos": 730701265980.0,
38
  "train_batch_size": 32,
39
  "trial_name": null,
40
  "trial_params": {
41
+ "alpha": 0.37059488996882817,
42
+ "learning_rate": 0.0007628738364663827,
43
  "num_train_epochs": 3,
44
+ "temperature": 5
45
  }
46
  }
run-3/checkpoint-297/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9decda4f5411e0ed12b24ed68c82ec0237bdb8018a369738f83b0ca5fb77d628
3
- size 4984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c77ece67af99569b700b80b7c3d227a73e33d56768e0354e21fbf558f6ec676a
3
+ size 4920
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f62d580257060cdaf08ca2f6bebff447fe36fd6a4ed09c360f3a936d71824ec
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c77ece67af99569b700b80b7c3d227a73e33d56768e0354e21fbf558f6ec676a
3
  size 4920