File size: 8,493 Bytes
10f7e8f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
{
  "best_metric": 0.6666666666666666,
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-0/checkpoint-214",
  "epoch": 14.0,
  "eval_steps": 500,
  "global_step": 2996,
  "is_hyper_param_search": true,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 0.3824027478694916,
      "learning_rate": 5.531898354978819e-05,
      "loss": 0.0835,
      "step": 214
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.6666666666666666,
      "eval_f1": 0.0,
      "eval_loss": 0.06421080976724625,
      "eval_mcc": 0.0,
      "eval_precision": 0.0,
      "eval_recall": 0.0,
      "eval_runtime": 3.1927,
      "eval_samples_per_second": 534.663,
      "eval_steps_per_second": 16.914,
      "step": 214
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.4931301176548004,
      "learning_rate": 5.10636771228814e-05,
      "loss": 0.066,
      "step": 428
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.6666666666666666,
      "eval_f1": 0.006980802792321117,
      "eval_loss": 0.06297493726015091,
      "eval_mcc": 0.01713474628469157,
      "eval_precision": 0.5,
      "eval_recall": 0.0035149384885764497,
      "eval_runtime": 3.3168,
      "eval_samples_per_second": 514.655,
      "eval_steps_per_second": 16.281,
      "step": 428
    },
    {
      "epoch": 3.0,
      "grad_norm": 0.38297247886657715,
      "learning_rate": 4.6808370695974625e-05,
      "loss": 0.0648,
      "step": 642
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.6666666666666666,
      "eval_f1": 0.013864818024263433,
      "eval_loss": 0.06352359801530838,
      "eval_mcc": 0.024260699053001704,
      "eval_precision": 0.5,
      "eval_recall": 0.007029876977152899,
      "eval_runtime": 3.1502,
      "eval_samples_per_second": 541.864,
      "eval_steps_per_second": 17.142,
      "step": 642
    },
    {
      "epoch": 4.0,
      "grad_norm": 0.4884311258792877,
      "learning_rate": 4.255306426906784e-05,
      "loss": 0.0642,
      "step": 856
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.6666666666666666,
      "eval_f1": 0.017271157167530225,
      "eval_loss": 0.06221030279994011,
      "eval_mcc": 0.027140265094376777,
      "eval_precision": 0.5,
      "eval_recall": 0.008787346221441126,
      "eval_runtime": 3.7762,
      "eval_samples_per_second": 452.044,
      "eval_steps_per_second": 14.3,
      "step": 856
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.7101069688796997,
      "learning_rate": 3.829775784216106e-05,
      "loss": 0.064,
      "step": 1070
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.6666666666666666,
      "eval_f1": 0.017271157167530225,
      "eval_loss": 0.06226570904254913,
      "eval_mcc": 0.027140265094376777,
      "eval_precision": 0.5,
      "eval_recall": 0.008787346221441126,
      "eval_runtime": 3.1334,
      "eval_samples_per_second": 544.769,
      "eval_steps_per_second": 17.233,
      "step": 1070
    },
    {
      "epoch": 6.0,
      "grad_norm": 0.42137953639030457,
      "learning_rate": 3.404245141525427e-05,
      "loss": 0.0637,
      "step": 1284
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.6649091974223784,
      "eval_f1": 0.02389078498293515,
      "eval_loss": 0.06161979213356972,
      "eval_mcc": 0.016686958293742785,
      "eval_precision": 0.4117647058823529,
      "eval_recall": 0.012302284710017574,
      "eval_runtime": 3.2462,
      "eval_samples_per_second": 525.852,
      "eval_steps_per_second": 16.635,
      "step": 1284
    },
    {
      "epoch": 7.0,
      "grad_norm": 0.4732053279876709,
      "learning_rate": 2.9787144988347488e-05,
      "loss": 0.0634,
      "step": 1498
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.664323374340949,
      "eval_f1": 0.020512820512820513,
      "eval_loss": 0.061539050191640854,
      "eval_mcc": 0.008597718124511362,
      "eval_precision": 0.375,
      "eval_recall": 0.01054481546572935,
      "eval_runtime": 3.1722,
      "eval_samples_per_second": 538.12,
      "eval_steps_per_second": 17.023,
      "step": 1498
    },
    {
      "epoch": 8.0,
      "grad_norm": 0.4758211374282837,
      "learning_rate": 2.55318385614407e-05,
      "loss": 0.0631,
      "step": 1712
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.6654950205038078,
      "eval_f1": 0.017211703958691912,
      "eval_loss": 0.061484288424253464,
      "eval_mcc": 0.01487410293271824,
      "eval_precision": 0.4166666666666667,
      "eval_recall": 0.008787346221441126,
      "eval_runtime": 3.1952,
      "eval_samples_per_second": 534.244,
      "eval_steps_per_second": 16.901,
      "step": 1712
    },
    {
      "epoch": 9.0,
      "grad_norm": 0.3838660418987274,
      "learning_rate": 2.127653213453392e-05,
      "loss": 0.0632,
      "step": 1926
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.6660808435852372,
      "eval_f1": 0.01724137931034483,
      "eval_loss": 0.06174994260072708,
      "eval_mcc": 0.020707884164064556,
      "eval_precision": 0.45454545454545453,
      "eval_recall": 0.008787346221441126,
      "eval_runtime": 3.1409,
      "eval_samples_per_second": 543.466,
      "eval_steps_per_second": 17.192,
      "step": 1926
    },
    {
      "epoch": 10.0,
      "grad_norm": 0.3443503677845001,
      "learning_rate": 1.7021225707627134e-05,
      "loss": 0.0629,
      "step": 2140
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.664323374340949,
      "eval_f1": 0.017152658662092625,
      "eval_loss": 0.061242878437042236,
      "eval_mcc": 0.004592958330124466,
      "eval_precision": 0.35714285714285715,
      "eval_recall": 0.008787346221441126,
      "eval_runtime": 3.1964,
      "eval_samples_per_second": 534.034,
      "eval_steps_per_second": 16.894,
      "step": 2140
    },
    {
      "epoch": 11.0,
      "grad_norm": 0.31307530403137207,
      "learning_rate": 1.276591928072035e-05,
      "loss": 0.0628,
      "step": 2354
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.6654950205038078,
      "eval_f1": 0.017211703958691912,
      "eval_loss": 0.061483997851610184,
      "eval_mcc": 0.01487410293271824,
      "eval_precision": 0.4166666666666667,
      "eval_recall": 0.008787346221441126,
      "eval_runtime": 3.1572,
      "eval_samples_per_second": 540.674,
      "eval_steps_per_second": 17.104,
      "step": 2354
    },
    {
      "epoch": 12.0,
      "grad_norm": 0.26839011907577515,
      "learning_rate": 8.510612853813567e-06,
      "loss": 0.0628,
      "step": 2568
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.6654950205038078,
      "eval_f1": 0.017211703958691912,
      "eval_loss": 0.06137599050998688,
      "eval_mcc": 0.01487410293271824,
      "eval_precision": 0.4166666666666667,
      "eval_recall": 0.008787346221441126,
      "eval_runtime": 3.1832,
      "eval_samples_per_second": 536.247,
      "eval_steps_per_second": 16.964,
      "step": 2568
    },
    {
      "epoch": 13.0,
      "grad_norm": 0.8179745674133301,
      "learning_rate": 4.2553064269067835e-06,
      "loss": 0.0626,
      "step": 2782
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.6654950205038078,
      "eval_f1": 0.017211703958691912,
      "eval_loss": 0.06123984605073929,
      "eval_mcc": 0.01487410293271824,
      "eval_precision": 0.4166666666666667,
      "eval_recall": 0.008787346221441126,
      "eval_runtime": 3.1468,
      "eval_samples_per_second": 542.461,
      "eval_steps_per_second": 17.16,
      "step": 2782
    },
    {
      "epoch": 14.0,
      "grad_norm": 0.34526437520980835,
      "learning_rate": 0.0,
      "loss": 0.0624,
      "step": 2996
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.6654950205038078,
      "eval_f1": 0.017211703958691912,
      "eval_loss": 0.061105918139219284,
      "eval_mcc": 0.01487410293271824,
      "eval_precision": 0.4166666666666667,
      "eval_recall": 0.008787346221441126,
      "eval_runtime": 3.9414,
      "eval_samples_per_second": 433.098,
      "eval_steps_per_second": 13.701,
      "step": 2996
    }
  ],
  "logging_steps": 500,
  "max_steps": 2996,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 14,
  "save_steps": 500,
  "total_flos": 7346859302160.0,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": {
    "alpha": 0.08651897585698409,
    "learning_rate": 5.9574289976694975e-05,
    "num_train_epochs": 14,
    "temperature": 43
  }
}