File size: 5,857 Bytes
46b71dd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
{
  "best_metric": 0.8414872798434442,
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-14/checkpoint-960",
  "epoch": 10.0,
  "eval_steps": 500,
  "global_step": 960,
  "is_hyper_param_search": true,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 2.5439341068267822,
      "learning_rate": 0.00025636357557273684,
      "loss": 0.5365,
      "step": 96
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.7935420743639922,
      "eval_f1": 0.8087035358114234,
      "eval_loss": 0.4461754560470581,
      "eval_precision": 0.7533783783783784,
      "eval_recall": 0.87279843444227,
      "eval_runtime": 25.9122,
      "eval_samples_per_second": 39.441,
      "eval_steps_per_second": 1.235,
      "step": 96
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.693763732910156,
      "learning_rate": 0.00022787873384243272,
      "loss": 0.4548,
      "step": 192
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.8003913894324853,
      "eval_f1": 0.8216783216783217,
      "eval_loss": 0.43167614936828613,
      "eval_precision": 0.7424960505529226,
      "eval_recall": 0.9197651663405088,
      "eval_runtime": 25.6332,
      "eval_samples_per_second": 39.87,
      "eval_steps_per_second": 1.248,
      "step": 192
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.1225955486297607,
      "learning_rate": 0.00019939389211212863,
      "loss": 0.4276,
      "step": 288
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.812133072407045,
      "eval_f1": 0.8282647584973166,
      "eval_loss": 0.4346017837524414,
      "eval_precision": 0.7627677100494233,
      "eval_recall": 0.9060665362035225,
      "eval_runtime": 25.7026,
      "eval_samples_per_second": 39.762,
      "eval_steps_per_second": 1.245,
      "step": 288
    },
    {
      "epoch": 4.0,
      "grad_norm": 2.9123752117156982,
      "learning_rate": 0.00017090905038182454,
      "loss": 0.4092,
      "step": 384
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.8228962818003914,
      "eval_f1": 0.8297271872060207,
      "eval_loss": 0.3994919955730438,
      "eval_precision": 0.7989130434782609,
      "eval_recall": 0.863013698630137,
      "eval_runtime": 25.5493,
      "eval_samples_per_second": 40.001,
      "eval_steps_per_second": 1.252,
      "step": 384
    },
    {
      "epoch": 5.0,
      "grad_norm": 2.3602194786071777,
      "learning_rate": 0.00014242420865152045,
      "loss": 0.3965,
      "step": 480
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.824853228962818,
      "eval_f1": 0.8306527909176915,
      "eval_loss": 0.407778263092041,
      "eval_precision": 0.8040293040293041,
      "eval_recall": 0.8590998043052838,
      "eval_runtime": 25.3461,
      "eval_samples_per_second": 40.322,
      "eval_steps_per_second": 1.263,
      "step": 480
    },
    {
      "epoch": 6.0,
      "grad_norm": 2.538515567779541,
      "learning_rate": 0.00011393936692121636,
      "loss": 0.3968,
      "step": 576
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.8317025440313112,
      "eval_f1": 0.8477876106194692,
      "eval_loss": 0.4041251838207245,
      "eval_precision": 0.7738287560581584,
      "eval_recall": 0.9373776908023483,
      "eval_runtime": 25.09,
      "eval_samples_per_second": 40.733,
      "eval_steps_per_second": 1.275,
      "step": 576
    },
    {
      "epoch": 7.0,
      "grad_norm": 2.204845428466797,
      "learning_rate": 8.545452519091227e-05,
      "loss": 0.3854,
      "step": 672
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.8365949119373777,
      "eval_f1": 0.8499550763701708,
      "eval_loss": 0.39838555455207825,
      "eval_precision": 0.7857142857142857,
      "eval_recall": 0.9256360078277887,
      "eval_runtime": 24.9237,
      "eval_samples_per_second": 41.005,
      "eval_steps_per_second": 1.284,
      "step": 672
    },
    {
      "epoch": 8.0,
      "grad_norm": 2.975243091583252,
      "learning_rate": 5.696968346060818e-05,
      "loss": 0.3772,
      "step": 768
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.837573385518591,
      "eval_f1": 0.8520499108734402,
      "eval_loss": 0.39499443769454956,
      "eval_precision": 0.7823240589198036,
      "eval_recall": 0.9354207436399217,
      "eval_runtime": 24.9105,
      "eval_samples_per_second": 41.027,
      "eval_steps_per_second": 1.285,
      "step": 768
    },
    {
      "epoch": 9.0,
      "grad_norm": 3.0168142318725586,
      "learning_rate": 2.848484173030409e-05,
      "loss": 0.372,
      "step": 864
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.8405088062622309,
      "eval_f1": 0.8500459981600736,
      "eval_loss": 0.3911450505256653,
      "eval_precision": 0.8020833333333334,
      "eval_recall": 0.9041095890410958,
      "eval_runtime": 24.8944,
      "eval_samples_per_second": 41.053,
      "eval_steps_per_second": 1.285,
      "step": 864
    },
    {
      "epoch": 10.0,
      "grad_norm": 3.654553174972534,
      "learning_rate": 0.0,
      "loss": 0.3739,
      "step": 960
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.8414872798434442,
      "eval_f1": 0.852994555353902,
      "eval_loss": 0.39094072580337524,
      "eval_precision": 0.7952622673434856,
      "eval_recall": 0.9197651663405088,
      "eval_runtime": 24.9774,
      "eval_samples_per_second": 40.917,
      "eval_steps_per_second": 1.281,
      "step": 960
    }
  ],
  "logging_steps": 500,
  "max_steps": 960,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "total_flos": 2356951972800.0,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": {
    "alpha": 0.8320855687119669,
    "learning_rate": 0.0002848484173030409,
    "num_train_epochs": 10,
    "temperature": 8
  }
}