samtuckervegan committed (verified)
Commit 72e4137 · 1 Parent(s): 154cd4c

Upload folder using huggingface_hub

README.md ADDED
@@ -0,0 +1,28 @@
+
+ ---
+ tags:
+ - autotrain
+ - text-regression
+ base_model: distilbert/distilbert-base-uncased
+ widget:
+ - text: "I love AutoTrain"
+ datasets:
+ - samtuckervegan/text_performance
+ ---
+
+ # Model Trained Using AutoTrain
+
+ - Problem type: Text Regression
+
+ ## Validation Metrics
+ loss: 0.03380444645881653
+
+ mse: 0.033803146332502365
+
+ mae: 0.14292894303798676
+
+ r2: 0.2760580778121948
+
+ rmse: 0.18385631980571776
+
+ explained_variance: 0.2763633728027344
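
A minimal inference sketch for this text-regression model, assuming the `transformers` and `torch` packages are installed. The repo id below is a guess based on the AutoTrain project name (`textprediction`) and username, and may need to be adjusted to the actual Hub path.

```python
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

repo_id = "samtuckervegan/textprediction"  # hypothetical repo id; adjust as needed
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForSequenceClassification.from_pretrained(repo_id)

inputs = tokenizer(
    "I love AutoTrain",
    return_tensors="pt",
    truncation=True,
    max_length=128,  # matches max_seq_length used during training
)
with torch.no_grad():
    outputs = model(**inputs)

# problem_type is "regression" with a single label, so the logits hold the predicted score.
score = outputs.logits.squeeze(-1).item()
print(score)
```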
checkpoint-4916/config.json ADDED
@@ -0,0 +1,32 @@
+ {
+ "_name_or_path": "distilbert/distilbert-base-uncased",
+ "_num_labels": 1,
+ "activation": "gelu",
+ "architectures": [
+ "DistilBertForSequenceClassification"
+ ],
+ "attention_dropout": 0.1,
+ "dim": 768,
+ "dropout": 0.1,
+ "hidden_dim": 3072,
+ "id2label": {
+ "0": "target"
+ },
+ "initializer_range": 0.02,
+ "label2id": {
+ "target": 0
+ },
+ "max_position_embeddings": 512,
+ "model_type": "distilbert",
+ "n_heads": 12,
+ "n_layers": 6,
+ "pad_token_id": 0,
+ "problem_type": "regression",
+ "qa_dropout": 0.1,
+ "seq_classif_dropout": 0.2,
+ "sinusoidal_pos_embds": false,
+ "tie_weights_": true,
+ "torch_dtype": "float32",
+ "transformers_version": "4.48.0",
+ "vocab_size": 30522
+ }
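
The config declares a single regression target (`_num_labels: 1`, `problem_type: "regression"`), so the sequence-classification head outputs one scalar per input. A quick sketch of how `transformers` interprets this, assuming the package is installed and using an illustrative local path:

```python
from transformers import AutoConfig, AutoModelForSequenceClassification

# Load the checkpoint's config; the local path is illustrative.
config = AutoConfig.from_pretrained("checkpoint-4916")
print(config.num_labels)    # 1 -> regression head with a single output
print(config.problem_type)  # "regression" -> the model's forward uses MSELoss when labels are given

# Building a model from this config gives a DistilBERT encoder plus a 1-dimensional head.
model = AutoModelForSequenceClassification.from_config(config)
print(model.classifier.out_features)  # 1
```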
checkpoint-4916/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:52992a6b1bb7b233e4054bc086b088013d7e7613bcffead944d24fd70d64c55e
+ size 267829484
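
These three lines are a Git LFS pointer: the actual weights live in LFS storage and are identified by their SHA-256 digest and size. A small standard-library sketch for checking that a downloaded `model.safetensors` matches the pointer (the file path is illustrative):

```python
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file and return its hex SHA-256 digest."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "52992a6b1bb7b233e4054bc086b088013d7e7613bcffead944d24fd70d64c55e"
assert sha256_of("checkpoint-4916/model.safetensors") == expected
```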
checkpoint-4916/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:64c1456eb332d082a347d7cad4735bc4250845738e02e04a63fa9f17e819942a
+ size 535721146
checkpoint-4916/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c8fac6987c200600e610946b53cf319ed29c1564951f6e89145ce16858af5139
+ size 15024
checkpoint-4916/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7b0f8befb7da5855d4149667159792c7f547534617d1616470ae82b4e1c4af49
+ size 15024
checkpoint-4916/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:aa0ca0b967f2e548cdf3eee611a80ad48edb340060335df03d47e61c93cbc6aa
+ size 15024
checkpoint-4916/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:754aa36cc24cc6e3f8bda45f9c465b2a33ed0e611d1fa8443bdfc2f99b182652
+ size 15024
checkpoint-4916/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b39d933709e10bf97e1b41f5aee68f3dcc996b1173e11cc1c55ae6d677f967fa
+ size 1064
checkpoint-4916/trainer_state.json ADDED
@@ -0,0 +1,1440 @@
1
+ {
2
+ "best_metric": 0.03380444645881653,
3
+ "best_model_checkpoint": "textprediction/checkpoint-4916",
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 4916,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.01017087062652563,
13
+ "grad_norm": 4.562432289123535,
14
+ "learning_rate": 1.6260162601626018e-06,
15
+ "loss": 0.1968,
16
+ "step": 25
17
+ },
18
+ {
19
+ "epoch": 0.02034174125305126,
20
+ "grad_norm": 1.4055429697036743,
21
+ "learning_rate": 3.3197831978319785e-06,
22
+ "loss": 0.0977,
23
+ "step": 50
24
+ },
25
+ {
26
+ "epoch": 0.030512611879576892,
27
+ "grad_norm": 0.8549578189849854,
28
+ "learning_rate": 5.013550135501355e-06,
29
+ "loss": 0.0498,
30
+ "step": 75
31
+ },
32
+ {
33
+ "epoch": 0.04068348250610252,
34
+ "grad_norm": 0.41762399673461914,
35
+ "learning_rate": 6.707317073170733e-06,
36
+ "loss": 0.0469,
37
+ "step": 100
38
+ },
39
+ {
40
+ "epoch": 0.050854353132628156,
41
+ "grad_norm": 0.4172782897949219,
42
+ "learning_rate": 8.401084010840109e-06,
43
+ "loss": 0.0487,
44
+ "step": 125
45
+ },
46
+ {
47
+ "epoch": 0.061025223759153785,
48
+ "grad_norm": 0.45810267329216003,
49
+ "learning_rate": 1.0094850948509485e-05,
50
+ "loss": 0.0449,
51
+ "step": 150
52
+ },
53
+ {
54
+ "epoch": 0.07119609438567942,
55
+ "grad_norm": 0.41910672187805176,
56
+ "learning_rate": 1.1788617886178862e-05,
57
+ "loss": 0.0417,
58
+ "step": 175
59
+ },
60
+ {
61
+ "epoch": 0.08136696501220504,
62
+ "grad_norm": 0.36320051550865173,
63
+ "learning_rate": 1.348238482384824e-05,
64
+ "loss": 0.0421,
65
+ "step": 200
66
+ },
67
+ {
68
+ "epoch": 0.09153783563873068,
69
+ "grad_norm": 0.434171199798584,
70
+ "learning_rate": 1.5176151761517615e-05,
71
+ "loss": 0.0465,
72
+ "step": 225
73
+ },
74
+ {
75
+ "epoch": 0.10170870626525631,
76
+ "grad_norm": 0.3281106948852539,
77
+ "learning_rate": 1.6869918699186994e-05,
78
+ "loss": 0.0433,
79
+ "step": 250
80
+ },
81
+ {
82
+ "epoch": 0.11187957689178193,
83
+ "grad_norm": 0.36068961024284363,
84
+ "learning_rate": 1.856368563685637e-05,
85
+ "loss": 0.0439,
86
+ "step": 275
87
+ },
88
+ {
89
+ "epoch": 0.12205044751830757,
90
+ "grad_norm": 0.5031670928001404,
91
+ "learning_rate": 2.0257452574525744e-05,
92
+ "loss": 0.0423,
93
+ "step": 300
94
+ },
95
+ {
96
+ "epoch": 0.1322213181448332,
97
+ "grad_norm": 1.1825827360153198,
98
+ "learning_rate": 2.1951219512195124e-05,
99
+ "loss": 0.0427,
100
+ "step": 325
101
+ },
102
+ {
103
+ "epoch": 0.14239218877135884,
104
+ "grad_norm": 0.3233006000518799,
105
+ "learning_rate": 2.36449864498645e-05,
106
+ "loss": 0.0434,
107
+ "step": 350
108
+ },
109
+ {
110
+ "epoch": 0.15256305939788445,
111
+ "grad_norm": 0.4184306561946869,
112
+ "learning_rate": 2.5338753387533877e-05,
113
+ "loss": 0.0423,
114
+ "step": 375
115
+ },
116
+ {
117
+ "epoch": 0.16273393002441008,
118
+ "grad_norm": 0.38505345582962036,
119
+ "learning_rate": 2.7032520325203254e-05,
120
+ "loss": 0.0415,
121
+ "step": 400
122
+ },
123
+ {
124
+ "epoch": 0.17290480065093572,
125
+ "grad_norm": 0.3058652877807617,
126
+ "learning_rate": 2.872628726287263e-05,
127
+ "loss": 0.0431,
128
+ "step": 425
129
+ },
130
+ {
131
+ "epoch": 0.18307567127746135,
132
+ "grad_norm": 0.43836578726768494,
133
+ "learning_rate": 3.0420054200542007e-05,
134
+ "loss": 0.0417,
135
+ "step": 450
136
+ },
137
+ {
138
+ "epoch": 0.193246541903987,
139
+ "grad_norm": 0.5372226238250732,
140
+ "learning_rate": 3.2113821138211384e-05,
141
+ "loss": 0.0387,
142
+ "step": 475
143
+ },
144
+ {
145
+ "epoch": 0.20341741253051263,
146
+ "grad_norm": 0.4316234886646271,
147
+ "learning_rate": 3.380758807588076e-05,
148
+ "loss": 0.0425,
149
+ "step": 500
150
+ },
151
+ {
152
+ "epoch": 0.21358828315703823,
153
+ "grad_norm": 0.4142661988735199,
154
+ "learning_rate": 3.550135501355014e-05,
155
+ "loss": 0.0416,
156
+ "step": 525
157
+ },
158
+ {
159
+ "epoch": 0.22375915378356387,
160
+ "grad_norm": 0.2900916635990143,
161
+ "learning_rate": 3.7195121951219514e-05,
162
+ "loss": 0.044,
163
+ "step": 550
164
+ },
165
+ {
166
+ "epoch": 0.2339300244100895,
167
+ "grad_norm": 0.6529517769813538,
168
+ "learning_rate": 3.888888888888889e-05,
169
+ "loss": 0.0395,
170
+ "step": 575
171
+ },
172
+ {
173
+ "epoch": 0.24410089503661514,
174
+ "grad_norm": 0.4228176176548004,
175
+ "learning_rate": 4.058265582655827e-05,
176
+ "loss": 0.0379,
177
+ "step": 600
178
+ },
179
+ {
180
+ "epoch": 0.25427176566314075,
181
+ "grad_norm": 0.4153424799442291,
182
+ "learning_rate": 4.2276422764227644e-05,
183
+ "loss": 0.0419,
184
+ "step": 625
185
+ },
186
+ {
187
+ "epoch": 0.2644426362896664,
188
+ "grad_norm": 0.5038943290710449,
189
+ "learning_rate": 4.397018970189702e-05,
190
+ "loss": 0.0426,
191
+ "step": 650
192
+ },
193
+ {
194
+ "epoch": 0.274613506916192,
195
+ "grad_norm": 0.518610417842865,
196
+ "learning_rate": 4.56639566395664e-05,
197
+ "loss": 0.0415,
198
+ "step": 675
199
+ },
200
+ {
201
+ "epoch": 0.2847843775427177,
202
+ "grad_norm": 0.22950297594070435,
203
+ "learning_rate": 4.7357723577235774e-05,
204
+ "loss": 0.0401,
205
+ "step": 700
206
+ },
207
+ {
208
+ "epoch": 0.2949552481692433,
209
+ "grad_norm": 0.42790016531944275,
210
+ "learning_rate": 4.905149051490515e-05,
211
+ "loss": 0.0431,
212
+ "step": 725
213
+ },
214
+ {
215
+ "epoch": 0.3051261187957689,
216
+ "grad_norm": 0.24033579230308533,
217
+ "learning_rate": 4.991711874623267e-05,
218
+ "loss": 0.0436,
219
+ "step": 750
220
+ },
221
+ {
222
+ "epoch": 0.31529698942229456,
223
+ "grad_norm": 0.26290884613990784,
224
+ "learning_rate": 4.972875226039783e-05,
225
+ "loss": 0.0439,
226
+ "step": 775
227
+ },
228
+ {
229
+ "epoch": 0.32546786004882017,
230
+ "grad_norm": 0.20243217051029205,
231
+ "learning_rate": 4.9540385774562993e-05,
232
+ "loss": 0.04,
233
+ "step": 800
234
+ },
235
+ {
236
+ "epoch": 0.33563873067534583,
237
+ "grad_norm": 0.17254669964313507,
238
+ "learning_rate": 4.935201928872815e-05,
239
+ "loss": 0.0405,
240
+ "step": 825
241
+ },
242
+ {
243
+ "epoch": 0.34580960130187144,
244
+ "grad_norm": 0.2651669681072235,
245
+ "learning_rate": 4.916365280289331e-05,
246
+ "loss": 0.0376,
247
+ "step": 850
248
+ },
249
+ {
250
+ "epoch": 0.35598047192839705,
251
+ "grad_norm": 0.4401779770851135,
252
+ "learning_rate": 4.897528631705847e-05,
253
+ "loss": 0.038,
254
+ "step": 875
255
+ },
256
+ {
257
+ "epoch": 0.3661513425549227,
258
+ "grad_norm": 0.23679669201374054,
259
+ "learning_rate": 4.878691983122363e-05,
260
+ "loss": 0.0417,
261
+ "step": 900
262
+ },
263
+ {
264
+ "epoch": 0.3763222131814483,
265
+ "grad_norm": 0.2824667990207672,
266
+ "learning_rate": 4.8598553345388795e-05,
267
+ "loss": 0.0394,
268
+ "step": 925
269
+ },
270
+ {
271
+ "epoch": 0.386493083807974,
272
+ "grad_norm": 0.24676434695720673,
273
+ "learning_rate": 4.841018685955395e-05,
274
+ "loss": 0.0375,
275
+ "step": 950
276
+ },
277
+ {
278
+ "epoch": 0.3966639544344996,
279
+ "grad_norm": 0.3242221176624298,
280
+ "learning_rate": 4.822182037371911e-05,
281
+ "loss": 0.0384,
282
+ "step": 975
283
+ },
284
+ {
285
+ "epoch": 0.40683482506102525,
286
+ "grad_norm": 0.4461381733417511,
287
+ "learning_rate": 4.8033453887884274e-05,
288
+ "loss": 0.0376,
289
+ "step": 1000
290
+ },
291
+ {
292
+ "epoch": 0.41700569568755086,
293
+ "grad_norm": 0.3799073398113251,
294
+ "learning_rate": 4.784508740204943e-05,
295
+ "loss": 0.0415,
296
+ "step": 1025
297
+ },
298
+ {
299
+ "epoch": 0.42717656631407647,
300
+ "grad_norm": 0.21580813825130463,
301
+ "learning_rate": 4.765672091621459e-05,
302
+ "loss": 0.0389,
303
+ "step": 1050
304
+ },
305
+ {
306
+ "epoch": 0.43734743694060213,
307
+ "grad_norm": 0.27040156722068787,
308
+ "learning_rate": 4.7468354430379746e-05,
309
+ "loss": 0.0393,
310
+ "step": 1075
311
+ },
312
+ {
313
+ "epoch": 0.44751830756712774,
314
+ "grad_norm": 0.7307707071304321,
315
+ "learning_rate": 4.7279987944544904e-05,
316
+ "loss": 0.0411,
317
+ "step": 1100
318
+ },
319
+ {
320
+ "epoch": 0.4576891781936534,
321
+ "grad_norm": 0.29991018772125244,
322
+ "learning_rate": 4.709162145871007e-05,
323
+ "loss": 0.0396,
324
+ "step": 1125
325
+ },
326
+ {
327
+ "epoch": 0.467860048820179,
328
+ "grad_norm": 0.16599082946777344,
329
+ "learning_rate": 4.6903254972875226e-05,
330
+ "loss": 0.0385,
331
+ "step": 1150
332
+ },
333
+ {
334
+ "epoch": 0.4780309194467046,
335
+ "grad_norm": 0.23768579959869385,
336
+ "learning_rate": 4.671488848704039e-05,
337
+ "loss": 0.0391,
338
+ "step": 1175
339
+ },
340
+ {
341
+ "epoch": 0.4882017900732303,
342
+ "grad_norm": 0.43703967332839966,
343
+ "learning_rate": 4.652652200120555e-05,
344
+ "loss": 0.0371,
345
+ "step": 1200
346
+ },
347
+ {
348
+ "epoch": 0.4983726606997559,
349
+ "grad_norm": 0.17367880046367645,
350
+ "learning_rate": 4.6338155515370705e-05,
351
+ "loss": 0.0393,
352
+ "step": 1225
353
+ },
354
+ {
355
+ "epoch": 0.5085435313262815,
356
+ "grad_norm": 0.2584339678287506,
357
+ "learning_rate": 4.614978902953587e-05,
358
+ "loss": 0.0389,
359
+ "step": 1250
360
+ },
361
+ {
362
+ "epoch": 0.5187144019528072,
363
+ "grad_norm": 0.3418385088443756,
364
+ "learning_rate": 4.596142254370103e-05,
365
+ "loss": 0.0372,
366
+ "step": 1275
367
+ },
368
+ {
369
+ "epoch": 0.5288852725793328,
370
+ "grad_norm": 0.41003596782684326,
371
+ "learning_rate": 4.5773056057866184e-05,
372
+ "loss": 0.0358,
373
+ "step": 1300
374
+ },
375
+ {
376
+ "epoch": 0.5390561432058584,
377
+ "grad_norm": 0.5690125823020935,
378
+ "learning_rate": 4.558468957203135e-05,
379
+ "loss": 0.0424,
380
+ "step": 1325
381
+ },
382
+ {
383
+ "epoch": 0.549227013832384,
384
+ "grad_norm": 0.25894445180892944,
385
+ "learning_rate": 4.5396323086196506e-05,
386
+ "loss": 0.0404,
387
+ "step": 1350
388
+ },
389
+ {
390
+ "epoch": 0.5593978844589097,
391
+ "grad_norm": 0.3278766870498657,
392
+ "learning_rate": 4.520795660036167e-05,
393
+ "loss": 0.0394,
394
+ "step": 1375
395
+ },
396
+ {
397
+ "epoch": 0.5695687550854354,
398
+ "grad_norm": 0.2761504352092743,
399
+ "learning_rate": 4.501959011452683e-05,
400
+ "loss": 0.0376,
401
+ "step": 1400
402
+ },
403
+ {
404
+ "epoch": 0.5797396257119609,
405
+ "grad_norm": 0.1649962216615677,
406
+ "learning_rate": 4.4831223628691985e-05,
407
+ "loss": 0.0379,
408
+ "step": 1425
409
+ },
410
+ {
411
+ "epoch": 0.5899104963384866,
412
+ "grad_norm": 0.19504110515117645,
413
+ "learning_rate": 4.464285714285715e-05,
414
+ "loss": 0.0371,
415
+ "step": 1450
416
+ },
417
+ {
418
+ "epoch": 0.6000813669650122,
419
+ "grad_norm": 0.17408576607704163,
420
+ "learning_rate": 4.445449065702231e-05,
421
+ "loss": 0.0394,
422
+ "step": 1475
423
+ },
424
+ {
425
+ "epoch": 0.6102522375915378,
426
+ "grad_norm": 0.5323575139045715,
427
+ "learning_rate": 4.4266124171187465e-05,
428
+ "loss": 0.0397,
429
+ "step": 1500
430
+ },
431
+ {
432
+ "epoch": 0.6204231082180635,
433
+ "grad_norm": 0.6240959167480469,
434
+ "learning_rate": 4.407775768535262e-05,
435
+ "loss": 0.0382,
436
+ "step": 1525
437
+ },
438
+ {
439
+ "epoch": 0.6305939788445891,
440
+ "grad_norm": 0.34519892930984497,
441
+ "learning_rate": 4.388939119951778e-05,
442
+ "loss": 0.0412,
443
+ "step": 1550
444
+ },
445
+ {
446
+ "epoch": 0.6407648494711147,
447
+ "grad_norm": 0.6749323606491089,
448
+ "learning_rate": 4.3701024713682944e-05,
449
+ "loss": 0.039,
450
+ "step": 1575
451
+ },
452
+ {
453
+ "epoch": 0.6509357200976403,
454
+ "grad_norm": 0.3768059313297272,
455
+ "learning_rate": 4.35126582278481e-05,
456
+ "loss": 0.0386,
457
+ "step": 1600
458
+ },
459
+ {
460
+ "epoch": 0.661106590724166,
461
+ "grad_norm": 0.22068104147911072,
462
+ "learning_rate": 4.332429174201326e-05,
463
+ "loss": 0.0383,
464
+ "step": 1625
465
+ },
466
+ {
467
+ "epoch": 0.6712774613506917,
468
+ "grad_norm": 0.14016976952552795,
469
+ "learning_rate": 4.313592525617842e-05,
470
+ "loss": 0.039,
471
+ "step": 1650
472
+ },
473
+ {
474
+ "epoch": 0.6814483319772172,
475
+ "grad_norm": 0.17742925882339478,
476
+ "learning_rate": 4.294755877034358e-05,
477
+ "loss": 0.0369,
478
+ "step": 1675
479
+ },
480
+ {
481
+ "epoch": 0.6916192026037429,
482
+ "grad_norm": 0.1665901243686676,
483
+ "learning_rate": 4.2759192284508745e-05,
484
+ "loss": 0.0361,
485
+ "step": 1700
486
+ },
487
+ {
488
+ "epoch": 0.7017900732302685,
489
+ "grad_norm": 0.20996783673763275,
490
+ "learning_rate": 4.25708257986739e-05,
491
+ "loss": 0.0372,
492
+ "step": 1725
493
+ },
494
+ {
495
+ "epoch": 0.7119609438567941,
496
+ "grad_norm": 0.5258492827415466,
497
+ "learning_rate": 4.238245931283906e-05,
498
+ "loss": 0.0377,
499
+ "step": 1750
500
+ },
501
+ {
502
+ "epoch": 0.7221318144833198,
503
+ "grad_norm": 0.2202778309583664,
504
+ "learning_rate": 4.2194092827004224e-05,
505
+ "loss": 0.0384,
506
+ "step": 1775
507
+ },
508
+ {
509
+ "epoch": 0.7323026851098454,
510
+ "grad_norm": 0.6594241857528687,
511
+ "learning_rate": 4.200572634116938e-05,
512
+ "loss": 0.0329,
513
+ "step": 1800
514
+ },
515
+ {
516
+ "epoch": 0.7424735557363711,
517
+ "grad_norm": 0.19332900643348694,
518
+ "learning_rate": 4.181735985533454e-05,
519
+ "loss": 0.0386,
520
+ "step": 1825
521
+ },
522
+ {
523
+ "epoch": 0.7526444263628966,
524
+ "grad_norm": 0.22850601375102997,
525
+ "learning_rate": 4.1628993369499704e-05,
526
+ "loss": 0.0367,
527
+ "step": 1850
528
+ },
529
+ {
530
+ "epoch": 0.7628152969894223,
531
+ "grad_norm": 0.5552480220794678,
532
+ "learning_rate": 4.144062688366486e-05,
533
+ "loss": 0.0355,
534
+ "step": 1875
535
+ },
536
+ {
537
+ "epoch": 0.772986167615948,
538
+ "grad_norm": 0.5165444612503052,
539
+ "learning_rate": 4.1252260397830025e-05,
540
+ "loss": 0.0373,
541
+ "step": 1900
542
+ },
543
+ {
544
+ "epoch": 0.7831570382424735,
545
+ "grad_norm": 0.1546785682439804,
546
+ "learning_rate": 4.1063893911995176e-05,
547
+ "loss": 0.0328,
548
+ "step": 1925
549
+ },
550
+ {
551
+ "epoch": 0.7933279088689992,
552
+ "grad_norm": 0.45292505621910095,
553
+ "learning_rate": 4.087552742616034e-05,
554
+ "loss": 0.0358,
555
+ "step": 1950
556
+ },
557
+ {
558
+ "epoch": 0.8034987794955248,
559
+ "grad_norm": 0.31234049797058105,
560
+ "learning_rate": 4.06871609403255e-05,
561
+ "loss": 0.0375,
562
+ "step": 1975
563
+ },
564
+ {
565
+ "epoch": 0.8136696501220505,
566
+ "grad_norm": 0.26489096879959106,
567
+ "learning_rate": 4.0498794454490655e-05,
568
+ "loss": 0.0391,
569
+ "step": 2000
570
+ },
571
+ {
572
+ "epoch": 0.823840520748576,
573
+ "grad_norm": 0.30596041679382324,
574
+ "learning_rate": 4.031042796865582e-05,
575
+ "loss": 0.0356,
576
+ "step": 2025
577
+ },
578
+ {
579
+ "epoch": 0.8340113913751017,
580
+ "grad_norm": 0.22221428155899048,
581
+ "learning_rate": 4.012206148282098e-05,
582
+ "loss": 0.0349,
583
+ "step": 2050
584
+ },
585
+ {
586
+ "epoch": 0.8441822620016274,
587
+ "grad_norm": 0.27404096722602844,
588
+ "learning_rate": 3.9933694996986135e-05,
589
+ "loss": 0.0391,
590
+ "step": 2075
591
+ },
592
+ {
593
+ "epoch": 0.8543531326281529,
594
+ "grad_norm": 0.21221423149108887,
595
+ "learning_rate": 3.97453285111513e-05,
596
+ "loss": 0.0364,
597
+ "step": 2100
598
+ },
599
+ {
600
+ "epoch": 0.8645240032546786,
601
+ "grad_norm": 0.5386448502540588,
602
+ "learning_rate": 3.9556962025316456e-05,
603
+ "loss": 0.0369,
604
+ "step": 2125
605
+ },
606
+ {
607
+ "epoch": 0.8746948738812043,
608
+ "grad_norm": 0.2866438627243042,
609
+ "learning_rate": 3.936859553948162e-05,
610
+ "loss": 0.0363,
611
+ "step": 2150
612
+ },
613
+ {
614
+ "epoch": 0.8848657445077298,
615
+ "grad_norm": 0.300436407327652,
616
+ "learning_rate": 3.918022905364678e-05,
617
+ "loss": 0.0353,
618
+ "step": 2175
619
+ },
620
+ {
621
+ "epoch": 0.8950366151342555,
622
+ "grad_norm": 0.1889338344335556,
623
+ "learning_rate": 3.8991862567811936e-05,
624
+ "loss": 0.0362,
625
+ "step": 2200
626
+ },
627
+ {
628
+ "epoch": 0.9052074857607811,
629
+ "grad_norm": 0.47652769088745117,
630
+ "learning_rate": 3.88034960819771e-05,
631
+ "loss": 0.0394,
632
+ "step": 2225
633
+ },
634
+ {
635
+ "epoch": 0.9153783563873068,
636
+ "grad_norm": 0.43233829736709595,
637
+ "learning_rate": 3.861512959614226e-05,
638
+ "loss": 0.0377,
639
+ "step": 2250
640
+ },
641
+ {
642
+ "epoch": 0.9255492270138324,
643
+ "grad_norm": 0.3206855058670044,
644
+ "learning_rate": 3.8426763110307415e-05,
645
+ "loss": 0.0341,
646
+ "step": 2275
647
+ },
648
+ {
649
+ "epoch": 0.935720097640358,
650
+ "grad_norm": 0.2778429090976715,
651
+ "learning_rate": 3.823839662447258e-05,
652
+ "loss": 0.035,
653
+ "step": 2300
654
+ },
655
+ {
656
+ "epoch": 0.9458909682668837,
657
+ "grad_norm": 0.22730959951877594,
658
+ "learning_rate": 3.805003013863774e-05,
659
+ "loss": 0.0346,
660
+ "step": 2325
661
+ },
662
+ {
663
+ "epoch": 0.9560618388934092,
664
+ "grad_norm": 0.29823628067970276,
665
+ "learning_rate": 3.78616636528029e-05,
666
+ "loss": 0.0363,
667
+ "step": 2350
668
+ },
669
+ {
670
+ "epoch": 0.9662327095199349,
671
+ "grad_norm": 0.5740765333175659,
672
+ "learning_rate": 3.767329716696805e-05,
673
+ "loss": 0.0348,
674
+ "step": 2375
675
+ },
676
+ {
677
+ "epoch": 0.9764035801464606,
678
+ "grad_norm": 0.4176069498062134,
679
+ "learning_rate": 3.748493068113321e-05,
680
+ "loss": 0.0387,
681
+ "step": 2400
682
+ },
683
+ {
684
+ "epoch": 0.9865744507729862,
685
+ "grad_norm": 0.14277301728725433,
686
+ "learning_rate": 3.7296564195298374e-05,
687
+ "loss": 0.0362,
688
+ "step": 2425
689
+ },
690
+ {
691
+ "epoch": 0.9967453213995118,
692
+ "grad_norm": 0.20792150497436523,
693
+ "learning_rate": 3.710819770946353e-05,
694
+ "loss": 0.0351,
695
+ "step": 2450
696
+ },
697
+ {
698
+ "epoch": 1.0,
699
+ "eval_explained_variance": 0.24840307235717773,
700
+ "eval_loss": 0.03510129451751709,
701
+ "eval_mae": 0.1494196206331253,
702
+ "eval_mse": 0.035100266337394714,
703
+ "eval_r2": 0.2482784390449524,
704
+ "eval_rmse": 0.18735065075252533,
705
+ "eval_runtime": 4.0725,
706
+ "eval_samples_per_second": 4826.955,
707
+ "eval_steps_per_second": 75.628,
708
+ "step": 2458
709
+ },
710
+ {
711
+ "epoch": 1.0069161920260374,
712
+ "grad_norm": 0.4044972360134125,
713
+ "learning_rate": 3.6919831223628695e-05,
714
+ "loss": 0.0344,
715
+ "step": 2475
716
+ },
717
+ {
718
+ "epoch": 1.017087062652563,
719
+ "grad_norm": 0.1920347958803177,
720
+ "learning_rate": 3.673146473779385e-05,
721
+ "loss": 0.0333,
722
+ "step": 2500
723
+ },
724
+ {
725
+ "epoch": 1.0272579332790888,
726
+ "grad_norm": 0.2734430432319641,
727
+ "learning_rate": 3.654309825195901e-05,
728
+ "loss": 0.0329,
729
+ "step": 2525
730
+ },
731
+ {
732
+ "epoch": 1.0374288039056143,
733
+ "grad_norm": 0.2458937019109726,
734
+ "learning_rate": 3.6354731766124175e-05,
735
+ "loss": 0.0341,
736
+ "step": 2550
737
+ },
738
+ {
739
+ "epoch": 1.0475996745321399,
740
+ "grad_norm": 0.302209734916687,
741
+ "learning_rate": 3.616636528028933e-05,
742
+ "loss": 0.0336,
743
+ "step": 2575
744
+ },
745
+ {
746
+ "epoch": 1.0577705451586656,
747
+ "grad_norm": 0.23956753313541412,
748
+ "learning_rate": 3.597799879445449e-05,
749
+ "loss": 0.0351,
750
+ "step": 2600
751
+ },
752
+ {
753
+ "epoch": 1.0679414157851912,
754
+ "grad_norm": 0.4500243663787842,
755
+ "learning_rate": 3.5789632308619654e-05,
756
+ "loss": 0.031,
757
+ "step": 2625
758
+ },
759
+ {
760
+ "epoch": 1.0781122864117167,
761
+ "grad_norm": 0.23080125451087952,
762
+ "learning_rate": 3.560126582278481e-05,
763
+ "loss": 0.0358,
764
+ "step": 2650
765
+ },
766
+ {
767
+ "epoch": 1.0882831570382425,
768
+ "grad_norm": 0.44963157176971436,
769
+ "learning_rate": 3.5412899336949976e-05,
770
+ "loss": 0.0326,
771
+ "step": 2675
772
+ },
773
+ {
774
+ "epoch": 1.098454027664768,
775
+ "grad_norm": 0.721524715423584,
776
+ "learning_rate": 3.522453285111513e-05,
777
+ "loss": 0.0383,
778
+ "step": 2700
779
+ },
780
+ {
781
+ "epoch": 1.1086248982912936,
782
+ "grad_norm": 0.24731744825839996,
783
+ "learning_rate": 3.503616636528029e-05,
784
+ "loss": 0.0354,
785
+ "step": 2725
786
+ },
787
+ {
788
+ "epoch": 1.1187957689178194,
789
+ "grad_norm": 0.19786667823791504,
790
+ "learning_rate": 3.4847799879445455e-05,
791
+ "loss": 0.0335,
792
+ "step": 2750
793
+ },
794
+ {
795
+ "epoch": 1.128966639544345,
796
+ "grad_norm": 0.16557921469211578,
797
+ "learning_rate": 3.465943339361061e-05,
798
+ "loss": 0.0349,
799
+ "step": 2775
800
+ },
801
+ {
802
+ "epoch": 1.1391375101708707,
803
+ "grad_norm": 0.30080434679985046,
804
+ "learning_rate": 3.447106690777577e-05,
805
+ "loss": 0.0335,
806
+ "step": 2800
807
+ },
808
+ {
809
+ "epoch": 1.1493083807973963,
810
+ "grad_norm": 0.2929253876209259,
811
+ "learning_rate": 3.428270042194093e-05,
812
+ "loss": 0.0338,
813
+ "step": 2825
814
+ },
815
+ {
816
+ "epoch": 1.1594792514239218,
817
+ "grad_norm": 0.19096094369888306,
818
+ "learning_rate": 3.4094333936106085e-05,
819
+ "loss": 0.0307,
820
+ "step": 2850
821
+ },
822
+ {
823
+ "epoch": 1.1696501220504476,
824
+ "grad_norm": 0.2764925956726074,
825
+ "learning_rate": 3.390596745027125e-05,
826
+ "loss": 0.034,
827
+ "step": 2875
828
+ },
829
+ {
830
+ "epoch": 1.1798209926769732,
831
+ "grad_norm": 0.36045756936073303,
832
+ "learning_rate": 3.371760096443641e-05,
833
+ "loss": 0.0327,
834
+ "step": 2900
835
+ },
836
+ {
837
+ "epoch": 1.1899918633034987,
838
+ "grad_norm": 0.4153495728969574,
839
+ "learning_rate": 3.352923447860157e-05,
840
+ "loss": 0.0341,
841
+ "step": 2925
842
+ },
843
+ {
844
+ "epoch": 1.2001627339300245,
845
+ "grad_norm": 0.3471417725086212,
846
+ "learning_rate": 3.334086799276673e-05,
847
+ "loss": 0.0329,
848
+ "step": 2950
849
+ },
850
+ {
851
+ "epoch": 1.21033360455655,
852
+ "grad_norm": 0.3038371205329895,
853
+ "learning_rate": 3.3152501506931886e-05,
854
+ "loss": 0.0344,
855
+ "step": 2975
856
+ },
857
+ {
858
+ "epoch": 1.2205044751830756,
859
+ "grad_norm": 0.589869499206543,
860
+ "learning_rate": 3.296413502109705e-05,
861
+ "loss": 0.0369,
862
+ "step": 3000
863
+ },
864
+ {
865
+ "epoch": 1.2306753458096014,
866
+ "grad_norm": 0.19090452790260315,
867
+ "learning_rate": 3.277576853526221e-05,
868
+ "loss": 0.0325,
869
+ "step": 3025
870
+ },
871
+ {
872
+ "epoch": 1.240846216436127,
873
+ "grad_norm": 0.2946130335330963,
874
+ "learning_rate": 3.2587402049427365e-05,
875
+ "loss": 0.033,
876
+ "step": 3050
877
+ },
878
+ {
879
+ "epoch": 1.2510170870626527,
880
+ "grad_norm": 0.33456677198410034,
881
+ "learning_rate": 3.239903556359253e-05,
882
+ "loss": 0.0311,
883
+ "step": 3075
884
+ },
885
+ {
886
+ "epoch": 1.2611879576891782,
887
+ "grad_norm": 0.23250174522399902,
888
+ "learning_rate": 3.221066907775769e-05,
889
+ "loss": 0.034,
890
+ "step": 3100
891
+ },
892
+ {
893
+ "epoch": 1.2713588283157038,
894
+ "grad_norm": 0.2728452682495117,
895
+ "learning_rate": 3.2022302591922845e-05,
896
+ "loss": 0.0328,
897
+ "step": 3125
898
+ },
899
+ {
900
+ "epoch": 1.2815296989422293,
901
+ "grad_norm": 0.22802890837192535,
902
+ "learning_rate": 3.183393610608801e-05,
903
+ "loss": 0.0291,
904
+ "step": 3150
905
+ },
906
+ {
907
+ "epoch": 1.2917005695687551,
908
+ "grad_norm": 0.3694087862968445,
909
+ "learning_rate": 3.1645569620253167e-05,
910
+ "loss": 0.033,
911
+ "step": 3175
912
+ },
913
+ {
914
+ "epoch": 1.3018714401952807,
915
+ "grad_norm": 0.3649369478225708,
916
+ "learning_rate": 3.145720313441833e-05,
917
+ "loss": 0.0353,
918
+ "step": 3200
919
+ },
920
+ {
921
+ "epoch": 1.3120423108218064,
922
+ "grad_norm": 0.45792657136917114,
923
+ "learning_rate": 3.126883664858349e-05,
924
+ "loss": 0.0332,
925
+ "step": 3225
926
+ },
927
+ {
928
+ "epoch": 1.322213181448332,
929
+ "grad_norm": 0.325790137052536,
930
+ "learning_rate": 3.1080470162748646e-05,
931
+ "loss": 0.0314,
932
+ "step": 3250
933
+ },
934
+ {
935
+ "epoch": 1.3323840520748575,
936
+ "grad_norm": 0.25394582748413086,
937
+ "learning_rate": 3.08921036769138e-05,
938
+ "loss": 0.0347,
939
+ "step": 3275
940
+ },
941
+ {
942
+ "epoch": 1.342554922701383,
943
+ "grad_norm": 0.25821924209594727,
944
+ "learning_rate": 3.070373719107896e-05,
945
+ "loss": 0.0326,
946
+ "step": 3300
947
+ },
948
+ {
949
+ "epoch": 1.3527257933279089,
950
+ "grad_norm": 0.24513724446296692,
951
+ "learning_rate": 3.0515370705244122e-05,
952
+ "loss": 0.0323,
953
+ "step": 3325
954
+ },
955
+ {
956
+ "epoch": 1.3628966639544344,
957
+ "grad_norm": 0.5198257565498352,
958
+ "learning_rate": 3.0327004219409283e-05,
959
+ "loss": 0.0337,
960
+ "step": 3350
961
+ },
962
+ {
963
+ "epoch": 1.3730675345809602,
964
+ "grad_norm": 0.4150083661079407,
965
+ "learning_rate": 3.0138637733574443e-05,
966
+ "loss": 0.0313,
967
+ "step": 3375
968
+ },
969
+ {
970
+ "epoch": 1.3832384052074858,
971
+ "grad_norm": 0.148391991853714,
972
+ "learning_rate": 2.9950271247739604e-05,
973
+ "loss": 0.0351,
974
+ "step": 3400
975
+ },
976
+ {
977
+ "epoch": 1.3934092758340113,
978
+ "grad_norm": 0.23965270817279816,
979
+ "learning_rate": 2.9761904761904762e-05,
980
+ "loss": 0.0338,
981
+ "step": 3425
982
+ },
983
+ {
984
+ "epoch": 1.403580146460537,
985
+ "grad_norm": 0.21719323098659515,
986
+ "learning_rate": 2.9573538276069923e-05,
987
+ "loss": 0.0305,
988
+ "step": 3450
989
+ },
990
+ {
991
+ "epoch": 1.4137510170870626,
992
+ "grad_norm": 0.25082919001579285,
993
+ "learning_rate": 2.9385171790235084e-05,
994
+ "loss": 0.0328,
995
+ "step": 3475
996
+ },
997
+ {
998
+ "epoch": 1.4239218877135884,
999
+ "grad_norm": 0.3688701093196869,
1000
+ "learning_rate": 2.9196805304400245e-05,
1001
+ "loss": 0.0352,
1002
+ "step": 3500
1003
+ },
1004
+ {
1005
+ "epoch": 1.434092758340114,
1006
+ "grad_norm": 0.3461095094680786,
1007
+ "learning_rate": 2.9008438818565402e-05,
1008
+ "loss": 0.0356,
1009
+ "step": 3525
1010
+ },
1011
+ {
1012
+ "epoch": 1.4442636289666395,
1013
+ "grad_norm": 0.2626365125179291,
1014
+ "learning_rate": 2.8820072332730563e-05,
1015
+ "loss": 0.033,
1016
+ "step": 3550
1017
+ },
1018
+ {
1019
+ "epoch": 1.454434499593165,
1020
+ "grad_norm": 0.27237704396247864,
1021
+ "learning_rate": 2.8631705846895724e-05,
1022
+ "loss": 0.0342,
1023
+ "step": 3575
1024
+ },
1025
+ {
1026
+ "epoch": 1.4646053702196908,
1027
+ "grad_norm": 0.258208304643631,
1028
+ "learning_rate": 2.8443339361060885e-05,
1029
+ "loss": 0.0327,
1030
+ "step": 3600
1031
+ },
1032
+ {
1033
+ "epoch": 1.4747762408462164,
1034
+ "grad_norm": 0.2694801390171051,
1035
+ "learning_rate": 2.8254972875226042e-05,
1036
+ "loss": 0.0353,
1037
+ "step": 3625
1038
+ },
1039
+ {
1040
+ "epoch": 1.4849471114727422,
1041
+ "grad_norm": 0.3417627513408661,
1042
+ "learning_rate": 2.8066606389391203e-05,
1043
+ "loss": 0.0325,
1044
+ "step": 3650
1045
+ },
1046
+ {
1047
+ "epoch": 1.4951179820992677,
1048
+ "grad_norm": 0.4780226945877075,
1049
+ "learning_rate": 2.7878239903556357e-05,
1050
+ "loss": 0.0338,
1051
+ "step": 3675
1052
+ },
1053
+ {
1054
+ "epoch": 1.5052888527257933,
1055
+ "grad_norm": 0.2635546922683716,
1056
+ "learning_rate": 2.7689873417721518e-05,
1057
+ "loss": 0.0321,
1058
+ "step": 3700
1059
+ },
1060
+ {
1061
+ "epoch": 1.5154597233523188,
1062
+ "grad_norm": 0.2416383922100067,
1063
+ "learning_rate": 2.750150693188668e-05,
1064
+ "loss": 0.0338,
1065
+ "step": 3725
1066
+ },
1067
+ {
1068
+ "epoch": 1.5256305939788446,
1069
+ "grad_norm": 0.5338820219039917,
1070
+ "learning_rate": 2.7313140446051837e-05,
1071
+ "loss": 0.0319,
1072
+ "step": 3750
1073
+ },
1074
+ {
1075
+ "epoch": 1.5358014646053704,
1076
+ "grad_norm": 0.35653921961784363,
1077
+ "learning_rate": 2.7124773960216997e-05,
1078
+ "loss": 0.0338,
1079
+ "step": 3775
1080
+ },
1081
+ {
1082
+ "epoch": 1.545972335231896,
1083
+ "grad_norm": 0.23320654034614563,
1084
+ "learning_rate": 2.693640747438216e-05,
1085
+ "loss": 0.0312,
1086
+ "step": 3800
1087
+ },
1088
+ {
1089
+ "epoch": 1.5561432058584215,
1090
+ "grad_norm": 0.31537100672721863,
1091
+ "learning_rate": 2.674804098854732e-05,
1092
+ "loss": 0.037,
1093
+ "step": 3825
1094
+ },
1095
+ {
1096
+ "epoch": 1.566314076484947,
1097
+ "grad_norm": 0.24033057689666748,
1098
+ "learning_rate": 2.6559674502712477e-05,
1099
+ "loss": 0.0315,
1100
+ "step": 3850
1101
+ },
1102
+ {
1103
+ "epoch": 1.5764849471114726,
1104
+ "grad_norm": 0.21600840985774994,
1105
+ "learning_rate": 2.6371308016877638e-05,
1106
+ "loss": 0.0329,
1107
+ "step": 3875
1108
+ },
1109
+ {
1110
+ "epoch": 1.5866558177379984,
1111
+ "grad_norm": 0.4238574206829071,
1112
+ "learning_rate": 2.61829415310428e-05,
1113
+ "loss": 0.0356,
1114
+ "step": 3900
1115
+ },
1116
+ {
1117
+ "epoch": 1.5968266883645241,
1118
+ "grad_norm": 0.3188216984272003,
1119
+ "learning_rate": 2.599457504520796e-05,
1120
+ "loss": 0.0321,
1121
+ "step": 3925
1122
+ },
1123
+ {
1124
+ "epoch": 1.6069975589910497,
1125
+ "grad_norm": 0.29613322019577026,
1126
+ "learning_rate": 2.5806208559373117e-05,
1127
+ "loss": 0.0288,
1128
+ "step": 3950
1129
+ },
1130
+ {
1131
+ "epoch": 1.6171684296175752,
1132
+ "grad_norm": 0.3398037850856781,
1133
+ "learning_rate": 2.5617842073538278e-05,
1134
+ "loss": 0.0356,
1135
+ "step": 3975
1136
+ },
1137
+ {
1138
+ "epoch": 1.6273393002441008,
1139
+ "grad_norm": 0.7744150757789612,
1140
+ "learning_rate": 2.542947558770344e-05,
1141
+ "loss": 0.03,
1142
+ "step": 4000
1143
+ },
1144
+ {
1145
+ "epoch": 1.6375101708706266,
1146
+ "grad_norm": 0.2858869135379791,
1147
+ "learning_rate": 2.52411091018686e-05,
1148
+ "loss": 0.0313,
1149
+ "step": 4025
1150
+ },
1151
+ {
1152
+ "epoch": 1.647681041497152,
1153
+ "grad_norm": 0.5115847587585449,
1154
+ "learning_rate": 2.5052742616033757e-05,
1155
+ "loss": 0.0321,
1156
+ "step": 4050
1157
+ },
1158
+ {
1159
+ "epoch": 1.6578519121236779,
1160
+ "grad_norm": 0.2705928385257721,
1161
+ "learning_rate": 2.4864376130198915e-05,
1162
+ "loss": 0.0299,
1163
+ "step": 4075
1164
+ },
1165
+ {
1166
+ "epoch": 1.6680227827502034,
1167
+ "grad_norm": 0.23888413608074188,
1168
+ "learning_rate": 2.4676009644364075e-05,
1169
+ "loss": 0.0302,
1170
+ "step": 4100
1171
+ },
1172
+ {
1173
+ "epoch": 1.678193653376729,
1174
+ "grad_norm": 0.251142680644989,
1175
+ "learning_rate": 2.4487643158529236e-05,
1176
+ "loss": 0.0352,
1177
+ "step": 4125
1178
+ },
1179
+ {
1180
+ "epoch": 1.6883645240032545,
1181
+ "grad_norm": 0.4335423707962036,
1182
+ "learning_rate": 2.4299276672694397e-05,
1183
+ "loss": 0.0342,
1184
+ "step": 4150
1185
+ },
1186
+ {
1187
+ "epoch": 1.6985353946297803,
1188
+ "grad_norm": 0.26633137464523315,
1189
+ "learning_rate": 2.4110910186859555e-05,
1190
+ "loss": 0.0303,
1191
+ "step": 4175
1192
+ },
1193
+ {
1194
+ "epoch": 1.708706265256306,
1195
+ "grad_norm": 0.18798017501831055,
1196
+ "learning_rate": 2.3922543701024716e-05,
1197
+ "loss": 0.0318,
1198
+ "step": 4200
1199
+ },
1200
+ {
1201
+ "epoch": 1.7188771358828316,
1202
+ "grad_norm": 0.14591360092163086,
1203
+ "learning_rate": 2.3734177215189873e-05,
1204
+ "loss": 0.0306,
1205
+ "step": 4225
1206
+ },
1207
+ {
1208
+ "epoch": 1.7290480065093572,
1209
+ "grad_norm": 0.22364169359207153,
1210
+ "learning_rate": 2.3545810729355034e-05,
1211
+ "loss": 0.031,
1212
+ "step": 4250
1213
+ },
1214
+ {
1215
+ "epoch": 1.7392188771358827,
1216
+ "grad_norm": 0.28795015811920166,
1217
+ "learning_rate": 2.3357444243520195e-05,
1218
+ "loss": 0.0337,
1219
+ "step": 4275
1220
+ },
1221
+ {
1222
+ "epoch": 1.7493897477624083,
1223
+ "grad_norm": 0.5833514332771301,
1224
+ "learning_rate": 2.3169077757685352e-05,
1225
+ "loss": 0.0356,
1226
+ "step": 4300
1227
+ },
1228
+ {
1229
+ "epoch": 1.759560618388934,
1230
+ "grad_norm": 0.20125386118888855,
1231
+ "learning_rate": 2.2980711271850513e-05,
1232
+ "loss": 0.035,
1233
+ "step": 4325
1234
+ },
1235
+ {
1236
+ "epoch": 1.7697314890154598,
1237
+ "grad_norm": 0.39359915256500244,
1238
+ "learning_rate": 2.2792344786015674e-05,
1239
+ "loss": 0.0325,
1240
+ "step": 4350
1241
+ },
1242
+ {
1243
+ "epoch": 1.7799023596419854,
1244
+ "grad_norm": 0.18621224164962769,
1245
+ "learning_rate": 2.2603978300180835e-05,
1246
+ "loss": 0.0287,
1247
+ "step": 4375
1248
+ },
1249
+ {
1250
+ "epoch": 1.790073230268511,
1251
+ "grad_norm": 0.23905445635318756,
1252
+ "learning_rate": 2.2415611814345993e-05,
1253
+ "loss": 0.0338,
1254
+ "step": 4400
1255
+ },
1256
+ {
1257
+ "epoch": 1.8002441008950365,
1258
+ "grad_norm": 0.5699689984321594,
1259
+ "learning_rate": 2.2227245328511154e-05,
1260
+ "loss": 0.034,
1261
+ "step": 4425
1262
+ },
1263
+ {
1264
+ "epoch": 1.8104149715215623,
1265
+ "grad_norm": 0.3101656138896942,
1266
+ "learning_rate": 2.203887884267631e-05,
1267
+ "loss": 0.0344,
1268
+ "step": 4450
1269
+ },
1270
+ {
1271
+ "epoch": 1.8205858421480878,
1272
+ "grad_norm": 0.19921617209911346,
1273
+ "learning_rate": 2.1850512356841472e-05,
1274
+ "loss": 0.0348,
1275
+ "step": 4475
1276
+ },
1277
+ {
1278
+ "epoch": 1.8307567127746136,
1279
+ "grad_norm": 0.4034786522388458,
1280
+ "learning_rate": 2.166214587100663e-05,
1281
+ "loss": 0.0332,
1282
+ "step": 4500
1283
+ },
1284
+ {
1285
+ "epoch": 1.8409275834011392,
1286
+ "grad_norm": 0.5094945430755615,
1287
+ "learning_rate": 2.147377938517179e-05,
1288
+ "loss": 0.0293,
1289
+ "step": 4525
1290
+ },
1291
+ {
1292
+ "epoch": 1.8510984540276647,
1293
+ "grad_norm": 0.4296955168247223,
1294
+ "learning_rate": 2.128541289933695e-05,
1295
+ "loss": 0.0318,
1296
+ "step": 4550
1297
+ },
1298
+ {
1299
+ "epoch": 1.8612693246541903,
1300
+ "grad_norm": 0.2767828404903412,
1301
+ "learning_rate": 2.1097046413502112e-05,
1302
+ "loss": 0.0337,
1303
+ "step": 4575
1304
+ },
1305
+ {
1306
+ "epoch": 1.871440195280716,
1307
+ "grad_norm": 0.2815410792827606,
1308
+ "learning_rate": 2.090867992766727e-05,
1309
+ "loss": 0.0322,
1310
+ "step": 4600
1311
+ },
1312
+ {
1313
+ "epoch": 1.8816110659072418,
1314
+ "grad_norm": 0.2712397873401642,
1315
+ "learning_rate": 2.072031344183243e-05,
1316
+ "loss": 0.0309,
1317
+ "step": 4625
1318
+ },
1319
+ {
1320
+ "epoch": 1.8917819365337674,
1321
+ "grad_norm": 0.48615217208862305,
1322
+ "learning_rate": 2.0531946955997588e-05,
1323
+ "loss": 0.0335,
1324
+ "step": 4650
1325
+ },
1326
+ {
1327
+ "epoch": 1.901952807160293,
1328
+ "grad_norm": 0.20523346960544586,
1329
+ "learning_rate": 2.034358047016275e-05,
1330
+ "loss": 0.032,
1331
+ "step": 4675
1332
+ },
1333
+ {
1334
+ "epoch": 1.9121236777868185,
1335
+ "grad_norm": 0.21368514001369476,
1336
+ "learning_rate": 2.015521398432791e-05,
1337
+ "loss": 0.0323,
1338
+ "step": 4700
1339
+ },
1340
+ {
1341
+ "epoch": 1.922294548413344,
1342
+ "grad_norm": 0.5739328265190125,
1343
+ "learning_rate": 1.9966847498493067e-05,
1344
+ "loss": 0.0324,
1345
+ "step": 4725
1346
+ },
1347
+ {
1348
+ "epoch": 1.9324654190398698,
1349
+ "grad_norm": 0.34018173813819885,
1350
+ "learning_rate": 1.9778481012658228e-05,
1351
+ "loss": 0.0333,
1352
+ "step": 4750
1353
+ },
1354
+ {
1355
+ "epoch": 1.9426362896663956,
1356
+ "grad_norm": 0.29020246863365173,
1357
+ "learning_rate": 1.959011452682339e-05,
1358
+ "loss": 0.032,
1359
+ "step": 4775
1360
+ },
1361
+ {
1362
+ "epoch": 1.9528071602929211,
1363
+ "grad_norm": 0.2339148074388504,
1364
+ "learning_rate": 1.940174804098855e-05,
1365
+ "loss": 0.0307,
1366
+ "step": 4800
1367
+ },
1368
+ {
1369
+ "epoch": 1.9629780309194467,
1370
+ "grad_norm": 0.23286353051662445,
1371
+ "learning_rate": 1.9213381555153708e-05,
1372
+ "loss": 0.0339,
1373
+ "step": 4825
1374
+ },
1375
+ {
1376
+ "epoch": 1.9731489015459722,
1377
+ "grad_norm": 0.2174939662218094,
1378
+ "learning_rate": 1.902501506931887e-05,
1379
+ "loss": 0.0318,
1380
+ "step": 4850
1381
+ },
1382
+ {
1383
+ "epoch": 1.983319772172498,
1384
+ "grad_norm": 0.2397424727678299,
1385
+ "learning_rate": 1.8836648583484026e-05,
1386
+ "loss": 0.032,
1387
+ "step": 4875
1388
+ },
1389
+ {
1390
+ "epoch": 1.9934906427990235,
1391
+ "grad_norm": 0.3197477161884308,
1392
+ "learning_rate": 1.8648282097649187e-05,
1393
+ "loss": 0.0319,
1394
+ "step": 4900
1395
+ },
1396
+ {
1397
+ "epoch": 2.0,
1398
+ "eval_explained_variance": 0.2763633728027344,
1399
+ "eval_loss": 0.03380444645881653,
1400
+ "eval_mae": 0.14292894303798676,
1401
+ "eval_mse": 0.033803146332502365,
1402
+ "eval_r2": 0.2760580778121948,
1403
+ "eval_rmse": 0.18385631980571776,
1404
+ "eval_runtime": 4.0998,
1405
+ "eval_samples_per_second": 4794.858,
1406
+ "eval_steps_per_second": 75.125,
1407
+ "step": 4916
1408
+ }
1409
+ ],
1410
+ "logging_steps": 25,
1411
+ "max_steps": 7374,
1412
+ "num_input_tokens_seen": 0,
1413
+ "num_train_epochs": 3,
1414
+ "save_steps": 500,
1415
+ "stateful_callbacks": {
1416
+ "EarlyStoppingCallback": {
1417
+ "args": {
1418
+ "early_stopping_patience": 5,
1419
+ "early_stopping_threshold": 0.01
1420
+ },
1421
+ "attributes": {
1422
+ "early_stopping_patience_counter": 1
1423
+ }
1424
+ },
1425
+ "TrainerControl": {
1426
+ "args": {
1427
+ "should_epoch_stop": false,
1428
+ "should_evaluate": false,
1429
+ "should_log": false,
1430
+ "should_save": true,
1431
+ "should_training_stop": false
1432
+ },
1433
+ "attributes": {}
1434
+ }
1435
+ },
1436
+ "total_flos": 5209584826318848.0,
1437
+ "train_batch_size": 8,
1438
+ "trial_name": null,
1439
+ "trial_params": null
1440
+ }
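
The learning-rate values in `log_history` follow the linear warmup-plus-decay schedule implied by `training_params.json` (peak `lr` 5e-05, `warmup_ratio` 0.1) and `max_steps` 7374. A small sketch reproducing them; the assumption that the value logged at step N is the rate applied on the previous optimizer step is an inference from the logged numbers, not something stated in the file:

```python
import math

PEAK_LR = 5e-05                             # "lr" in training_params.json
MAX_STEPS = 7374                            # "max_steps" in trainer_state.json
WARMUP_STEPS = math.ceil(0.1 * MAX_STEPS)   # warmup_ratio 0.1 -> 738 steps

def linear_schedule_lr(current_step: int) -> float:
    """Linear warmup to PEAK_LR, then linear decay to zero at MAX_STEPS."""
    if current_step < WARMUP_STEPS:
        return PEAK_LR * current_step / WARMUP_STEPS
    return PEAK_LR * max(0, MAX_STEPS - current_step) / (MAX_STEPS - WARMUP_STEPS)

# The value logged at step N corresponds to the rate used for the previous step.
print(linear_schedule_lr(25 - 1))    # ~1.626e-06, matches the first log entry
print(linear_schedule_lr(750 - 1))   # ~4.9917e-05, matches the entry at step 750
```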
checkpoint-4916/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:51a4264247fbe1fe88d05c68d61b8084763d2f8eb4fde109f383a88bcaed7463
+ size 5368
config.json ADDED
@@ -0,0 +1,32 @@
+ {
+ "_name_or_path": "distilbert/distilbert-base-uncased",
+ "_num_labels": 1,
+ "activation": "gelu",
+ "architectures": [
+ "DistilBertForSequenceClassification"
+ ],
+ "attention_dropout": 0.1,
+ "dim": 768,
+ "dropout": 0.1,
+ "hidden_dim": 3072,
+ "id2label": {
+ "0": "target"
+ },
+ "initializer_range": 0.02,
+ "label2id": {
+ "target": 0
+ },
+ "max_position_embeddings": 512,
+ "model_type": "distilbert",
+ "n_heads": 12,
+ "n_layers": 6,
+ "pad_token_id": 0,
+ "problem_type": "regression",
+ "qa_dropout": 0.1,
+ "seq_classif_dropout": 0.2,
+ "sinusoidal_pos_embds": false,
+ "tie_weights_": true,
+ "torch_dtype": "float32",
+ "transformers_version": "4.48.0",
+ "vocab_size": 30522
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:52992a6b1bb7b233e4054bc086b088013d7e7613bcffead944d24fd70d64c55e
+ size 267829484
runs/Feb22_02-13-45_r-samtuckervegan-autotrain-4xl4-41bfog4i-7c075-ypwih/events.out.tfevents.1740190427.r-samtuckervegan-autotrain-4xl4-41bfog4i-7c075-ypwih.208.0 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:fb7f2b7d9f9435c3c6c5309d2e64e8b97600a6e7a4e2f247f057edb00cf5d18e
- size 59253
+ oid sha256:5382562246cf343789c90591b47487305c08fddb4bb31ad9423e1e4afb19f09d
+ size 68990
runs/Feb22_02-13-45_r-samtuckervegan-autotrain-4xl4-41bfog4i-7c075-ypwih/events.out.tfevents.1740191139.r-samtuckervegan-autotrain-4xl4-41bfog4i-7c075-ypwih.208.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9c56ff9864bfc4fb732368db29e9cb5af3bfdc993c409ac957d8db486cf30a2a
+ size 609
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "cls_token": "[CLS]",
+ "mask_token": "[MASK]",
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "unk_token": "[UNK]"
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
+ {
+ "added_tokens_decoder": {
+ "0": {
+ "content": "[PAD]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "100": {
+ "content": "[UNK]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "101": {
+ "content": "[CLS]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "102": {
+ "content": "[SEP]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "103": {
+ "content": "[MASK]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "clean_up_tokenization_spaces": false,
+ "cls_token": "[CLS]",
+ "do_lower_case": true,
+ "extra_special_tokens": {},
+ "mask_token": "[MASK]",
+ "model_max_length": 512,
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "strip_accents": null,
+ "tokenize_chinese_chars": true,
+ "tokenizer_class": "DistilBertTokenizer",
+ "unk_token": "[UNK]"
+ }
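
This is the standard uncased WordPiece `DistilBertTokenizer` with a 512-token model max length (training itself used `max_seq_length` 128). A quick usage sketch, assuming `transformers` is installed and the script is run from the repository root (or with the Hub repo id substituted for the local path):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(".")  # directory with tokenizer_config.json, vocab.txt, etc.
enc = tokenizer(
    "I love AutoTrain",
    truncation=True,
    max_length=128,          # matches max_seq_length used during training
    return_tensors="pt",
)
print(enc["input_ids"])      # [CLS] ... [SEP], lower-cased WordPiece ids
print(tokenizer.convert_ids_to_tokens(enc["input_ids"][0]))
```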
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:51a4264247fbe1fe88d05c68d61b8084763d2f8eb4fde109f383a88bcaed7463
+ size 5368
training_params.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "data_path": "samtuckervegan/text_performance",
+ "model": "distilbert/distilbert-base-uncased",
+ "lr": 5e-05,
+ "epochs": 3,
+ "max_seq_length": 128,
+ "batch_size": 8,
+ "warmup_ratio": 0.1,
+ "gradient_accumulation": 1,
+ "optimizer": "adamw_torch",
+ "scheduler": "linear",
+ "weight_decay": 0.0,
+ "max_grad_norm": 1.0,
+ "seed": 42,
+ "train_split": "train",
+ "valid_split": "test",
+ "text_column": "text",
+ "target_column": "target",
+ "logging_steps": -1,
+ "project_name": "textprediction",
+ "auto_find_batch_size": false,
+ "mixed_precision": "fp16",
+ "save_total_limit": 1,
+ "push_to_hub": true,
+ "eval_strategy": "epoch",
+ "username": "samtuckervegan",
+ "log": "tensorboard",
+ "early_stopping_patience": 5,
+ "early_stopping_threshold": 0.01
+ }
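
These AutoTrain parameters map closely onto standard `transformers` `TrainingArguments`. The sketch below is an approximation of an equivalent manual setup, not AutoTrain's exact internals; in particular `save_strategy` and `load_best_model_at_end` are inferred from the checkpoint-at-epoch-end behaviour seen in `trainer_state.json`.

```python
from transformers import TrainingArguments, EarlyStoppingCallback

args = TrainingArguments(
    output_dir="textprediction",        # project_name
    learning_rate=5e-05,                # lr
    num_train_epochs=3,                 # epochs
    per_device_train_batch_size=8,      # batch_size
    gradient_accumulation_steps=1,      # gradient_accumulation
    warmup_ratio=0.1,                   # warmup_ratio
    optim="adamw_torch",                # optimizer
    lr_scheduler_type="linear",         # scheduler
    weight_decay=0.0,                   # weight_decay
    max_grad_norm=1.0,                  # max_grad_norm
    seed=42,                            # seed
    fp16=True,                          # mixed_precision: "fp16"
    eval_strategy="epoch",              # eval_strategy
    save_strategy="epoch",              # inferred from per-epoch checkpoints
    save_total_limit=1,                 # save_total_limit
    report_to="tensorboard",            # log
    push_to_hub=True,                   # push_to_hub
    load_best_model_at_end=True,        # inferred from best_model_checkpoint tracking
)
early_stopping = EarlyStoppingCallback(
    early_stopping_patience=5,          # early_stopping_patience
    early_stopping_threshold=0.01,      # early_stopping_threshold
)
```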
vocab.txt ADDED
The diff for this file is too large to render. See raw diff