sedrickkeh commited on
Commit
393940e
·
verified ·
1 Parent(s): c610293

Training in progress, epoch 2

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6dee895af46ab5d8cba5a925eecb53c22be4d03b9485e5ea0e8fd5d275d621c4
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97491f415bf9d7d38043b6f656ec2864399aa31cb8a2c49a4a78f199dfde2595
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7e0d55a0572e7fb78539f36ecfe1e496c5168f0f5cf7846024d4e4f637e498f
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce128c8a5229032a71ffa954b73ba1a30098ff6804eb93392833b739f7200cfc
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25ef2311e22a54815be0db9c6c9aac1d39bb7da83f182f8e5ea63d13d6e2e68e
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74905481bd6a66d4bdc6cb212ece3221c92be772161f739f27495f2f6872e8cf
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce7100b10f3eca79b553abd837f4660582f9e872fac88c2fb498b31f7ff0da58
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd84624c51e07eed2f27e94ac2ae82517eeff5852cedb3f9a25ae29fb5219f5f
3
  size 1089994880
trainer_log.jsonl CHANGED
@@ -175,3 +175,179 @@
175
  {"current_steps": 1746, "total_steps": 5238, "eval_loss": 0.4306947588920593, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "3:39:22", "remaining_time": "7:18:45"}
176
  {"current_steps": 1750, "total_steps": 5238, "loss": 0.4117, "lr": 5e-06, "epoch": 1.002290950744559, "percentage": 33.41, "elapsed_time": "3:41:06", "remaining_time": "7:20:41"}
177
  {"current_steps": 1760, "total_steps": 5238, "loss": 0.3742, "lr": 5e-06, "epoch": 1.0080183276059564, "percentage": 33.6, "elapsed_time": "3:42:18", "remaining_time": "7:19:18"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  {"current_steps": 1746, "total_steps": 5238, "eval_loss": 0.4306947588920593, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "3:39:22", "remaining_time": "7:18:45"}
176
  {"current_steps": 1750, "total_steps": 5238, "loss": 0.4117, "lr": 5e-06, "epoch": 1.002290950744559, "percentage": 33.41, "elapsed_time": "3:41:06", "remaining_time": "7:20:41"}
177
  {"current_steps": 1760, "total_steps": 5238, "loss": 0.3742, "lr": 5e-06, "epoch": 1.0080183276059564, "percentage": 33.6, "elapsed_time": "3:42:18", "remaining_time": "7:19:18"}
178
+ {"current_steps": 1770, "total_steps": 5238, "loss": 0.3779, "lr": 5e-06, "epoch": 1.013745704467354, "percentage": 33.79, "elapsed_time": "3:43:31", "remaining_time": "7:17:57"}
179
+ {"current_steps": 1780, "total_steps": 5238, "loss": 0.3702, "lr": 5e-06, "epoch": 1.0194730813287514, "percentage": 33.98, "elapsed_time": "3:44:44", "remaining_time": "7:16:36"}
180
+ {"current_steps": 1790, "total_steps": 5238, "loss": 0.3711, "lr": 5e-06, "epoch": 1.0252004581901488, "percentage": 34.17, "elapsed_time": "3:45:57", "remaining_time": "7:15:16"}
181
+ {"current_steps": 1800, "total_steps": 5238, "loss": 0.3663, "lr": 5e-06, "epoch": 1.0309278350515463, "percentage": 34.36, "elapsed_time": "3:47:11", "remaining_time": "7:13:55"}
182
+ {"current_steps": 1810, "total_steps": 5238, "loss": 0.3785, "lr": 5e-06, "epoch": 1.036655211912944, "percentage": 34.56, "elapsed_time": "3:48:24", "remaining_time": "7:12:35"}
183
+ {"current_steps": 1820, "total_steps": 5238, "loss": 0.37, "lr": 5e-06, "epoch": 1.0423825887743414, "percentage": 34.75, "elapsed_time": "3:49:37", "remaining_time": "7:11:15"}
184
+ {"current_steps": 1830, "total_steps": 5238, "loss": 0.3773, "lr": 5e-06, "epoch": 1.0481099656357389, "percentage": 34.94, "elapsed_time": "3:50:51", "remaining_time": "7:09:55"}
185
+ {"current_steps": 1840, "total_steps": 5238, "loss": 0.3795, "lr": 5e-06, "epoch": 1.0538373424971363, "percentage": 35.13, "elapsed_time": "3:52:04", "remaining_time": "7:08:35"}
186
+ {"current_steps": 1850, "total_steps": 5238, "loss": 0.368, "lr": 5e-06, "epoch": 1.0595647193585338, "percentage": 35.32, "elapsed_time": "3:53:17", "remaining_time": "7:07:14"}
187
+ {"current_steps": 1860, "total_steps": 5238, "loss": 0.3726, "lr": 5e-06, "epoch": 1.0652920962199313, "percentage": 35.51, "elapsed_time": "3:54:30", "remaining_time": "7:05:53"}
188
+ {"current_steps": 1870, "total_steps": 5238, "loss": 0.3806, "lr": 5e-06, "epoch": 1.0710194730813287, "percentage": 35.7, "elapsed_time": "3:55:43", "remaining_time": "7:04:33"}
189
+ {"current_steps": 1880, "total_steps": 5238, "loss": 0.3719, "lr": 5e-06, "epoch": 1.0767468499427262, "percentage": 35.89, "elapsed_time": "3:56:56", "remaining_time": "7:03:13"}
190
+ {"current_steps": 1890, "total_steps": 5238, "loss": 0.373, "lr": 5e-06, "epoch": 1.0824742268041236, "percentage": 36.08, "elapsed_time": "3:58:10", "remaining_time": "7:01:53"}
191
+ {"current_steps": 1900, "total_steps": 5238, "loss": 0.3844, "lr": 5e-06, "epoch": 1.088201603665521, "percentage": 36.27, "elapsed_time": "3:59:23", "remaining_time": "7:00:34"}
192
+ {"current_steps": 1910, "total_steps": 5238, "loss": 0.3734, "lr": 5e-06, "epoch": 1.0939289805269188, "percentage": 36.46, "elapsed_time": "4:00:36", "remaining_time": "6:59:14"}
193
+ {"current_steps": 1920, "total_steps": 5238, "loss": 0.3794, "lr": 5e-06, "epoch": 1.0996563573883162, "percentage": 36.66, "elapsed_time": "4:01:49", "remaining_time": "6:57:54"}
194
+ {"current_steps": 1930, "total_steps": 5238, "loss": 0.3811, "lr": 5e-06, "epoch": 1.1053837342497137, "percentage": 36.85, "elapsed_time": "4:03:02", "remaining_time": "6:56:34"}
195
+ {"current_steps": 1940, "total_steps": 5238, "loss": 0.3673, "lr": 5e-06, "epoch": 1.1111111111111112, "percentage": 37.04, "elapsed_time": "4:04:14", "remaining_time": "6:55:13"}
196
+ {"current_steps": 1950, "total_steps": 5238, "loss": 0.3723, "lr": 5e-06, "epoch": 1.1168384879725086, "percentage": 37.23, "elapsed_time": "4:05:27", "remaining_time": "6:53:52"}
197
+ {"current_steps": 1960, "total_steps": 5238, "loss": 0.3716, "lr": 5e-06, "epoch": 1.122565864833906, "percentage": 37.42, "elapsed_time": "4:06:41", "remaining_time": "6:52:34"}
198
+ {"current_steps": 1970, "total_steps": 5238, "loss": 0.3757, "lr": 5e-06, "epoch": 1.1282932416953035, "percentage": 37.61, "elapsed_time": "4:07:54", "remaining_time": "6:51:14"}
199
+ {"current_steps": 1980, "total_steps": 5238, "loss": 0.3811, "lr": 5e-06, "epoch": 1.134020618556701, "percentage": 37.8, "elapsed_time": "4:09:06", "remaining_time": "6:49:53"}
200
+ {"current_steps": 1990, "total_steps": 5238, "loss": 0.3834, "lr": 5e-06, "epoch": 1.1397479954180985, "percentage": 37.99, "elapsed_time": "4:10:19", "remaining_time": "6:48:33"}
201
+ {"current_steps": 2000, "total_steps": 5238, "loss": 0.3731, "lr": 5e-06, "epoch": 1.145475372279496, "percentage": 38.18, "elapsed_time": "4:11:32", "remaining_time": "6:47:14"}
202
+ {"current_steps": 2010, "total_steps": 5238, "loss": 0.3782, "lr": 5e-06, "epoch": 1.1512027491408934, "percentage": 38.37, "elapsed_time": "4:12:44", "remaining_time": "6:45:54"}
203
+ {"current_steps": 2020, "total_steps": 5238, "loss": 0.3751, "lr": 5e-06, "epoch": 1.1569301260022908, "percentage": 38.56, "elapsed_time": "4:13:57", "remaining_time": "6:44:34"}
204
+ {"current_steps": 2030, "total_steps": 5238, "loss": 0.3761, "lr": 5e-06, "epoch": 1.1626575028636885, "percentage": 38.76, "elapsed_time": "4:15:10", "remaining_time": "6:43:15"}
205
+ {"current_steps": 2040, "total_steps": 5238, "loss": 0.3741, "lr": 5e-06, "epoch": 1.168384879725086, "percentage": 38.95, "elapsed_time": "4:16:23", "remaining_time": "6:41:55"}
206
+ {"current_steps": 2050, "total_steps": 5238, "loss": 0.3743, "lr": 5e-06, "epoch": 1.1741122565864834, "percentage": 39.14, "elapsed_time": "4:17:36", "remaining_time": "6:40:37"}
207
+ {"current_steps": 2060, "total_steps": 5238, "loss": 0.3722, "lr": 5e-06, "epoch": 1.179839633447881, "percentage": 39.33, "elapsed_time": "4:18:50", "remaining_time": "6:39:18"}
208
+ {"current_steps": 2070, "total_steps": 5238, "loss": 0.375, "lr": 5e-06, "epoch": 1.1855670103092784, "percentage": 39.52, "elapsed_time": "4:20:03", "remaining_time": "6:38:00"}
209
+ {"current_steps": 2080, "total_steps": 5238, "loss": 0.3808, "lr": 5e-06, "epoch": 1.1912943871706758, "percentage": 39.71, "elapsed_time": "4:21:17", "remaining_time": "6:36:42"}
210
+ {"current_steps": 2090, "total_steps": 5238, "loss": 0.3698, "lr": 5e-06, "epoch": 1.1970217640320733, "percentage": 39.9, "elapsed_time": "4:22:30", "remaining_time": "6:35:24"}
211
+ {"current_steps": 2100, "total_steps": 5238, "loss": 0.3699, "lr": 5e-06, "epoch": 1.2027491408934707, "percentage": 40.09, "elapsed_time": "4:23:44", "remaining_time": "6:34:05"}
212
+ {"current_steps": 2110, "total_steps": 5238, "loss": 0.378, "lr": 5e-06, "epoch": 1.2084765177548682, "percentage": 40.28, "elapsed_time": "4:24:57", "remaining_time": "6:32:47"}
213
+ {"current_steps": 2120, "total_steps": 5238, "loss": 0.3742, "lr": 5e-06, "epoch": 1.2142038946162657, "percentage": 40.47, "elapsed_time": "4:26:10", "remaining_time": "6:31:29"}
214
+ {"current_steps": 2130, "total_steps": 5238, "loss": 0.3757, "lr": 5e-06, "epoch": 1.2199312714776633, "percentage": 40.66, "elapsed_time": "4:27:24", "remaining_time": "6:30:11"}
215
+ {"current_steps": 2140, "total_steps": 5238, "loss": 0.3803, "lr": 5e-06, "epoch": 1.2256586483390608, "percentage": 40.86, "elapsed_time": "4:28:37", "remaining_time": "6:28:53"}
216
+ {"current_steps": 2150, "total_steps": 5238, "loss": 0.3799, "lr": 5e-06, "epoch": 1.2313860252004583, "percentage": 41.05, "elapsed_time": "4:29:51", "remaining_time": "6:27:34"}
217
+ {"current_steps": 2160, "total_steps": 5238, "loss": 0.3799, "lr": 5e-06, "epoch": 1.2371134020618557, "percentage": 41.24, "elapsed_time": "4:31:04", "remaining_time": "6:26:16"}
218
+ {"current_steps": 2170, "total_steps": 5238, "loss": 0.3807, "lr": 5e-06, "epoch": 1.2428407789232532, "percentage": 41.43, "elapsed_time": "4:32:17", "remaining_time": "6:24:58"}
219
+ {"current_steps": 2180, "total_steps": 5238, "loss": 0.376, "lr": 5e-06, "epoch": 1.2485681557846506, "percentage": 41.62, "elapsed_time": "4:33:30", "remaining_time": "6:23:40"}
220
+ {"current_steps": 2190, "total_steps": 5238, "loss": 0.3723, "lr": 5e-06, "epoch": 1.254295532646048, "percentage": 41.81, "elapsed_time": "4:34:44", "remaining_time": "6:22:22"}
221
+ {"current_steps": 2200, "total_steps": 5238, "loss": 0.3789, "lr": 5e-06, "epoch": 1.2600229095074456, "percentage": 42.0, "elapsed_time": "4:35:57", "remaining_time": "6:21:05"}
222
+ {"current_steps": 2210, "total_steps": 5238, "loss": 0.3801, "lr": 5e-06, "epoch": 1.265750286368843, "percentage": 42.19, "elapsed_time": "4:37:11", "remaining_time": "6:19:47"}
223
+ {"current_steps": 2220, "total_steps": 5238, "loss": 0.3714, "lr": 5e-06, "epoch": 1.2714776632302405, "percentage": 42.38, "elapsed_time": "4:38:24", "remaining_time": "6:18:29"}
224
+ {"current_steps": 2230, "total_steps": 5238, "loss": 0.373, "lr": 5e-06, "epoch": 1.277205040091638, "percentage": 42.57, "elapsed_time": "4:39:37", "remaining_time": "6:17:11"}
225
+ {"current_steps": 2240, "total_steps": 5238, "loss": 0.3847, "lr": 5e-06, "epoch": 1.2829324169530354, "percentage": 42.76, "elapsed_time": "4:40:51", "remaining_time": "6:15:53"}
226
+ {"current_steps": 2250, "total_steps": 5238, "loss": 0.3707, "lr": 5e-06, "epoch": 1.2886597938144329, "percentage": 42.96, "elapsed_time": "4:42:04", "remaining_time": "6:14:35"}
227
+ {"current_steps": 2260, "total_steps": 5238, "loss": 0.3735, "lr": 5e-06, "epoch": 1.2943871706758305, "percentage": 43.15, "elapsed_time": "4:43:18", "remaining_time": "6:13:18"}
228
+ {"current_steps": 2270, "total_steps": 5238, "loss": 0.3777, "lr": 5e-06, "epoch": 1.300114547537228, "percentage": 43.34, "elapsed_time": "4:44:31", "remaining_time": "6:12:00"}
229
+ {"current_steps": 2280, "total_steps": 5238, "loss": 0.3802, "lr": 5e-06, "epoch": 1.3058419243986255, "percentage": 43.53, "elapsed_time": "4:45:44", "remaining_time": "6:10:43"}
230
+ {"current_steps": 2290, "total_steps": 5238, "loss": 0.3642, "lr": 5e-06, "epoch": 1.311569301260023, "percentage": 43.72, "elapsed_time": "4:46:58", "remaining_time": "6:09:25"}
231
+ {"current_steps": 2300, "total_steps": 5238, "loss": 0.3752, "lr": 5e-06, "epoch": 1.3172966781214204, "percentage": 43.91, "elapsed_time": "4:48:11", "remaining_time": "6:08:08"}
232
+ {"current_steps": 2310, "total_steps": 5238, "loss": 0.3771, "lr": 5e-06, "epoch": 1.3230240549828178, "percentage": 44.1, "elapsed_time": "4:49:25", "remaining_time": "6:06:51"}
233
+ {"current_steps": 2320, "total_steps": 5238, "loss": 0.3701, "lr": 5e-06, "epoch": 1.3287514318442153, "percentage": 44.29, "elapsed_time": "4:50:38", "remaining_time": "6:05:33"}
234
+ {"current_steps": 2330, "total_steps": 5238, "loss": 0.3661, "lr": 5e-06, "epoch": 1.3344788087056128, "percentage": 44.48, "elapsed_time": "4:51:51", "remaining_time": "6:04:15"}
235
+ {"current_steps": 2340, "total_steps": 5238, "loss": 0.3659, "lr": 5e-06, "epoch": 1.3402061855670104, "percentage": 44.67, "elapsed_time": "4:53:05", "remaining_time": "6:02:58"}
236
+ {"current_steps": 2350, "total_steps": 5238, "loss": 0.369, "lr": 5e-06, "epoch": 1.345933562428408, "percentage": 44.86, "elapsed_time": "4:54:18", "remaining_time": "6:01:41"}
237
+ {"current_steps": 2360, "total_steps": 5238, "loss": 0.3773, "lr": 5e-06, "epoch": 1.3516609392898054, "percentage": 45.06, "elapsed_time": "4:55:31", "remaining_time": "6:00:23"}
238
+ {"current_steps": 2370, "total_steps": 5238, "loss": 0.3743, "lr": 5e-06, "epoch": 1.3573883161512028, "percentage": 45.25, "elapsed_time": "4:56:45", "remaining_time": "5:59:06"}
239
+ {"current_steps": 2380, "total_steps": 5238, "loss": 0.3816, "lr": 5e-06, "epoch": 1.3631156930126003, "percentage": 45.44, "elapsed_time": "4:57:58", "remaining_time": "5:57:49"}
240
+ {"current_steps": 2390, "total_steps": 5238, "loss": 0.3844, "lr": 5e-06, "epoch": 1.3688430698739977, "percentage": 45.63, "elapsed_time": "4:59:11", "remaining_time": "5:56:32"}
241
+ {"current_steps": 2400, "total_steps": 5238, "loss": 0.3672, "lr": 5e-06, "epoch": 1.3745704467353952, "percentage": 45.82, "elapsed_time": "5:00:24", "remaining_time": "5:55:14"}
242
+ {"current_steps": 2410, "total_steps": 5238, "loss": 0.373, "lr": 5e-06, "epoch": 1.3802978235967927, "percentage": 46.01, "elapsed_time": "5:01:37", "remaining_time": "5:53:56"}
243
+ {"current_steps": 2420, "total_steps": 5238, "loss": 0.3674, "lr": 5e-06, "epoch": 1.38602520045819, "percentage": 46.2, "elapsed_time": "5:02:50", "remaining_time": "5:52:38"}
244
+ {"current_steps": 2430, "total_steps": 5238, "loss": 0.3725, "lr": 5e-06, "epoch": 1.3917525773195876, "percentage": 46.39, "elapsed_time": "5:04:03", "remaining_time": "5:51:21"}
245
+ {"current_steps": 2440, "total_steps": 5238, "loss": 0.3721, "lr": 5e-06, "epoch": 1.397479954180985, "percentage": 46.58, "elapsed_time": "5:05:16", "remaining_time": "5:50:04"}
246
+ {"current_steps": 2450, "total_steps": 5238, "loss": 0.3779, "lr": 5e-06, "epoch": 1.4032073310423825, "percentage": 46.77, "elapsed_time": "5:06:30", "remaining_time": "5:48:47"}
247
+ {"current_steps": 2460, "total_steps": 5238, "loss": 0.3787, "lr": 5e-06, "epoch": 1.40893470790378, "percentage": 46.96, "elapsed_time": "5:07:43", "remaining_time": "5:47:30"}
248
+ {"current_steps": 2470, "total_steps": 5238, "loss": 0.3728, "lr": 5e-06, "epoch": 1.4146620847651776, "percentage": 47.16, "elapsed_time": "5:08:56", "remaining_time": "5:46:13"}
249
+ {"current_steps": 2480, "total_steps": 5238, "loss": 0.3723, "lr": 5e-06, "epoch": 1.420389461626575, "percentage": 47.35, "elapsed_time": "5:10:10", "remaining_time": "5:44:56"}
250
+ {"current_steps": 2490, "total_steps": 5238, "loss": 0.3707, "lr": 5e-06, "epoch": 1.4261168384879725, "percentage": 47.54, "elapsed_time": "5:11:23", "remaining_time": "5:43:39"}
251
+ {"current_steps": 2500, "total_steps": 5238, "loss": 0.3815, "lr": 5e-06, "epoch": 1.43184421534937, "percentage": 47.73, "elapsed_time": "5:12:36", "remaining_time": "5:42:21"}
252
+ {"current_steps": 2510, "total_steps": 5238, "loss": 0.369, "lr": 5e-06, "epoch": 1.4375715922107675, "percentage": 47.92, "elapsed_time": "5:13:48", "remaining_time": "5:41:03"}
253
+ {"current_steps": 2520, "total_steps": 5238, "loss": 0.3725, "lr": 5e-06, "epoch": 1.443298969072165, "percentage": 48.11, "elapsed_time": "5:15:00", "remaining_time": "5:39:45"}
254
+ {"current_steps": 2530, "total_steps": 5238, "loss": 0.3625, "lr": 5e-06, "epoch": 1.4490263459335624, "percentage": 48.3, "elapsed_time": "5:16:13", "remaining_time": "5:38:28"}
255
+ {"current_steps": 2540, "total_steps": 5238, "loss": 0.3735, "lr": 5e-06, "epoch": 1.4547537227949598, "percentage": 48.49, "elapsed_time": "5:17:27", "remaining_time": "5:37:11"}
256
+ {"current_steps": 2550, "total_steps": 5238, "loss": 0.3714, "lr": 5e-06, "epoch": 1.4604810996563573, "percentage": 48.68, "elapsed_time": "5:18:40", "remaining_time": "5:35:55"}
257
+ {"current_steps": 2560, "total_steps": 5238, "loss": 0.3737, "lr": 5e-06, "epoch": 1.466208476517755, "percentage": 48.87, "elapsed_time": "5:19:54", "remaining_time": "5:34:38"}
258
+ {"current_steps": 2570, "total_steps": 5238, "loss": 0.381, "lr": 5e-06, "epoch": 1.4719358533791524, "percentage": 49.06, "elapsed_time": "5:21:07", "remaining_time": "5:33:22"}
259
+ {"current_steps": 2580, "total_steps": 5238, "loss": 0.3671, "lr": 5e-06, "epoch": 1.47766323024055, "percentage": 49.26, "elapsed_time": "5:22:21", "remaining_time": "5:32:05"}
260
+ {"current_steps": 2590, "total_steps": 5238, "loss": 0.3746, "lr": 5e-06, "epoch": 1.4833906071019474, "percentage": 49.45, "elapsed_time": "5:23:34", "remaining_time": "5:30:48"}
261
+ {"current_steps": 2600, "total_steps": 5238, "loss": 0.3732, "lr": 5e-06, "epoch": 1.4891179839633448, "percentage": 49.64, "elapsed_time": "5:24:46", "remaining_time": "5:29:31"}
262
+ {"current_steps": 2610, "total_steps": 5238, "loss": 0.3757, "lr": 5e-06, "epoch": 1.4948453608247423, "percentage": 49.83, "elapsed_time": "5:26:00", "remaining_time": "5:28:14"}
263
+ {"current_steps": 2620, "total_steps": 5238, "loss": 0.3804, "lr": 5e-06, "epoch": 1.5005727376861397, "percentage": 50.02, "elapsed_time": "5:27:13", "remaining_time": "5:26:58"}
264
+ {"current_steps": 2630, "total_steps": 5238, "loss": 0.3808, "lr": 5e-06, "epoch": 1.5063001145475372, "percentage": 50.21, "elapsed_time": "5:28:26", "remaining_time": "5:25:41"}
265
+ {"current_steps": 2640, "total_steps": 5238, "loss": 0.3786, "lr": 5e-06, "epoch": 1.5120274914089347, "percentage": 50.4, "elapsed_time": "5:29:39", "remaining_time": "5:24:25"}
266
+ {"current_steps": 2650, "total_steps": 5238, "loss": 0.3787, "lr": 5e-06, "epoch": 1.5177548682703321, "percentage": 50.59, "elapsed_time": "5:30:53", "remaining_time": "5:23:08"}
267
+ {"current_steps": 2660, "total_steps": 5238, "loss": 0.3813, "lr": 5e-06, "epoch": 1.5234822451317296, "percentage": 50.78, "elapsed_time": "5:32:06", "remaining_time": "5:21:52"}
268
+ {"current_steps": 2670, "total_steps": 5238, "loss": 0.388, "lr": 5e-06, "epoch": 1.529209621993127, "percentage": 50.97, "elapsed_time": "5:33:19", "remaining_time": "5:20:35"}
269
+ {"current_steps": 2680, "total_steps": 5238, "loss": 0.3774, "lr": 5e-06, "epoch": 1.5349369988545245, "percentage": 51.16, "elapsed_time": "5:34:33", "remaining_time": "5:19:19"}
270
+ {"current_steps": 2690, "total_steps": 5238, "loss": 0.3804, "lr": 5e-06, "epoch": 1.540664375715922, "percentage": 51.36, "elapsed_time": "5:35:46", "remaining_time": "5:18:02"}
271
+ {"current_steps": 2700, "total_steps": 5238, "loss": 0.3715, "lr": 5e-06, "epoch": 1.5463917525773194, "percentage": 51.55, "elapsed_time": "5:36:59", "remaining_time": "5:16:46"}
272
+ {"current_steps": 2710, "total_steps": 5238, "loss": 0.3726, "lr": 5e-06, "epoch": 1.552119129438717, "percentage": 51.74, "elapsed_time": "5:38:13", "remaining_time": "5:15:30"}
273
+ {"current_steps": 2720, "total_steps": 5238, "loss": 0.3708, "lr": 5e-06, "epoch": 1.5578465063001146, "percentage": 51.93, "elapsed_time": "5:39:27", "remaining_time": "5:14:15"}
274
+ {"current_steps": 2730, "total_steps": 5238, "loss": 0.3802, "lr": 5e-06, "epoch": 1.563573883161512, "percentage": 52.12, "elapsed_time": "5:40:41", "remaining_time": "5:12:59"}
275
+ {"current_steps": 2740, "total_steps": 5238, "loss": 0.3709, "lr": 5e-06, "epoch": 1.5693012600229095, "percentage": 52.31, "elapsed_time": "5:41:54", "remaining_time": "5:11:42"}
276
+ {"current_steps": 2750, "total_steps": 5238, "loss": 0.3807, "lr": 5e-06, "epoch": 1.575028636884307, "percentage": 52.5, "elapsed_time": "5:43:08", "remaining_time": "5:10:26"}
277
+ {"current_steps": 2760, "total_steps": 5238, "loss": 0.3778, "lr": 5e-06, "epoch": 1.5807560137457046, "percentage": 52.69, "elapsed_time": "5:44:21", "remaining_time": "5:09:10"}
278
+ {"current_steps": 2770, "total_steps": 5238, "loss": 0.3694, "lr": 5e-06, "epoch": 1.586483390607102, "percentage": 52.88, "elapsed_time": "5:45:34", "remaining_time": "5:07:54"}
279
+ {"current_steps": 2780, "total_steps": 5238, "loss": 0.3664, "lr": 5e-06, "epoch": 1.5922107674684995, "percentage": 53.07, "elapsed_time": "5:46:47", "remaining_time": "5:06:37"}
280
+ {"current_steps": 2790, "total_steps": 5238, "loss": 0.3782, "lr": 5e-06, "epoch": 1.597938144329897, "percentage": 53.26, "elapsed_time": "5:48:00", "remaining_time": "5:05:21"}
281
+ {"current_steps": 2800, "total_steps": 5238, "loss": 0.3809, "lr": 5e-06, "epoch": 1.6036655211912945, "percentage": 53.46, "elapsed_time": "5:49:14", "remaining_time": "5:04:05"}
282
+ {"current_steps": 2810, "total_steps": 5238, "loss": 0.3839, "lr": 5e-06, "epoch": 1.609392898052692, "percentage": 53.65, "elapsed_time": "5:50:27", "remaining_time": "5:02:49"}
283
+ {"current_steps": 2820, "total_steps": 5238, "loss": 0.3749, "lr": 5e-06, "epoch": 1.6151202749140894, "percentage": 53.84, "elapsed_time": "5:51:40", "remaining_time": "5:01:32"}
284
+ {"current_steps": 2830, "total_steps": 5238, "loss": 0.3842, "lr": 5e-06, "epoch": 1.6208476517754868, "percentage": 54.03, "elapsed_time": "5:52:53", "remaining_time": "5:00:16"}
285
+ {"current_steps": 2840, "total_steps": 5238, "loss": 0.3762, "lr": 5e-06, "epoch": 1.6265750286368843, "percentage": 54.22, "elapsed_time": "5:54:07", "remaining_time": "4:59:00"}
286
+ {"current_steps": 2850, "total_steps": 5238, "loss": 0.3813, "lr": 5e-06, "epoch": 1.6323024054982818, "percentage": 54.41, "elapsed_time": "5:55:21", "remaining_time": "4:57:44"}
287
+ {"current_steps": 2860, "total_steps": 5238, "loss": 0.373, "lr": 5e-06, "epoch": 1.6380297823596792, "percentage": 54.6, "elapsed_time": "5:56:34", "remaining_time": "4:56:28"}
288
+ {"current_steps": 2870, "total_steps": 5238, "loss": 0.3715, "lr": 5e-06, "epoch": 1.6437571592210767, "percentage": 54.79, "elapsed_time": "5:57:47", "remaining_time": "4:55:12"}
289
+ {"current_steps": 2880, "total_steps": 5238, "loss": 0.3724, "lr": 5e-06, "epoch": 1.6494845360824741, "percentage": 54.98, "elapsed_time": "5:59:01", "remaining_time": "4:53:56"}
290
+ {"current_steps": 2890, "total_steps": 5238, "loss": 0.3795, "lr": 5e-06, "epoch": 1.6552119129438716, "percentage": 55.17, "elapsed_time": "6:00:14", "remaining_time": "4:52:40"}
291
+ {"current_steps": 2900, "total_steps": 5238, "loss": 0.3795, "lr": 5e-06, "epoch": 1.660939289805269, "percentage": 55.36, "elapsed_time": "6:01:27", "remaining_time": "4:51:24"}
292
+ {"current_steps": 2910, "total_steps": 5238, "loss": 0.373, "lr": 5e-06, "epoch": 1.6666666666666665, "percentage": 55.56, "elapsed_time": "6:02:40", "remaining_time": "4:50:08"}
293
+ {"current_steps": 2920, "total_steps": 5238, "loss": 0.3758, "lr": 5e-06, "epoch": 1.6723940435280642, "percentage": 55.75, "elapsed_time": "6:03:54", "remaining_time": "4:48:52"}
294
+ {"current_steps": 2930, "total_steps": 5238, "loss": 0.3731, "lr": 5e-06, "epoch": 1.6781214203894617, "percentage": 55.94, "elapsed_time": "6:05:07", "remaining_time": "4:47:36"}
295
+ {"current_steps": 2940, "total_steps": 5238, "loss": 0.3757, "lr": 5e-06, "epoch": 1.6838487972508591, "percentage": 56.13, "elapsed_time": "6:06:20", "remaining_time": "4:46:20"}
296
+ {"current_steps": 2950, "total_steps": 5238, "loss": 0.3807, "lr": 5e-06, "epoch": 1.6895761741122566, "percentage": 56.32, "elapsed_time": "6:07:34", "remaining_time": "4:45:05"}
297
+ {"current_steps": 2960, "total_steps": 5238, "loss": 0.3817, "lr": 5e-06, "epoch": 1.695303550973654, "percentage": 56.51, "elapsed_time": "6:08:47", "remaining_time": "4:43:48"}
298
+ {"current_steps": 2970, "total_steps": 5238, "loss": 0.3733, "lr": 5e-06, "epoch": 1.7010309278350515, "percentage": 56.7, "elapsed_time": "6:09:59", "remaining_time": "4:42:32"}
299
+ {"current_steps": 2980, "total_steps": 5238, "loss": 0.3713, "lr": 5e-06, "epoch": 1.7067583046964492, "percentage": 56.89, "elapsed_time": "6:11:12", "remaining_time": "4:41:16"}
300
+ {"current_steps": 2990, "total_steps": 5238, "loss": 0.3691, "lr": 5e-06, "epoch": 1.7124856815578466, "percentage": 57.08, "elapsed_time": "6:12:25", "remaining_time": "4:39:59"}
301
+ {"current_steps": 3000, "total_steps": 5238, "loss": 0.3732, "lr": 5e-06, "epoch": 1.718213058419244, "percentage": 57.27, "elapsed_time": "6:13:37", "remaining_time": "4:38:43"}
302
+ {"current_steps": 3010, "total_steps": 5238, "loss": 0.3712, "lr": 5e-06, "epoch": 1.7239404352806416, "percentage": 57.46, "elapsed_time": "6:14:50", "remaining_time": "4:37:27"}
303
+ {"current_steps": 3020, "total_steps": 5238, "loss": 0.3795, "lr": 5e-06, "epoch": 1.729667812142039, "percentage": 57.66, "elapsed_time": "6:16:04", "remaining_time": "4:36:11"}
304
+ {"current_steps": 3030, "total_steps": 5238, "loss": 0.3702, "lr": 5e-06, "epoch": 1.7353951890034365, "percentage": 57.85, "elapsed_time": "6:17:17", "remaining_time": "4:34:56"}
305
+ {"current_steps": 3040, "total_steps": 5238, "loss": 0.3685, "lr": 5e-06, "epoch": 1.741122565864834, "percentage": 58.04, "elapsed_time": "6:18:31", "remaining_time": "4:33:41"}
306
+ {"current_steps": 3050, "total_steps": 5238, "loss": 0.3708, "lr": 5e-06, "epoch": 1.7468499427262314, "percentage": 58.23, "elapsed_time": "6:19:44", "remaining_time": "4:32:25"}
307
+ {"current_steps": 3060, "total_steps": 5238, "loss": 0.3669, "lr": 5e-06, "epoch": 1.7525773195876289, "percentage": 58.42, "elapsed_time": "6:20:58", "remaining_time": "4:31:09"}
308
+ {"current_steps": 3070, "total_steps": 5238, "loss": 0.3752, "lr": 5e-06, "epoch": 1.7583046964490263, "percentage": 58.61, "elapsed_time": "6:22:11", "remaining_time": "4:29:54"}
309
+ {"current_steps": 3080, "total_steps": 5238, "loss": 0.3774, "lr": 5e-06, "epoch": 1.7640320733104238, "percentage": 58.8, "elapsed_time": "6:23:25", "remaining_time": "4:28:38"}
310
+ {"current_steps": 3090, "total_steps": 5238, "loss": 0.369, "lr": 5e-06, "epoch": 1.7697594501718212, "percentage": 58.99, "elapsed_time": "6:24:38", "remaining_time": "4:27:22"}
311
+ {"current_steps": 3100, "total_steps": 5238, "loss": 0.3744, "lr": 5e-06, "epoch": 1.7754868270332187, "percentage": 59.18, "elapsed_time": "6:25:51", "remaining_time": "4:26:07"}
312
+ {"current_steps": 3110, "total_steps": 5238, "loss": 0.3728, "lr": 5e-06, "epoch": 1.7812142038946162, "percentage": 59.37, "elapsed_time": "6:27:05", "remaining_time": "4:24:51"}
313
+ {"current_steps": 3120, "total_steps": 5238, "loss": 0.3586, "lr": 5e-06, "epoch": 1.7869415807560136, "percentage": 59.56, "elapsed_time": "6:28:18", "remaining_time": "4:23:36"}
314
+ {"current_steps": 3130, "total_steps": 5238, "loss": 0.3775, "lr": 5e-06, "epoch": 1.792668957617411, "percentage": 59.76, "elapsed_time": "6:29:31", "remaining_time": "4:22:20"}
315
+ {"current_steps": 3140, "total_steps": 5238, "loss": 0.3761, "lr": 5e-06, "epoch": 1.7983963344788088, "percentage": 59.95, "elapsed_time": "6:30:45", "remaining_time": "4:21:04"}
316
+ {"current_steps": 3150, "total_steps": 5238, "loss": 0.3711, "lr": 5e-06, "epoch": 1.8041237113402062, "percentage": 60.14, "elapsed_time": "6:31:58", "remaining_time": "4:19:49"}
317
+ {"current_steps": 3160, "total_steps": 5238, "loss": 0.3761, "lr": 5e-06, "epoch": 1.8098510882016037, "percentage": 60.33, "elapsed_time": "6:33:10", "remaining_time": "4:18:33"}
318
+ {"current_steps": 3170, "total_steps": 5238, "loss": 0.3805, "lr": 5e-06, "epoch": 1.8155784650630011, "percentage": 60.52, "elapsed_time": "6:34:23", "remaining_time": "4:17:17"}
319
+ {"current_steps": 3180, "total_steps": 5238, "loss": 0.3765, "lr": 5e-06, "epoch": 1.8213058419243986, "percentage": 60.71, "elapsed_time": "6:35:36", "remaining_time": "4:16:01"}
320
+ {"current_steps": 3190, "total_steps": 5238, "loss": 0.3658, "lr": 5e-06, "epoch": 1.827033218785796, "percentage": 60.9, "elapsed_time": "6:36:50", "remaining_time": "4:14:46"}
321
+ {"current_steps": 3200, "total_steps": 5238, "loss": 0.3791, "lr": 5e-06, "epoch": 1.8327605956471937, "percentage": 61.09, "elapsed_time": "6:38:04", "remaining_time": "4:13:31"}
322
+ {"current_steps": 3210, "total_steps": 5238, "loss": 0.3719, "lr": 5e-06, "epoch": 1.8384879725085912, "percentage": 61.28, "elapsed_time": "6:39:17", "remaining_time": "4:12:16"}
323
+ {"current_steps": 3220, "total_steps": 5238, "loss": 0.3787, "lr": 5e-06, "epoch": 1.8442153493699887, "percentage": 61.47, "elapsed_time": "6:40:31", "remaining_time": "4:11:00"}
324
+ {"current_steps": 3230, "total_steps": 5238, "loss": 0.3738, "lr": 5e-06, "epoch": 1.8499427262313861, "percentage": 61.66, "elapsed_time": "6:41:44", "remaining_time": "4:09:45"}
325
+ {"current_steps": 3240, "total_steps": 5238, "loss": 0.3848, "lr": 5e-06, "epoch": 1.8556701030927836, "percentage": 61.86, "elapsed_time": "6:42:58", "remaining_time": "4:08:30"}
326
+ {"current_steps": 3250, "total_steps": 5238, "loss": 0.3706, "lr": 5e-06, "epoch": 1.861397479954181, "percentage": 62.05, "elapsed_time": "6:44:12", "remaining_time": "4:07:14"}
327
+ {"current_steps": 3260, "total_steps": 5238, "loss": 0.3695, "lr": 5e-06, "epoch": 1.8671248568155785, "percentage": 62.24, "elapsed_time": "6:45:25", "remaining_time": "4:05:59"}
328
+ {"current_steps": 3270, "total_steps": 5238, "loss": 0.371, "lr": 5e-06, "epoch": 1.872852233676976, "percentage": 62.43, "elapsed_time": "6:46:38", "remaining_time": "4:04:44"}
329
+ {"current_steps": 3280, "total_steps": 5238, "loss": 0.3724, "lr": 5e-06, "epoch": 1.8785796105383734, "percentage": 62.62, "elapsed_time": "6:47:52", "remaining_time": "4:03:28"}
330
+ {"current_steps": 3290, "total_steps": 5238, "loss": 0.3722, "lr": 5e-06, "epoch": 1.8843069873997709, "percentage": 62.81, "elapsed_time": "6:49:05", "remaining_time": "4:02:13"}
331
+ {"current_steps": 3300, "total_steps": 5238, "loss": 0.3677, "lr": 5e-06, "epoch": 1.8900343642611683, "percentage": 63.0, "elapsed_time": "6:50:19", "remaining_time": "4:00:58"}
332
+ {"current_steps": 3310, "total_steps": 5238, "loss": 0.3751, "lr": 5e-06, "epoch": 1.8957617411225658, "percentage": 63.19, "elapsed_time": "6:51:32", "remaining_time": "3:59:43"}
333
+ {"current_steps": 3320, "total_steps": 5238, "loss": 0.374, "lr": 5e-06, "epoch": 1.9014891179839633, "percentage": 63.38, "elapsed_time": "6:52:46", "remaining_time": "3:58:27"}
334
+ {"current_steps": 3330, "total_steps": 5238, "loss": 0.3633, "lr": 5e-06, "epoch": 1.9072164948453607, "percentage": 63.57, "elapsed_time": "6:53:59", "remaining_time": "3:57:12"}
335
+ {"current_steps": 3340, "total_steps": 5238, "loss": 0.3786, "lr": 5e-06, "epoch": 1.9129438717067582, "percentage": 63.76, "elapsed_time": "6:55:11", "remaining_time": "3:55:56"}
336
+ {"current_steps": 3350, "total_steps": 5238, "loss": 0.3716, "lr": 5e-06, "epoch": 1.9186712485681556, "percentage": 63.96, "elapsed_time": "6:56:23", "remaining_time": "3:54:40"}
337
+ {"current_steps": 3360, "total_steps": 5238, "loss": 0.3661, "lr": 5e-06, "epoch": 1.9243986254295533, "percentage": 64.15, "elapsed_time": "6:57:36", "remaining_time": "3:53:24"}
338
+ {"current_steps": 3370, "total_steps": 5238, "loss": 0.3681, "lr": 5e-06, "epoch": 1.9301260022909508, "percentage": 64.34, "elapsed_time": "6:58:50", "remaining_time": "3:52:09"}
339
+ {"current_steps": 3380, "total_steps": 5238, "loss": 0.3718, "lr": 5e-06, "epoch": 1.9358533791523482, "percentage": 64.53, "elapsed_time": "7:00:03", "remaining_time": "3:50:54"}
340
+ {"current_steps": 3390, "total_steps": 5238, "loss": 0.3788, "lr": 5e-06, "epoch": 1.9415807560137457, "percentage": 64.72, "elapsed_time": "7:01:16", "remaining_time": "3:49:39"}
341
+ {"current_steps": 3400, "total_steps": 5238, "loss": 0.3685, "lr": 5e-06, "epoch": 1.9473081328751431, "percentage": 64.91, "elapsed_time": "7:02:30", "remaining_time": "3:48:24"}
342
+ {"current_steps": 3410, "total_steps": 5238, "loss": 0.3737, "lr": 5e-06, "epoch": 1.9530355097365406, "percentage": 65.1, "elapsed_time": "7:03:43", "remaining_time": "3:47:08"}
343
+ {"current_steps": 3420, "total_steps": 5238, "loss": 0.3698, "lr": 5e-06, "epoch": 1.9587628865979383, "percentage": 65.29, "elapsed_time": "7:04:56", "remaining_time": "3:45:53"}
344
+ {"current_steps": 3430, "total_steps": 5238, "loss": 0.3764, "lr": 5e-06, "epoch": 1.9644902634593358, "percentage": 65.48, "elapsed_time": "7:06:08", "remaining_time": "3:44:37"}
345
+ {"current_steps": 3440, "total_steps": 5238, "loss": 0.3701, "lr": 5e-06, "epoch": 1.9702176403207332, "percentage": 65.67, "elapsed_time": "7:07:21", "remaining_time": "3:43:21"}
346
+ {"current_steps": 3450, "total_steps": 5238, "loss": 0.3686, "lr": 5e-06, "epoch": 1.9759450171821307, "percentage": 65.86, "elapsed_time": "7:08:33", "remaining_time": "3:42:06"}
347
+ {"current_steps": 3460, "total_steps": 5238, "loss": 0.3728, "lr": 5e-06, "epoch": 1.9816723940435281, "percentage": 66.06, "elapsed_time": "7:09:46", "remaining_time": "3:40:51"}
348
+ {"current_steps": 3470, "total_steps": 5238, "loss": 0.3725, "lr": 5e-06, "epoch": 1.9873997709049256, "percentage": 66.25, "elapsed_time": "7:10:59", "remaining_time": "3:39:35"}
349
+ {"current_steps": 3480, "total_steps": 5238, "loss": 0.378, "lr": 5e-06, "epoch": 1.993127147766323, "percentage": 66.44, "elapsed_time": "7:12:13", "remaining_time": "3:38:20"}
350
+ {"current_steps": 3490, "total_steps": 5238, "loss": 0.3755, "lr": 5e-06, "epoch": 1.9988545246277205, "percentage": 66.63, "elapsed_time": "7:13:26", "remaining_time": "3:37:05"}
351
+ {"current_steps": 3492, "total_steps": 5238, "eval_loss": 0.4207070767879486, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "7:18:30", "remaining_time": "3:39:15"}
352
+ {"current_steps": 3500, "total_steps": 5238, "loss": 0.3263, "lr": 5e-06, "epoch": 2.004581901489118, "percentage": 66.82, "elapsed_time": "7:20:49", "remaining_time": "3:38:54"}
353
+ {"current_steps": 3510, "total_steps": 5238, "loss": 0.3121, "lr": 5e-06, "epoch": 2.0103092783505154, "percentage": 67.01, "elapsed_time": "7:22:02", "remaining_time": "3:37:37"}