Pranay17 commited on
Commit
4979d74
·
verified ·
1 Parent(s): a171000

Training in progress, step 40000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68c58d4316cbc14ab7cac78de3b40dc31048d94feea3c3258056c9246529c02f
3
  size 42002584
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8ff350cb611f06224861beaded1ce40d9fe8bfeadaf0b0a892a03fd5d5020a2
3
  size 42002584
last-checkpoint/global_step40000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfa9e54916a8686bb306eadba9f61d544cf521787dae09ea59e28ab3ad50188e
3
+ size 251710672
last-checkpoint/global_step40000/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1ba8191513f58c930b0d730cf052323c7a5d0728ee0724ebc8904044c2ce128
3
+ size 153747385
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step30000
 
1
+ global_step40000
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81410e8c68ff09be250a30bb210a84a3afd289539e330cebd1667ccee91efaa6
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ac4c1fbd7af888371177ba36b2926feb8e2e859d541c8a12caaa62f63cad240
3
  size 14244
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8957095512495148,
5
  "eval_steps": 1000,
6
- "global_step": 30000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4214,6 +4214,1406 @@
4214
  "learning_rate": 0.00019143023674420996,
4215
  "loss": 1.4109,
4216
  "step": 30000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4217
  }
4218
  ],
4219
  "logging_steps": 50,
@@ -4233,7 +5633,7 @@
4233
  "attributes": {}
4234
  }
4235
  },
4236
- "total_flos": 7.580165416503214e+17,
4237
  "train_batch_size": 2,
4238
  "trial_name": null,
4239
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.1942794016660199,
5
  "eval_steps": 1000,
6
+ "global_step": 40000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4214
  "learning_rate": 0.00019143023674420996,
4215
  "loss": 1.4109,
4216
  "step": 30000
4217
+ },
4218
+ {
4219
+ "epoch": 0.8972024005015974,
4220
+ "grad_norm": 4.6257758140563965,
4221
+ "learning_rate": 0.00019141595094829115,
4222
+ "loss": 1.3679,
4223
+ "step": 30050
4224
+ },
4225
+ {
4226
+ "epoch": 0.8986952497536799,
4227
+ "grad_norm": 4.422520637512207,
4228
+ "learning_rate": 0.0001914016651523723,
4229
+ "loss": 1.3893,
4230
+ "step": 30100
4231
+ },
4232
+ {
4233
+ "epoch": 0.9001880990057624,
4234
+ "grad_norm": 4.911538124084473,
4235
+ "learning_rate": 0.00019138737935645348,
4236
+ "loss": 1.3346,
4237
+ "step": 30150
4238
+ },
4239
+ {
4240
+ "epoch": 0.901680948257845,
4241
+ "grad_norm": 6.410045623779297,
4242
+ "learning_rate": 0.00019137309356053464,
4243
+ "loss": 1.3651,
4244
+ "step": 30200
4245
+ },
4246
+ {
4247
+ "epoch": 0.9031737975099274,
4248
+ "grad_norm": 3.8565444946289062,
4249
+ "learning_rate": 0.0001913588077646158,
4250
+ "loss": 1.3436,
4251
+ "step": 30250
4252
+ },
4253
+ {
4254
+ "epoch": 0.9046666467620099,
4255
+ "grad_norm": 4.632288455963135,
4256
+ "learning_rate": 0.00019134452196869697,
4257
+ "loss": 1.3858,
4258
+ "step": 30300
4259
+ },
4260
+ {
4261
+ "epoch": 0.9061594960140925,
4262
+ "grad_norm": 5.382070064544678,
4263
+ "learning_rate": 0.00019133023617277814,
4264
+ "loss": 1.3872,
4265
+ "step": 30350
4266
+ },
4267
+ {
4268
+ "epoch": 0.907652345266175,
4269
+ "grad_norm": 4.425014019012451,
4270
+ "learning_rate": 0.0001913159503768593,
4271
+ "loss": 1.4488,
4272
+ "step": 30400
4273
+ },
4274
+ {
4275
+ "epoch": 0.9091451945182576,
4276
+ "grad_norm": 3.7266223430633545,
4277
+ "learning_rate": 0.00019130166458094047,
4278
+ "loss": 1.376,
4279
+ "step": 30450
4280
+ },
4281
+ {
4282
+ "epoch": 0.9106380437703401,
4283
+ "grad_norm": 7.25547981262207,
4284
+ "learning_rate": 0.00019128737878502163,
4285
+ "loss": 1.3488,
4286
+ "step": 30500
4287
+ },
4288
+ {
4289
+ "epoch": 0.9121308930224226,
4290
+ "grad_norm": 5.824578285217285,
4291
+ "learning_rate": 0.0001912730929891028,
4292
+ "loss": 1.4001,
4293
+ "step": 30550
4294
+ },
4295
+ {
4296
+ "epoch": 0.9136237422745052,
4297
+ "grad_norm": 5.808544635772705,
4298
+ "learning_rate": 0.00019125880719318396,
4299
+ "loss": 1.3132,
4300
+ "step": 30600
4301
+ },
4302
+ {
4303
+ "epoch": 0.9151165915265876,
4304
+ "grad_norm": 4.343785285949707,
4305
+ "learning_rate": 0.00019124452139726515,
4306
+ "loss": 1.3592,
4307
+ "step": 30650
4308
+ },
4309
+ {
4310
+ "epoch": 0.9166094407786701,
4311
+ "grad_norm": 5.0626325607299805,
4312
+ "learning_rate": 0.0001912302356013463,
4313
+ "loss": 1.4418,
4314
+ "step": 30700
4315
+ },
4316
+ {
4317
+ "epoch": 0.9181022900307527,
4318
+ "grad_norm": 4.336055755615234,
4319
+ "learning_rate": 0.00019121594980542748,
4320
+ "loss": 1.3624,
4321
+ "step": 30750
4322
+ },
4323
+ {
4324
+ "epoch": 0.9195951392828352,
4325
+ "grad_norm": 6.215260982513428,
4326
+ "learning_rate": 0.00019120166400950862,
4327
+ "loss": 1.4053,
4328
+ "step": 30800
4329
+ },
4330
+ {
4331
+ "epoch": 0.9210879885349177,
4332
+ "grad_norm": 4.496364116668701,
4333
+ "learning_rate": 0.0001911873782135898,
4334
+ "loss": 1.4117,
4335
+ "step": 30850
4336
+ },
4337
+ {
4338
+ "epoch": 0.9225808377870003,
4339
+ "grad_norm": 4.023138046264648,
4340
+ "learning_rate": 0.00019117309241767095,
4341
+ "loss": 1.3783,
4342
+ "step": 30900
4343
+ },
4344
+ {
4345
+ "epoch": 0.9240736870390828,
4346
+ "grad_norm": 3.8177783489227295,
4347
+ "learning_rate": 0.00019115880662175214,
4348
+ "loss": 1.3127,
4349
+ "step": 30950
4350
+ },
4351
+ {
4352
+ "epoch": 0.9255665362911654,
4353
+ "grad_norm": 3.893087387084961,
4354
+ "learning_rate": 0.0001911445208258333,
4355
+ "loss": 1.3359,
4356
+ "step": 31000
4357
+ },
4358
+ {
4359
+ "epoch": 0.9270593855432478,
4360
+ "grad_norm": 3.9967384338378906,
4361
+ "learning_rate": 0.00019113023502991447,
4362
+ "loss": 1.3897,
4363
+ "step": 31050
4364
+ },
4365
+ {
4366
+ "epoch": 0.9285522347953303,
4367
+ "grad_norm": 3.2351789474487305,
4368
+ "learning_rate": 0.00019111594923399563,
4369
+ "loss": 1.3406,
4370
+ "step": 31100
4371
+ },
4372
+ {
4373
+ "epoch": 0.9300450840474129,
4374
+ "grad_norm": 4.571500778198242,
4375
+ "learning_rate": 0.0001911016634380768,
4376
+ "loss": 1.3783,
4377
+ "step": 31150
4378
+ },
4379
+ {
4380
+ "epoch": 0.9315379332994954,
4381
+ "grad_norm": 6.241518020629883,
4382
+ "learning_rate": 0.00019108737764215796,
4383
+ "loss": 1.3659,
4384
+ "step": 31200
4385
+ },
4386
+ {
4387
+ "epoch": 0.9330307825515779,
4388
+ "grad_norm": 3.2547826766967773,
4389
+ "learning_rate": 0.00019107309184623913,
4390
+ "loss": 1.3137,
4391
+ "step": 31250
4392
+ },
4393
+ {
4394
+ "epoch": 0.9345236318036605,
4395
+ "grad_norm": 5.386812686920166,
4396
+ "learning_rate": 0.0001910588060503203,
4397
+ "loss": 1.3974,
4398
+ "step": 31300
4399
+ },
4400
+ {
4401
+ "epoch": 0.936016481055743,
4402
+ "grad_norm": 4.239005088806152,
4403
+ "learning_rate": 0.00019104452025440146,
4404
+ "loss": 1.3789,
4405
+ "step": 31350
4406
+ },
4407
+ {
4408
+ "epoch": 0.9375093303078255,
4409
+ "grad_norm": 3.7252326011657715,
4410
+ "learning_rate": 0.00019103023445848262,
4411
+ "loss": 1.3622,
4412
+ "step": 31400
4413
+ },
4414
+ {
4415
+ "epoch": 0.9390021795599081,
4416
+ "grad_norm": 5.890969276428223,
4417
+ "learning_rate": 0.00019101594866256381,
4418
+ "loss": 1.3905,
4419
+ "step": 31450
4420
+ },
4421
+ {
4422
+ "epoch": 0.9404950288119905,
4423
+ "grad_norm": 5.796853542327881,
4424
+ "learning_rate": 0.00019100166286664495,
4425
+ "loss": 1.3797,
4426
+ "step": 31500
4427
+ },
4428
+ {
4429
+ "epoch": 0.9419878780640731,
4430
+ "grad_norm": 4.130026817321777,
4431
+ "learning_rate": 0.00019098737707072614,
4432
+ "loss": 1.3365,
4433
+ "step": 31550
4434
+ },
4435
+ {
4436
+ "epoch": 0.9434807273161556,
4437
+ "grad_norm": 3.65081524848938,
4438
+ "learning_rate": 0.00019097309127480728,
4439
+ "loss": 1.4347,
4440
+ "step": 31600
4441
+ },
4442
+ {
4443
+ "epoch": 0.9449735765682381,
4444
+ "grad_norm": 4.91404390335083,
4445
+ "learning_rate": 0.00019095880547888847,
4446
+ "loss": 1.4056,
4447
+ "step": 31650
4448
+ },
4449
+ {
4450
+ "epoch": 0.9464664258203207,
4451
+ "grad_norm": 4.7272114753723145,
4452
+ "learning_rate": 0.00019094451968296964,
4453
+ "loss": 1.2828,
4454
+ "step": 31700
4455
+ },
4456
+ {
4457
+ "epoch": 0.9479592750724032,
4458
+ "grad_norm": 5.111660957336426,
4459
+ "learning_rate": 0.0001909302338870508,
4460
+ "loss": 1.3486,
4461
+ "step": 31750
4462
+ },
4463
+ {
4464
+ "epoch": 0.9494521243244857,
4465
+ "grad_norm": 3.1206741333007812,
4466
+ "learning_rate": 0.00019091594809113197,
4467
+ "loss": 1.3416,
4468
+ "step": 31800
4469
+ },
4470
+ {
4471
+ "epoch": 0.9509449735765683,
4472
+ "grad_norm": 4.359163761138916,
4473
+ "learning_rate": 0.00019090166229521313,
4474
+ "loss": 1.3471,
4475
+ "step": 31850
4476
+ },
4477
+ {
4478
+ "epoch": 0.9524378228286507,
4479
+ "grad_norm": 4.667807102203369,
4480
+ "learning_rate": 0.0001908873764992943,
4481
+ "loss": 1.4042,
4482
+ "step": 31900
4483
+ },
4484
+ {
4485
+ "epoch": 0.9539306720807332,
4486
+ "grad_norm": 4.410369873046875,
4487
+ "learning_rate": 0.00019087309070337546,
4488
+ "loss": 1.3909,
4489
+ "step": 31950
4490
+ },
4491
+ {
4492
+ "epoch": 0.9554235213328158,
4493
+ "grad_norm": 4.987631320953369,
4494
+ "learning_rate": 0.00019085880490745663,
4495
+ "loss": 1.3548,
4496
+ "step": 32000
4497
+ },
4498
+ {
4499
+ "epoch": 0.9569163705848983,
4500
+ "grad_norm": 4.66121768951416,
4501
+ "learning_rate": 0.0001908445191115378,
4502
+ "loss": 1.3753,
4503
+ "step": 32050
4504
+ },
4505
+ {
4506
+ "epoch": 0.9584092198369809,
4507
+ "grad_norm": 6.010061740875244,
4508
+ "learning_rate": 0.00019083023331561896,
4509
+ "loss": 1.4128,
4510
+ "step": 32100
4511
+ },
4512
+ {
4513
+ "epoch": 0.9599020690890634,
4514
+ "grad_norm": 3.118246078491211,
4515
+ "learning_rate": 0.00019081594751970012,
4516
+ "loss": 1.3625,
4517
+ "step": 32150
4518
+ },
4519
+ {
4520
+ "epoch": 0.9613949183411459,
4521
+ "grad_norm": 3.821732759475708,
4522
+ "learning_rate": 0.00019080166172378128,
4523
+ "loss": 1.3691,
4524
+ "step": 32200
4525
+ },
4526
+ {
4527
+ "epoch": 0.9628877675932285,
4528
+ "grad_norm": 5.321105480194092,
4529
+ "learning_rate": 0.00019078737592786245,
4530
+ "loss": 1.3499,
4531
+ "step": 32250
4532
+ },
4533
+ {
4534
+ "epoch": 0.964380616845311,
4535
+ "grad_norm": 5.963992595672607,
4536
+ "learning_rate": 0.00019077309013194361,
4537
+ "loss": 1.3145,
4538
+ "step": 32300
4539
+ },
4540
+ {
4541
+ "epoch": 0.9658734660973934,
4542
+ "grad_norm": 4.071386337280273,
4543
+ "learning_rate": 0.00019075880433602478,
4544
+ "loss": 1.3215,
4545
+ "step": 32350
4546
+ },
4547
+ {
4548
+ "epoch": 0.967366315349476,
4549
+ "grad_norm": 3.7641048431396484,
4550
+ "learning_rate": 0.00019074451854010594,
4551
+ "loss": 1.3846,
4552
+ "step": 32400
4553
+ },
4554
+ {
4555
+ "epoch": 0.9688591646015585,
4556
+ "grad_norm": 5.985085487365723,
4557
+ "learning_rate": 0.0001907302327441871,
4558
+ "loss": 1.3383,
4559
+ "step": 32450
4560
+ },
4561
+ {
4562
+ "epoch": 0.970352013853641,
4563
+ "grad_norm": 6.602528095245361,
4564
+ "learning_rate": 0.0001907159469482683,
4565
+ "loss": 1.4195,
4566
+ "step": 32500
4567
+ },
4568
+ {
4569
+ "epoch": 0.9718448631057236,
4570
+ "grad_norm": 5.370469570159912,
4571
+ "learning_rate": 0.00019070166115234944,
4572
+ "loss": 1.3485,
4573
+ "step": 32550
4574
+ },
4575
+ {
4576
+ "epoch": 0.9733377123578061,
4577
+ "grad_norm": 4.588457107543945,
4578
+ "learning_rate": 0.00019068737535643063,
4579
+ "loss": 1.3896,
4580
+ "step": 32600
4581
+ },
4582
+ {
4583
+ "epoch": 0.9748305616098887,
4584
+ "grad_norm": 6.389341354370117,
4585
+ "learning_rate": 0.00019067308956051177,
4586
+ "loss": 1.3118,
4587
+ "step": 32650
4588
+ },
4589
+ {
4590
+ "epoch": 0.9763234108619712,
4591
+ "grad_norm": 4.051975250244141,
4592
+ "learning_rate": 0.00019065880376459296,
4593
+ "loss": 1.4065,
4594
+ "step": 32700
4595
+ },
4596
+ {
4597
+ "epoch": 0.9778162601140536,
4598
+ "grad_norm": 5.474813938140869,
4599
+ "learning_rate": 0.0001906445179686741,
4600
+ "loss": 1.3967,
4601
+ "step": 32750
4602
+ },
4603
+ {
4604
+ "epoch": 0.9793091093661362,
4605
+ "grad_norm": 4.033454895019531,
4606
+ "learning_rate": 0.0001906302321727553,
4607
+ "loss": 1.3212,
4608
+ "step": 32800
4609
+ },
4610
+ {
4611
+ "epoch": 0.9808019586182187,
4612
+ "grad_norm": 4.46946382522583,
4613
+ "learning_rate": 0.00019061594637683645,
4614
+ "loss": 1.3917,
4615
+ "step": 32850
4616
+ },
4617
+ {
4618
+ "epoch": 0.9822948078703012,
4619
+ "grad_norm": 4.98955774307251,
4620
+ "learning_rate": 0.00019060166058091762,
4621
+ "loss": 1.3814,
4622
+ "step": 32900
4623
+ },
4624
+ {
4625
+ "epoch": 0.9837876571223838,
4626
+ "grad_norm": 4.879435062408447,
4627
+ "learning_rate": 0.00019058737478499878,
4628
+ "loss": 1.3486,
4629
+ "step": 32950
4630
+ },
4631
+ {
4632
+ "epoch": 0.9852805063744663,
4633
+ "grad_norm": 4.0941925048828125,
4634
+ "learning_rate": 0.00019057308898907995,
4635
+ "loss": 1.3289,
4636
+ "step": 33000
4637
+ },
4638
+ {
4639
+ "epoch": 0.9867733556265488,
4640
+ "grad_norm": 5.2992706298828125,
4641
+ "learning_rate": 0.0001905588031931611,
4642
+ "loss": 1.3458,
4643
+ "step": 33050
4644
+ },
4645
+ {
4646
+ "epoch": 0.9882662048786314,
4647
+ "grad_norm": 4.284411430358887,
4648
+ "learning_rate": 0.00019054451739724228,
4649
+ "loss": 1.3932,
4650
+ "step": 33100
4651
+ },
4652
+ {
4653
+ "epoch": 0.9897590541307139,
4654
+ "grad_norm": 4.969634532928467,
4655
+ "learning_rate": 0.00019053023160132344,
4656
+ "loss": 1.4044,
4657
+ "step": 33150
4658
+ },
4659
+ {
4660
+ "epoch": 0.9912519033827965,
4661
+ "grad_norm": 5.532953262329102,
4662
+ "learning_rate": 0.0001905159458054046,
4663
+ "loss": 1.3658,
4664
+ "step": 33200
4665
+ },
4666
+ {
4667
+ "epoch": 0.9927447526348789,
4668
+ "grad_norm": 4.402670383453369,
4669
+ "learning_rate": 0.00019050166000948577,
4670
+ "loss": 1.343,
4671
+ "step": 33250
4672
+ },
4673
+ {
4674
+ "epoch": 0.9942376018869614,
4675
+ "grad_norm": 4.458900451660156,
4676
+ "learning_rate": 0.00019048737421356696,
4677
+ "loss": 1.4002,
4678
+ "step": 33300
4679
+ },
4680
+ {
4681
+ "epoch": 0.995730451139044,
4682
+ "grad_norm": 3.934340238571167,
4683
+ "learning_rate": 0.0001904730884176481,
4684
+ "loss": 1.413,
4685
+ "step": 33350
4686
+ },
4687
+ {
4688
+ "epoch": 0.9972233003911265,
4689
+ "grad_norm": 4.274466514587402,
4690
+ "learning_rate": 0.0001904588026217293,
4691
+ "loss": 1.3552,
4692
+ "step": 33400
4693
+ },
4694
+ {
4695
+ "epoch": 0.998716149643209,
4696
+ "grad_norm": 4.774006366729736,
4697
+ "learning_rate": 0.00019044451682581043,
4698
+ "loss": 1.3825,
4699
+ "step": 33450
4700
+ },
4701
+ {
4702
+ "epoch": 1.0002089988952916,
4703
+ "grad_norm": 3.870762825012207,
4704
+ "learning_rate": 0.00019043023102989162,
4705
+ "loss": 1.3715,
4706
+ "step": 33500
4707
+ },
4708
+ {
4709
+ "epoch": 1.001701848147374,
4710
+ "grad_norm": 3.595686912536621,
4711
+ "learning_rate": 0.00019041594523397276,
4712
+ "loss": 1.3352,
4713
+ "step": 33550
4714
+ },
4715
+ {
4716
+ "epoch": 1.0031946973994565,
4717
+ "grad_norm": 5.492071151733398,
4718
+ "learning_rate": 0.00019040165943805395,
4719
+ "loss": 1.2927,
4720
+ "step": 33600
4721
+ },
4722
+ {
4723
+ "epoch": 1.004687546651539,
4724
+ "grad_norm": 5.381584167480469,
4725
+ "learning_rate": 0.00019038737364213511,
4726
+ "loss": 1.2545,
4727
+ "step": 33650
4728
+ },
4729
+ {
4730
+ "epoch": 1.0061803959036217,
4731
+ "grad_norm": 5.983656883239746,
4732
+ "learning_rate": 0.00019037308784621628,
4733
+ "loss": 1.2393,
4734
+ "step": 33700
4735
+ },
4736
+ {
4737
+ "epoch": 1.0076732451557042,
4738
+ "grad_norm": 4.956115245819092,
4739
+ "learning_rate": 0.00019035880205029744,
4740
+ "loss": 1.2931,
4741
+ "step": 33750
4742
+ },
4743
+ {
4744
+ "epoch": 1.0091660944077867,
4745
+ "grad_norm": 4.604572296142578,
4746
+ "learning_rate": 0.0001903445162543786,
4747
+ "loss": 1.2786,
4748
+ "step": 33800
4749
+ },
4750
+ {
4751
+ "epoch": 1.0106589436598692,
4752
+ "grad_norm": 5.043067932128906,
4753
+ "learning_rate": 0.00019033023045845977,
4754
+ "loss": 1.2933,
4755
+ "step": 33850
4756
+ },
4757
+ {
4758
+ "epoch": 1.0121517929119517,
4759
+ "grad_norm": 4.53336238861084,
4760
+ "learning_rate": 0.00019031594466254094,
4761
+ "loss": 1.3114,
4762
+ "step": 33900
4763
+ },
4764
+ {
4765
+ "epoch": 1.0136446421640344,
4766
+ "grad_norm": 5.106605052947998,
4767
+ "learning_rate": 0.0001903016588666221,
4768
+ "loss": 1.2994,
4769
+ "step": 33950
4770
+ },
4771
+ {
4772
+ "epoch": 1.0151374914161169,
4773
+ "grad_norm": 5.796351909637451,
4774
+ "learning_rate": 0.00019028737307070327,
4775
+ "loss": 1.2555,
4776
+ "step": 34000
4777
+ },
4778
+ {
4779
+ "epoch": 1.0166303406681994,
4780
+ "grad_norm": 4.970977306365967,
4781
+ "learning_rate": 0.00019027308727478443,
4782
+ "loss": 1.3343,
4783
+ "step": 34050
4784
+ },
4785
+ {
4786
+ "epoch": 1.0181231899202818,
4787
+ "grad_norm": 4.233397960662842,
4788
+ "learning_rate": 0.00019025880147886562,
4789
+ "loss": 1.2485,
4790
+ "step": 34100
4791
+ },
4792
+ {
4793
+ "epoch": 1.0196160391723643,
4794
+ "grad_norm": 4.012045383453369,
4795
+ "learning_rate": 0.00019024451568294676,
4796
+ "loss": 1.2816,
4797
+ "step": 34150
4798
+ },
4799
+ {
4800
+ "epoch": 1.0211088884244468,
4801
+ "grad_norm": 4.715073585510254,
4802
+ "learning_rate": 0.00019023022988702795,
4803
+ "loss": 1.2541,
4804
+ "step": 34200
4805
+ },
4806
+ {
4807
+ "epoch": 1.0226017376765295,
4808
+ "grad_norm": 3.7285947799682617,
4809
+ "learning_rate": 0.0001902159440911091,
4810
+ "loss": 1.3094,
4811
+ "step": 34250
4812
+ },
4813
+ {
4814
+ "epoch": 1.024094586928612,
4815
+ "grad_norm": 4.012038707733154,
4816
+ "learning_rate": 0.00019020165829519028,
4817
+ "loss": 1.2791,
4818
+ "step": 34300
4819
+ },
4820
+ {
4821
+ "epoch": 1.0255874361806945,
4822
+ "grad_norm": 4.04899263381958,
4823
+ "learning_rate": 0.00019018737249927142,
4824
+ "loss": 1.3085,
4825
+ "step": 34350
4826
+ },
4827
+ {
4828
+ "epoch": 1.027080285432777,
4829
+ "grad_norm": 4.380834102630615,
4830
+ "learning_rate": 0.0001901730867033526,
4831
+ "loss": 1.3212,
4832
+ "step": 34400
4833
+ },
4834
+ {
4835
+ "epoch": 1.0285731346848594,
4836
+ "grad_norm": 5.275148868560791,
4837
+ "learning_rate": 0.00019015880090743378,
4838
+ "loss": 1.2749,
4839
+ "step": 34450
4840
+ },
4841
+ {
4842
+ "epoch": 1.0300659839369422,
4843
+ "grad_norm": 4.162272930145264,
4844
+ "learning_rate": 0.00019014451511151494,
4845
+ "loss": 1.296,
4846
+ "step": 34500
4847
+ },
4848
+ {
4849
+ "epoch": 1.0315588331890246,
4850
+ "grad_norm": 4.128006935119629,
4851
+ "learning_rate": 0.0001901302293155961,
4852
+ "loss": 1.2547,
4853
+ "step": 34550
4854
+ },
4855
+ {
4856
+ "epoch": 1.0330516824411071,
4857
+ "grad_norm": 3.930121421813965,
4858
+ "learning_rate": 0.00019011594351967727,
4859
+ "loss": 1.2831,
4860
+ "step": 34600
4861
+ },
4862
+ {
4863
+ "epoch": 1.0345445316931896,
4864
+ "grad_norm": 4.515873908996582,
4865
+ "learning_rate": 0.00019010165772375843,
4866
+ "loss": 1.2747,
4867
+ "step": 34650
4868
+ },
4869
+ {
4870
+ "epoch": 1.036037380945272,
4871
+ "grad_norm": 4.484467506408691,
4872
+ "learning_rate": 0.0001900873719278396,
4873
+ "loss": 1.289,
4874
+ "step": 34700
4875
+ },
4876
+ {
4877
+ "epoch": 1.0375302301973546,
4878
+ "grad_norm": 3.8937742710113525,
4879
+ "learning_rate": 0.00019007308613192076,
4880
+ "loss": 1.2939,
4881
+ "step": 34750
4882
+ },
4883
+ {
4884
+ "epoch": 1.0390230794494373,
4885
+ "grad_norm": 4.367059230804443,
4886
+ "learning_rate": 0.00019005880033600193,
4887
+ "loss": 1.2522,
4888
+ "step": 34800
4889
+ },
4890
+ {
4891
+ "epoch": 1.0405159287015198,
4892
+ "grad_norm": 5.143396854400635,
4893
+ "learning_rate": 0.0001900445145400831,
4894
+ "loss": 1.2996,
4895
+ "step": 34850
4896
+ },
4897
+ {
4898
+ "epoch": 1.0420087779536022,
4899
+ "grad_norm": 5.060800552368164,
4900
+ "learning_rate": 0.00019003022874416426,
4901
+ "loss": 1.3122,
4902
+ "step": 34900
4903
+ },
4904
+ {
4905
+ "epoch": 1.0435016272056847,
4906
+ "grad_norm": 4.775914192199707,
4907
+ "learning_rate": 0.00019001594294824542,
4908
+ "loss": 1.2595,
4909
+ "step": 34950
4910
+ },
4911
+ {
4912
+ "epoch": 1.0449944764577672,
4913
+ "grad_norm": 5.256932258605957,
4914
+ "learning_rate": 0.0001900016571523266,
4915
+ "loss": 1.3148,
4916
+ "step": 35000
4917
+ },
4918
+ {
4919
+ "epoch": 1.04648732570985,
4920
+ "grad_norm": 4.253088474273682,
4921
+ "learning_rate": 0.00018998737135640775,
4922
+ "loss": 1.2575,
4923
+ "step": 35050
4924
+ },
4925
+ {
4926
+ "epoch": 1.0479801749619324,
4927
+ "grad_norm": 4.784812927246094,
4928
+ "learning_rate": 0.00018997308556048892,
4929
+ "loss": 1.2911,
4930
+ "step": 35100
4931
+ },
4932
+ {
4933
+ "epoch": 1.049473024214015,
4934
+ "grad_norm": 3.9015183448791504,
4935
+ "learning_rate": 0.0001899587997645701,
4936
+ "loss": 1.2647,
4937
+ "step": 35150
4938
+ },
4939
+ {
4940
+ "epoch": 1.0509658734660974,
4941
+ "grad_norm": 4.729675769805908,
4942
+ "learning_rate": 0.00018994451396865125,
4943
+ "loss": 1.2876,
4944
+ "step": 35200
4945
+ },
4946
+ {
4947
+ "epoch": 1.0524587227181799,
4948
+ "grad_norm": 4.921074867248535,
4949
+ "learning_rate": 0.00018993022817273244,
4950
+ "loss": 1.2956,
4951
+ "step": 35250
4952
+ },
4953
+ {
4954
+ "epoch": 1.0539515719702623,
4955
+ "grad_norm": 4.466576099395752,
4956
+ "learning_rate": 0.00018991594237681357,
4957
+ "loss": 1.2938,
4958
+ "step": 35300
4959
+ },
4960
+ {
4961
+ "epoch": 1.055444421222345,
4962
+ "grad_norm": 4.142183780670166,
4963
+ "learning_rate": 0.00018990165658089477,
4964
+ "loss": 1.282,
4965
+ "step": 35350
4966
+ },
4967
+ {
4968
+ "epoch": 1.0569372704744275,
4969
+ "grad_norm": 4.372234344482422,
4970
+ "learning_rate": 0.0001898873707849759,
4971
+ "loss": 1.2793,
4972
+ "step": 35400
4973
+ },
4974
+ {
4975
+ "epoch": 1.05843011972651,
4976
+ "grad_norm": 5.349823474884033,
4977
+ "learning_rate": 0.0001898730849890571,
4978
+ "loss": 1.3678,
4979
+ "step": 35450
4980
+ },
4981
+ {
4982
+ "epoch": 1.0599229689785925,
4983
+ "grad_norm": 4.57612419128418,
4984
+ "learning_rate": 0.00018985879919313826,
4985
+ "loss": 1.3256,
4986
+ "step": 35500
4987
+ },
4988
+ {
4989
+ "epoch": 1.061415818230675,
4990
+ "grad_norm": 5.3342156410217285,
4991
+ "learning_rate": 0.00018984451339721942,
4992
+ "loss": 1.2897,
4993
+ "step": 35550
4994
+ },
4995
+ {
4996
+ "epoch": 1.0629086674827577,
4997
+ "grad_norm": 5.361871242523193,
4998
+ "learning_rate": 0.0001898302276013006,
4999
+ "loss": 1.2933,
5000
+ "step": 35600
5001
+ },
5002
+ {
5003
+ "epoch": 1.0644015167348402,
5004
+ "grad_norm": 3.6657068729400635,
5005
+ "learning_rate": 0.00018981594180538175,
5006
+ "loss": 1.3359,
5007
+ "step": 35650
5008
+ },
5009
+ {
5010
+ "epoch": 1.0658943659869227,
5011
+ "grad_norm": 4.428374290466309,
5012
+ "learning_rate": 0.00018980165600946292,
5013
+ "loss": 1.3278,
5014
+ "step": 35700
5015
+ },
5016
+ {
5017
+ "epoch": 1.0673872152390051,
5018
+ "grad_norm": 5.079537391662598,
5019
+ "learning_rate": 0.00018978737021354408,
5020
+ "loss": 1.2699,
5021
+ "step": 35750
5022
+ },
5023
+ {
5024
+ "epoch": 1.0688800644910876,
5025
+ "grad_norm": 5.303153038024902,
5026
+ "learning_rate": 0.00018977308441762525,
5027
+ "loss": 1.2985,
5028
+ "step": 35800
5029
+ },
5030
+ {
5031
+ "epoch": 1.0703729137431701,
5032
+ "grad_norm": 4.319901943206787,
5033
+ "learning_rate": 0.0001897587986217064,
5034
+ "loss": 1.2661,
5035
+ "step": 35850
5036
+ },
5037
+ {
5038
+ "epoch": 1.0718657629952528,
5039
+ "grad_norm": 6.0582451820373535,
5040
+ "learning_rate": 0.00018974451282578758,
5041
+ "loss": 1.2837,
5042
+ "step": 35900
5043
+ },
5044
+ {
5045
+ "epoch": 1.0733586122473353,
5046
+ "grad_norm": 5.738245964050293,
5047
+ "learning_rate": 0.00018973022702986877,
5048
+ "loss": 1.3075,
5049
+ "step": 35950
5050
+ },
5051
+ {
5052
+ "epoch": 1.0748514614994178,
5053
+ "grad_norm": 4.602933883666992,
5054
+ "learning_rate": 0.0001897159412339499,
5055
+ "loss": 1.2585,
5056
+ "step": 36000
5057
+ },
5058
+ {
5059
+ "epoch": 1.0763443107515003,
5060
+ "grad_norm": 4.273153305053711,
5061
+ "learning_rate": 0.0001897016554380311,
5062
+ "loss": 1.3382,
5063
+ "step": 36050
5064
+ },
5065
+ {
5066
+ "epoch": 1.0778371600035828,
5067
+ "grad_norm": 4.225873947143555,
5068
+ "learning_rate": 0.00018968736964211224,
5069
+ "loss": 1.2964,
5070
+ "step": 36100
5071
+ },
5072
+ {
5073
+ "epoch": 1.0793300092556652,
5074
+ "grad_norm": 4.828727722167969,
5075
+ "learning_rate": 0.00018967308384619343,
5076
+ "loss": 1.3734,
5077
+ "step": 36150
5078
+ },
5079
+ {
5080
+ "epoch": 1.080822858507748,
5081
+ "grad_norm": 3.7251532077789307,
5082
+ "learning_rate": 0.00018965879805027457,
5083
+ "loss": 1.2706,
5084
+ "step": 36200
5085
+ },
5086
+ {
5087
+ "epoch": 1.0823157077598304,
5088
+ "grad_norm": 4.92055606842041,
5089
+ "learning_rate": 0.00018964451225435576,
5090
+ "loss": 1.3187,
5091
+ "step": 36250
5092
+ },
5093
+ {
5094
+ "epoch": 1.083808557011913,
5095
+ "grad_norm": 4.827970027923584,
5096
+ "learning_rate": 0.00018963022645843692,
5097
+ "loss": 1.2574,
5098
+ "step": 36300
5099
+ },
5100
+ {
5101
+ "epoch": 1.0853014062639954,
5102
+ "grad_norm": 4.742151737213135,
5103
+ "learning_rate": 0.00018961594066251809,
5104
+ "loss": 1.3076,
5105
+ "step": 36350
5106
+ },
5107
+ {
5108
+ "epoch": 1.086794255516078,
5109
+ "grad_norm": 3.70794939994812,
5110
+ "learning_rate": 0.00018960165486659925,
5111
+ "loss": 1.3075,
5112
+ "step": 36400
5113
+ },
5114
+ {
5115
+ "epoch": 1.0882871047681606,
5116
+ "grad_norm": 4.062809944152832,
5117
+ "learning_rate": 0.00018958736907068042,
5118
+ "loss": 1.2628,
5119
+ "step": 36450
5120
+ },
5121
+ {
5122
+ "epoch": 1.089779954020243,
5123
+ "grad_norm": 5.072466850280762,
5124
+ "learning_rate": 0.00018957308327476158,
5125
+ "loss": 1.3278,
5126
+ "step": 36500
5127
+ },
5128
+ {
5129
+ "epoch": 1.0912728032723256,
5130
+ "grad_norm": 4.530171871185303,
5131
+ "learning_rate": 0.00018955879747884274,
5132
+ "loss": 1.3233,
5133
+ "step": 36550
5134
+ },
5135
+ {
5136
+ "epoch": 1.092765652524408,
5137
+ "grad_norm": 3.9391825199127197,
5138
+ "learning_rate": 0.0001895445116829239,
5139
+ "loss": 1.2973,
5140
+ "step": 36600
5141
+ },
5142
+ {
5143
+ "epoch": 1.0942585017764905,
5144
+ "grad_norm": 6.774008274078369,
5145
+ "learning_rate": 0.00018953022588700507,
5146
+ "loss": 1.2801,
5147
+ "step": 36650
5148
+ },
5149
+ {
5150
+ "epoch": 1.0957513510285732,
5151
+ "grad_norm": 4.565464496612549,
5152
+ "learning_rate": 0.00018951594009108624,
5153
+ "loss": 1.3718,
5154
+ "step": 36700
5155
+ },
5156
+ {
5157
+ "epoch": 1.0972442002806557,
5158
+ "grad_norm": 5.68241548538208,
5159
+ "learning_rate": 0.00018950165429516743,
5160
+ "loss": 1.2928,
5161
+ "step": 36750
5162
+ },
5163
+ {
5164
+ "epoch": 1.0987370495327382,
5165
+ "grad_norm": 4.235448360443115,
5166
+ "learning_rate": 0.00018948736849924857,
5167
+ "loss": 1.3097,
5168
+ "step": 36800
5169
+ },
5170
+ {
5171
+ "epoch": 1.1002298987848207,
5172
+ "grad_norm": 4.672369003295898,
5173
+ "learning_rate": 0.00018947308270332976,
5174
+ "loss": 1.2875,
5175
+ "step": 36850
5176
+ },
5177
+ {
5178
+ "epoch": 1.1017227480369032,
5179
+ "grad_norm": 6.449750900268555,
5180
+ "learning_rate": 0.0001894587969074109,
5181
+ "loss": 1.2823,
5182
+ "step": 36900
5183
+ },
5184
+ {
5185
+ "epoch": 1.1032155972889857,
5186
+ "grad_norm": 5.093149662017822,
5187
+ "learning_rate": 0.0001894445111114921,
5188
+ "loss": 1.315,
5189
+ "step": 36950
5190
+ },
5191
+ {
5192
+ "epoch": 1.1047084465410684,
5193
+ "grad_norm": 5.433828353881836,
5194
+ "learning_rate": 0.00018943022531557323,
5195
+ "loss": 1.2781,
5196
+ "step": 37000
5197
+ },
5198
+ {
5199
+ "epoch": 1.1062012957931509,
5200
+ "grad_norm": 5.176681995391846,
5201
+ "learning_rate": 0.00018941593951965442,
5202
+ "loss": 1.2652,
5203
+ "step": 37050
5204
+ },
5205
+ {
5206
+ "epoch": 1.1076941450452333,
5207
+ "grad_norm": 4.09026575088501,
5208
+ "learning_rate": 0.00018940165372373558,
5209
+ "loss": 1.3045,
5210
+ "step": 37100
5211
+ },
5212
+ {
5213
+ "epoch": 1.1091869942973158,
5214
+ "grad_norm": 5.223085880279541,
5215
+ "learning_rate": 0.00018938736792781675,
5216
+ "loss": 1.3009,
5217
+ "step": 37150
5218
+ },
5219
+ {
5220
+ "epoch": 1.1106798435493983,
5221
+ "grad_norm": 5.105752944946289,
5222
+ "learning_rate": 0.0001893730821318979,
5223
+ "loss": 1.2915,
5224
+ "step": 37200
5225
+ },
5226
+ {
5227
+ "epoch": 1.1121726928014808,
5228
+ "grad_norm": 3.945962905883789,
5229
+ "learning_rate": 0.00018935879633597908,
5230
+ "loss": 1.341,
5231
+ "step": 37250
5232
+ },
5233
+ {
5234
+ "epoch": 1.1136655420535635,
5235
+ "grad_norm": 4.856802463531494,
5236
+ "learning_rate": 0.00018934451054006024,
5237
+ "loss": 1.3318,
5238
+ "step": 37300
5239
+ },
5240
+ {
5241
+ "epoch": 1.115158391305646,
5242
+ "grad_norm": 4.858597755432129,
5243
+ "learning_rate": 0.0001893302247441414,
5244
+ "loss": 1.2756,
5245
+ "step": 37350
5246
+ },
5247
+ {
5248
+ "epoch": 1.1166512405577285,
5249
+ "grad_norm": 4.594838619232178,
5250
+ "learning_rate": 0.00018931593894822257,
5251
+ "loss": 1.2712,
5252
+ "step": 37400
5253
+ },
5254
+ {
5255
+ "epoch": 1.118144089809811,
5256
+ "grad_norm": 4.745405673980713,
5257
+ "learning_rate": 0.00018930165315230374,
5258
+ "loss": 1.2878,
5259
+ "step": 37450
5260
+ },
5261
+ {
5262
+ "epoch": 1.1196369390618934,
5263
+ "grad_norm": 4.112718105316162,
5264
+ "learning_rate": 0.0001892873673563849,
5265
+ "loss": 1.3337,
5266
+ "step": 37500
5267
+ },
5268
+ {
5269
+ "epoch": 1.1211297883139761,
5270
+ "grad_norm": 4.297769069671631,
5271
+ "learning_rate": 0.00018927308156046607,
5272
+ "loss": 1.3324,
5273
+ "step": 37550
5274
+ },
5275
+ {
5276
+ "epoch": 1.1226226375660586,
5277
+ "grad_norm": 5.46763801574707,
5278
+ "learning_rate": 0.00018925879576454723,
5279
+ "loss": 1.276,
5280
+ "step": 37600
5281
+ },
5282
+ {
5283
+ "epoch": 1.124115486818141,
5284
+ "grad_norm": 5.648976802825928,
5285
+ "learning_rate": 0.0001892445099686284,
5286
+ "loss": 1.3238,
5287
+ "step": 37650
5288
+ },
5289
+ {
5290
+ "epoch": 1.1256083360702236,
5291
+ "grad_norm": 4.185100078582764,
5292
+ "learning_rate": 0.00018923022417270956,
5293
+ "loss": 1.2848,
5294
+ "step": 37700
5295
+ },
5296
+ {
5297
+ "epoch": 1.127101185322306,
5298
+ "grad_norm": 4.676313877105713,
5299
+ "learning_rate": 0.00018921593837679072,
5300
+ "loss": 1.3038,
5301
+ "step": 37750
5302
+ },
5303
+ {
5304
+ "epoch": 1.1285940345743888,
5305
+ "grad_norm": 4.426568508148193,
5306
+ "learning_rate": 0.00018920165258087192,
5307
+ "loss": 1.3343,
5308
+ "step": 37800
5309
+ },
5310
+ {
5311
+ "epoch": 1.1300868838264713,
5312
+ "grad_norm": 4.887205600738525,
5313
+ "learning_rate": 0.00018918736678495305,
5314
+ "loss": 1.3456,
5315
+ "step": 37850
5316
+ },
5317
+ {
5318
+ "epoch": 1.1315797330785538,
5319
+ "grad_norm": 5.455615043640137,
5320
+ "learning_rate": 0.00018917308098903424,
5321
+ "loss": 1.3258,
5322
+ "step": 37900
5323
+ },
5324
+ {
5325
+ "epoch": 1.1330725823306362,
5326
+ "grad_norm": 5.676678657531738,
5327
+ "learning_rate": 0.00018915879519311538,
5328
+ "loss": 1.3702,
5329
+ "step": 37950
5330
+ },
5331
+ {
5332
+ "epoch": 1.1345654315827187,
5333
+ "grad_norm": 3.9528415203094482,
5334
+ "learning_rate": 0.00018914450939719657,
5335
+ "loss": 1.3097,
5336
+ "step": 38000
5337
+ },
5338
+ {
5339
+ "epoch": 1.1360582808348014,
5340
+ "grad_norm": 4.538026332855225,
5341
+ "learning_rate": 0.0001891302236012777,
5342
+ "loss": 1.316,
5343
+ "step": 38050
5344
+ },
5345
+ {
5346
+ "epoch": 1.137551130086884,
5347
+ "grad_norm": 4.46422815322876,
5348
+ "learning_rate": 0.0001891159378053589,
5349
+ "loss": 1.2479,
5350
+ "step": 38100
5351
+ },
5352
+ {
5353
+ "epoch": 1.1390439793389664,
5354
+ "grad_norm": 4.823958396911621,
5355
+ "learning_rate": 0.00018910165200944007,
5356
+ "loss": 1.2616,
5357
+ "step": 38150
5358
+ },
5359
+ {
5360
+ "epoch": 1.1405368285910489,
5361
+ "grad_norm": 5.7128167152404785,
5362
+ "learning_rate": 0.00018908736621352123,
5363
+ "loss": 1.2853,
5364
+ "step": 38200
5365
+ },
5366
+ {
5367
+ "epoch": 1.1420296778431314,
5368
+ "grad_norm": 3.5420877933502197,
5369
+ "learning_rate": 0.0001890730804176024,
5370
+ "loss": 1.3008,
5371
+ "step": 38250
5372
+ },
5373
+ {
5374
+ "epoch": 1.1435225270952138,
5375
+ "grad_norm": 4.617981910705566,
5376
+ "learning_rate": 0.00018905879462168356,
5377
+ "loss": 1.2888,
5378
+ "step": 38300
5379
+ },
5380
+ {
5381
+ "epoch": 1.1450153763472963,
5382
+ "grad_norm": 4.542544364929199,
5383
+ "learning_rate": 0.00018904450882576473,
5384
+ "loss": 1.3009,
5385
+ "step": 38350
5386
+ },
5387
+ {
5388
+ "epoch": 1.146508225599379,
5389
+ "grad_norm": 4.301428318023682,
5390
+ "learning_rate": 0.0001890302230298459,
5391
+ "loss": 1.2834,
5392
+ "step": 38400
5393
+ },
5394
+ {
5395
+ "epoch": 1.1480010748514615,
5396
+ "grad_norm": 4.136764049530029,
5397
+ "learning_rate": 0.00018901593723392706,
5398
+ "loss": 1.3895,
5399
+ "step": 38450
5400
+ },
5401
+ {
5402
+ "epoch": 1.149493924103544,
5403
+ "grad_norm": 5.59256649017334,
5404
+ "learning_rate": 0.00018900165143800822,
5405
+ "loss": 1.295,
5406
+ "step": 38500
5407
+ },
5408
+ {
5409
+ "epoch": 1.1509867733556265,
5410
+ "grad_norm": 3.4925365447998047,
5411
+ "learning_rate": 0.00018898736564208939,
5412
+ "loss": 1.3385,
5413
+ "step": 38550
5414
+ },
5415
+ {
5416
+ "epoch": 1.152479622607709,
5417
+ "grad_norm": 4.884555816650391,
5418
+ "learning_rate": 0.00018897307984617058,
5419
+ "loss": 1.2776,
5420
+ "step": 38600
5421
+ },
5422
+ {
5423
+ "epoch": 1.1539724718597917,
5424
+ "grad_norm": 3.866908550262451,
5425
+ "learning_rate": 0.00018895879405025171,
5426
+ "loss": 1.3173,
5427
+ "step": 38650
5428
+ },
5429
+ {
5430
+ "epoch": 1.1554653211118742,
5431
+ "grad_norm": 4.8104939460754395,
5432
+ "learning_rate": 0.0001889445082543329,
5433
+ "loss": 1.2988,
5434
+ "step": 38700
5435
+ },
5436
+ {
5437
+ "epoch": 1.1569581703639567,
5438
+ "grad_norm": 3.814675807952881,
5439
+ "learning_rate": 0.00018893022245841404,
5440
+ "loss": 1.348,
5441
+ "step": 38750
5442
+ },
5443
+ {
5444
+ "epoch": 1.1584510196160391,
5445
+ "grad_norm": 5.720306873321533,
5446
+ "learning_rate": 0.00018891593666249524,
5447
+ "loss": 1.2844,
5448
+ "step": 38800
5449
+ },
5450
+ {
5451
+ "epoch": 1.1599438688681216,
5452
+ "grad_norm": 4.06850528717041,
5453
+ "learning_rate": 0.00018890165086657637,
5454
+ "loss": 1.3514,
5455
+ "step": 38850
5456
+ },
5457
+ {
5458
+ "epoch": 1.1614367181202043,
5459
+ "grad_norm": 6.193358898162842,
5460
+ "learning_rate": 0.00018888736507065757,
5461
+ "loss": 1.3587,
5462
+ "step": 38900
5463
+ },
5464
+ {
5465
+ "epoch": 1.1629295673722868,
5466
+ "grad_norm": 4.8998212814331055,
5467
+ "learning_rate": 0.00018887307927473873,
5468
+ "loss": 1.317,
5469
+ "step": 38950
5470
+ },
5471
+ {
5472
+ "epoch": 1.1644224166243693,
5473
+ "grad_norm": 4.4293107986450195,
5474
+ "learning_rate": 0.0001888587934788199,
5475
+ "loss": 1.3473,
5476
+ "step": 39000
5477
+ },
5478
+ {
5479
+ "epoch": 1.1659152658764518,
5480
+ "grad_norm": 6.795536994934082,
5481
+ "learning_rate": 0.00018884450768290106,
5482
+ "loss": 1.2793,
5483
+ "step": 39050
5484
+ },
5485
+ {
5486
+ "epoch": 1.1674081151285343,
5487
+ "grad_norm": 4.158294200897217,
5488
+ "learning_rate": 0.00018883022188698222,
5489
+ "loss": 1.3175,
5490
+ "step": 39100
5491
+ },
5492
+ {
5493
+ "epoch": 1.168900964380617,
5494
+ "grad_norm": 5.839204788208008,
5495
+ "learning_rate": 0.0001888159360910634,
5496
+ "loss": 1.2931,
5497
+ "step": 39150
5498
+ },
5499
+ {
5500
+ "epoch": 1.1703938136326995,
5501
+ "grad_norm": 6.633917331695557,
5502
+ "learning_rate": 0.00018880165029514455,
5503
+ "loss": 1.3053,
5504
+ "step": 39200
5505
+ },
5506
+ {
5507
+ "epoch": 1.171886662884782,
5508
+ "grad_norm": 4.409125328063965,
5509
+ "learning_rate": 0.00018878736449922572,
5510
+ "loss": 1.3074,
5511
+ "step": 39250
5512
+ },
5513
+ {
5514
+ "epoch": 1.1733795121368644,
5515
+ "grad_norm": 4.820318698883057,
5516
+ "learning_rate": 0.00018877307870330688,
5517
+ "loss": 1.329,
5518
+ "step": 39300
5519
+ },
5520
+ {
5521
+ "epoch": 1.174872361388947,
5522
+ "grad_norm": 5.104337215423584,
5523
+ "learning_rate": 0.00018875879290738805,
5524
+ "loss": 1.2628,
5525
+ "step": 39350
5526
+ },
5527
+ {
5528
+ "epoch": 1.1763652106410294,
5529
+ "grad_norm": 5.449405670166016,
5530
+ "learning_rate": 0.00018874450711146924,
5531
+ "loss": 1.2672,
5532
+ "step": 39400
5533
+ },
5534
+ {
5535
+ "epoch": 1.1778580598931119,
5536
+ "grad_norm": 5.3521504402160645,
5537
+ "learning_rate": 0.00018873022131555038,
5538
+ "loss": 1.3236,
5539
+ "step": 39450
5540
+ },
5541
+ {
5542
+ "epoch": 1.1793509091451946,
5543
+ "grad_norm": 5.748175144195557,
5544
+ "learning_rate": 0.00018871593551963157,
5545
+ "loss": 1.284,
5546
+ "step": 39500
5547
+ },
5548
+ {
5549
+ "epoch": 1.180843758397277,
5550
+ "grad_norm": 4.418118953704834,
5551
+ "learning_rate": 0.0001887016497237127,
5552
+ "loss": 1.299,
5553
+ "step": 39550
5554
+ },
5555
+ {
5556
+ "epoch": 1.1823366076493596,
5557
+ "grad_norm": 4.267107009887695,
5558
+ "learning_rate": 0.0001886873639277939,
5559
+ "loss": 1.3287,
5560
+ "step": 39600
5561
+ },
5562
+ {
5563
+ "epoch": 1.183829456901442,
5564
+ "grad_norm": 4.607259273529053,
5565
+ "learning_rate": 0.00018867307813187503,
5566
+ "loss": 1.3087,
5567
+ "step": 39650
5568
+ },
5569
+ {
5570
+ "epoch": 1.1853223061535245,
5571
+ "grad_norm": 4.254667282104492,
5572
+ "learning_rate": 0.00018865879233595623,
5573
+ "loss": 1.3015,
5574
+ "step": 39700
5575
+ },
5576
+ {
5577
+ "epoch": 1.1868151554056072,
5578
+ "grad_norm": 5.307114124298096,
5579
+ "learning_rate": 0.0001886445065400374,
5580
+ "loss": 1.2802,
5581
+ "step": 39750
5582
+ },
5583
+ {
5584
+ "epoch": 1.1883080046576897,
5585
+ "grad_norm": 3.9305639266967773,
5586
+ "learning_rate": 0.00018863022074411856,
5587
+ "loss": 1.2975,
5588
+ "step": 39800
5589
+ },
5590
+ {
5591
+ "epoch": 1.1898008539097722,
5592
+ "grad_norm": 4.650544166564941,
5593
+ "learning_rate": 0.00018861593494819972,
5594
+ "loss": 1.3239,
5595
+ "step": 39850
5596
+ },
5597
+ {
5598
+ "epoch": 1.1912937031618547,
5599
+ "grad_norm": 4.182717800140381,
5600
+ "learning_rate": 0.00018860164915228086,
5601
+ "loss": 1.3948,
5602
+ "step": 39900
5603
+ },
5604
+ {
5605
+ "epoch": 1.1927865524139372,
5606
+ "grad_norm": 5.322524547576904,
5607
+ "learning_rate": 0.00018858736335636205,
5608
+ "loss": 1.3411,
5609
+ "step": 39950
5610
+ },
5611
+ {
5612
+ "epoch": 1.1942794016660199,
5613
+ "grad_norm": 5.221969127655029,
5614
+ "learning_rate": 0.0001885730775604432,
5615
+ "loss": 1.3047,
5616
+ "step": 40000
5617
  }
5618
  ],
5619
  "logging_steps": 50,
 
5633
  "attributes": {}
5634
  }
5635
  },
5636
+ "total_flos": 1.011899217623384e+18,
5637
  "train_batch_size": 2,
5638
  "trial_name": null,
5639
  "trial_params": null