Stuti103 commited on
Commit
8699eba
·
verified ·
1 Parent(s): 1b9f9ea

Training in progress, step 19500, checkpoint

Browse files
.gitattributes CHANGED
@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
  checkpoint-19500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
 
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
  checkpoint-19500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
38
+ last-checkpoint/tokenizer.json filter=lfs diff=lfs merge=lfs -text
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86a687945c356111f110eb75094ca4616193d42a65db6db7ed2a691faef0b93e
3
  size 3541119728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dec21d65cc35c5b2575c9f28438a7f3e71903280791cff6024d6130cc74f123b
3
  size 3541119728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d9df83617495c79e4dc8f3ffe12c38a59032f6d8dda39ca67cd8e1645830cbd
3
  size 778374186
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2251652726c40e6dfcb11baba340311083aebf1f86c81a23f7dc9d79eac124bb
3
  size 778374186
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f93ad49f130faa5ef7550fd7a655f94d7bbf6af793eda1fc6b0800655d80e2cf
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff6a16ef822fb8170c7c05fd4d1180b525bf072c302b69d261a30b0549778c78
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.9162600365072359,
6
  "eval_steps": 500,
7
- "global_step": 19200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -17288,6 +17288,276 @@
17288
  "mean_token_accuracy": 0.860031221807003,
17289
  "num_tokens": 31894612.0,
17290
  "step": 19200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17291
  }
17292
  ],
17293
  "logging_steps": 10,
@@ -17307,7 +17577,7 @@
17307
  "attributes": {}
17308
  }
17309
  },
17310
- "total_flos": 7.182831922580398e+17,
17311
  "train_batch_size": 2,
17312
  "trial_name": null,
17313
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.9305765995776614,
6
  "eval_steps": 500,
7
+ "global_step": 19500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
17288
  "mean_token_accuracy": 0.860031221807003,
17289
  "num_tokens": 31894612.0,
17290
  "step": 19200
17291
+ },
17292
+ {
17293
+ "epoch": 0.91673725527625,
17294
+ "grad_norm": 0.37166985869407654,
17295
+ "learning_rate": 1.0833214030064425e-05,
17296
+ "loss": 0.5797,
17297
+ "mean_token_accuracy": 0.8837983384728432,
17298
+ "num_tokens": 31910332.0,
17299
+ "step": 19210
17300
+ },
17301
+ {
17302
+ "epoch": 0.9172144740452642,
17303
+ "grad_norm": 0.3820003271102905,
17304
+ "learning_rate": 1.0828441899308041e-05,
17305
+ "loss": 0.611,
17306
+ "mean_token_accuracy": 0.8845193833112717,
17307
+ "num_tokens": 31926866.0,
17308
+ "step": 19220
17309
+ },
17310
+ {
17311
+ "epoch": 0.9176916928142784,
17312
+ "grad_norm": 0.3208041489124298,
17313
+ "learning_rate": 1.082366976855166e-05,
17314
+ "loss": 0.6218,
17315
+ "mean_token_accuracy": 0.8753577992320061,
17316
+ "num_tokens": 31944162.0,
17317
+ "step": 19230
17318
+ },
17319
+ {
17320
+ "epoch": 0.9181689115832926,
17321
+ "grad_norm": 0.4074268639087677,
17322
+ "learning_rate": 1.0818897637795276e-05,
17323
+ "loss": 0.5728,
17324
+ "mean_token_accuracy": 0.8923567131161689,
17325
+ "num_tokens": 31959882.0,
17326
+ "step": 19240
17327
+ },
17328
+ {
17329
+ "epoch": 0.9186461303523068,
17330
+ "grad_norm": 0.4497404992580414,
17331
+ "learning_rate": 1.0814125507038893e-05,
17332
+ "loss": 0.5369,
17333
+ "mean_token_accuracy": 0.8921236410737038,
17334
+ "num_tokens": 31974914.0,
17335
+ "step": 19250
17336
+ },
17337
+ {
17338
+ "epoch": 0.919123349121321,
17339
+ "grad_norm": 0.38203802704811096,
17340
+ "learning_rate": 1.0809353376282511e-05,
17341
+ "loss": 0.6455,
17342
+ "mean_token_accuracy": 0.8690215855836868,
17343
+ "num_tokens": 31990678.0,
17344
+ "step": 19260
17345
+ },
17346
+ {
17347
+ "epoch": 0.9196005678903352,
17348
+ "grad_norm": 0.32773557305336,
17349
+ "learning_rate": 1.080458124552613e-05,
17350
+ "loss": 0.669,
17351
+ "mean_token_accuracy": 0.870218101143837,
17352
+ "num_tokens": 32007764.0,
17353
+ "step": 19270
17354
+ },
17355
+ {
17356
+ "epoch": 0.9200777866593494,
17357
+ "grad_norm": 0.4381488561630249,
17358
+ "learning_rate": 1.0799809114769746e-05,
17359
+ "loss": 0.6339,
17360
+ "mean_token_accuracy": 0.8728301003575325,
17361
+ "num_tokens": 32024411.0,
17362
+ "step": 19280
17363
+ },
17364
+ {
17365
+ "epoch": 0.9205550054283635,
17366
+ "grad_norm": 0.4450734257698059,
17367
+ "learning_rate": 1.0795036984013363e-05,
17368
+ "loss": 0.777,
17369
+ "mean_token_accuracy": 0.8429723799228668,
17370
+ "num_tokens": 32043443.0,
17371
+ "step": 19290
17372
+ },
17373
+ {
17374
+ "epoch": 0.9210322241973777,
17375
+ "grad_norm": 0.31893327832221985,
17376
+ "learning_rate": 1.079026485325698e-05,
17377
+ "loss": 0.7236,
17378
+ "mean_token_accuracy": 0.859081144630909,
17379
+ "num_tokens": 32062229.0,
17380
+ "step": 19300
17381
+ },
17382
+ {
17383
+ "epoch": 0.9215094429663919,
17384
+ "grad_norm": 0.3973105251789093,
17385
+ "learning_rate": 1.0785492722500596e-05,
17386
+ "loss": 0.5872,
17387
+ "mean_token_accuracy": 0.871922855079174,
17388
+ "num_tokens": 32079383.0,
17389
+ "step": 19310
17390
+ },
17391
+ {
17392
+ "epoch": 0.921986661735406,
17393
+ "grad_norm": 0.295210063457489,
17394
+ "learning_rate": 1.0780720591744213e-05,
17395
+ "loss": 0.5652,
17396
+ "mean_token_accuracy": 0.8859059333801269,
17397
+ "num_tokens": 32093751.0,
17398
+ "step": 19320
17399
+ },
17400
+ {
17401
+ "epoch": 0.9224638805044202,
17402
+ "grad_norm": 0.3628122806549072,
17403
+ "learning_rate": 1.0775948460987833e-05,
17404
+ "loss": 0.6767,
17405
+ "mean_token_accuracy": 0.8768541231751442,
17406
+ "num_tokens": 32109295.0,
17407
+ "step": 19330
17408
+ },
17409
+ {
17410
+ "epoch": 0.9229410992734344,
17411
+ "grad_norm": 0.3489735424518585,
17412
+ "learning_rate": 1.077117633023145e-05,
17413
+ "loss": 0.7395,
17414
+ "mean_token_accuracy": 0.8563176363706588,
17415
+ "num_tokens": 32127172.0,
17416
+ "step": 19340
17417
+ },
17418
+ {
17419
+ "epoch": 0.9234183180424486,
17420
+ "grad_norm": 0.5454393625259399,
17421
+ "learning_rate": 1.0766404199475067e-05,
17422
+ "loss": 0.7232,
17423
+ "mean_token_accuracy": 0.8605073913931847,
17424
+ "num_tokens": 32145650.0,
17425
+ "step": 19350
17426
+ },
17427
+ {
17428
+ "epoch": 0.9238955368114627,
17429
+ "grad_norm": 0.5151296854019165,
17430
+ "learning_rate": 1.0761632068718683e-05,
17431
+ "loss": 0.7183,
17432
+ "mean_token_accuracy": 0.8582200676202774,
17433
+ "num_tokens": 32163210.0,
17434
+ "step": 19360
17435
+ },
17436
+ {
17437
+ "epoch": 0.9243727555804769,
17438
+ "grad_norm": 0.4276362359523773,
17439
+ "learning_rate": 1.07568599379623e-05,
17440
+ "loss": 0.617,
17441
+ "mean_token_accuracy": 0.8801989361643792,
17442
+ "num_tokens": 32179260.0,
17443
+ "step": 19370
17444
+ },
17445
+ {
17446
+ "epoch": 0.9248499743494911,
17447
+ "grad_norm": 0.3694617450237274,
17448
+ "learning_rate": 1.0752087807205918e-05,
17449
+ "loss": 0.6555,
17450
+ "mean_token_accuracy": 0.8701679021120071,
17451
+ "num_tokens": 32196081.0,
17452
+ "step": 19380
17453
+ },
17454
+ {
17455
+ "epoch": 0.9253271931185053,
17456
+ "grad_norm": 0.34691864252090454,
17457
+ "learning_rate": 1.0747315676449535e-05,
17458
+ "loss": 0.6075,
17459
+ "mean_token_accuracy": 0.886542621254921,
17460
+ "num_tokens": 32212271.0,
17461
+ "step": 19390
17462
+ },
17463
+ {
17464
+ "epoch": 0.9258044118875195,
17465
+ "grad_norm": 0.31945309042930603,
17466
+ "learning_rate": 1.0742543545693153e-05,
17467
+ "loss": 0.5085,
17468
+ "mean_token_accuracy": 0.9026155725121499,
17469
+ "num_tokens": 32227305.0,
17470
+ "step": 19400
17471
+ },
17472
+ {
17473
+ "epoch": 0.9262816306565337,
17474
+ "grad_norm": 0.3226480782032013,
17475
+ "learning_rate": 1.073777141493677e-05,
17476
+ "loss": 0.6435,
17477
+ "mean_token_accuracy": 0.8802873864769936,
17478
+ "num_tokens": 32243276.0,
17479
+ "step": 19410
17480
+ },
17481
+ {
17482
+ "epoch": 0.9267588494255479,
17483
+ "grad_norm": 0.44026854634284973,
17484
+ "learning_rate": 1.0732999284180388e-05,
17485
+ "loss": 0.622,
17486
+ "mean_token_accuracy": 0.8743364945054054,
17487
+ "num_tokens": 32260578.0,
17488
+ "step": 19420
17489
+ },
17490
+ {
17491
+ "epoch": 0.9272360681945621,
17492
+ "grad_norm": 0.29511240124702454,
17493
+ "learning_rate": 1.0728227153424005e-05,
17494
+ "loss": 0.6461,
17495
+ "mean_token_accuracy": 0.8643220156431198,
17496
+ "num_tokens": 32277850.0,
17497
+ "step": 19430
17498
+ },
17499
+ {
17500
+ "epoch": 0.9277132869635762,
17501
+ "grad_norm": 0.3299635946750641,
17502
+ "learning_rate": 1.0723455022667622e-05,
17503
+ "loss": 0.6406,
17504
+ "mean_token_accuracy": 0.8653289705514908,
17505
+ "num_tokens": 32295838.0,
17506
+ "step": 19440
17507
+ },
17508
+ {
17509
+ "epoch": 0.9281905057325904,
17510
+ "grad_norm": 0.3476797044277191,
17511
+ "learning_rate": 1.0718682891911238e-05,
17512
+ "loss": 0.6175,
17513
+ "mean_token_accuracy": 0.8700507491827011,
17514
+ "num_tokens": 32312699.0,
17515
+ "step": 19450
17516
+ },
17517
+ {
17518
+ "epoch": 0.9286677245016046,
17519
+ "grad_norm": 0.4377439320087433,
17520
+ "learning_rate": 1.0713910761154858e-05,
17521
+ "loss": 0.5511,
17522
+ "mean_token_accuracy": 0.8886492669582366,
17523
+ "num_tokens": 32328353.0,
17524
+ "step": 19460
17525
+ },
17526
+ {
17527
+ "epoch": 0.9291449432706188,
17528
+ "grad_norm": 0.41651830077171326,
17529
+ "learning_rate": 1.0709138630398475e-05,
17530
+ "loss": 0.6338,
17531
+ "mean_token_accuracy": 0.8774713531136513,
17532
+ "num_tokens": 32343869.0,
17533
+ "step": 19470
17534
+ },
17535
+ {
17536
+ "epoch": 0.929622162039633,
17537
+ "grad_norm": 0.45593252778053284,
17538
+ "learning_rate": 1.0704366499642092e-05,
17539
+ "loss": 0.6098,
17540
+ "mean_token_accuracy": 0.8867579936981201,
17541
+ "num_tokens": 32360245.0,
17542
+ "step": 19480
17543
+ },
17544
+ {
17545
+ "epoch": 0.9300993808086472,
17546
+ "grad_norm": 0.5481681227684021,
17547
+ "learning_rate": 1.0699594368885708e-05,
17548
+ "loss": 0.7036,
17549
+ "mean_token_accuracy": 0.8724937483668327,
17550
+ "num_tokens": 32376506.0,
17551
+ "step": 19490
17552
+ },
17553
+ {
17554
+ "epoch": 0.9305765995776614,
17555
+ "grad_norm": 0.4363495409488678,
17556
+ "learning_rate": 1.0694822238129325e-05,
17557
+ "loss": 0.6826,
17558
+ "mean_token_accuracy": 0.8660887077450752,
17559
+ "num_tokens": 32394083.0,
17560
+ "step": 19500
17561
  }
17562
  ],
17563
  "logging_steps": 10,
 
17577
  "attributes": {}
17578
  }
17579
  },
17580
+ "total_flos": 7.29501813251924e+17,
17581
  "train_batch_size": 2,
17582
  "trial_name": null,
17583
  "trial_params": null