dada22231 commited on
Commit
b46bb40
·
verified ·
1 Parent(s): a319e31

Training in progress, step 200, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87f943437f6682f7ee7de160a0fb0dd7f1a7f9b7c1e5cb4bc2beb7dea62371d6
3
  size 166182480
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a33954a949e48ae07c864630d71b35f52ef96e34d857fa2e6fec7f98fe356da
3
  size 166182480
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:844a482769f0e2abd40ad13846907ca880baf715442ba45b6595fb1d2992d4c5
3
  size 332574358
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa8f7d7ec182d15a1292055a6498657aec231fbd647506cb8eb0d6794f5cc8d4
3
  size 332574358
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5eafd79e713363198747e3075dba58fd79e2d3d1300105438732b2e0ee683c97
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:761540c8c7a3a2bb0f8059fd740b1f4ae73e8861f5dea25483cd38099982f051
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c4a1fd21f28f132126ba0a3c0e9b7867579d24daca77e2d8e14bc16542a7a830
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b45d5c4a17baee47c3f9590614e8ee9a911e28a39e8689047f51888fe2f78fe4
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d98523765fbe4ee7d22ab0a5ea68e4b5406a21e1b54e1b37a5358bbc38419dcd
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52fac183069a95e291b801cee9c4186c65f0d4ebf8f8ae19810e6841974db7a9
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1907ecded23c99f86058c3975d42bb30b996a62807a726ac4088c81af5fe16c9
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60c584c4176c8942dcdae24cd5d8ff148bb5dc8fa31ceff16ead102aff46da43
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9f689e3fd5df575eecb35b5e9b27f49e75a860ed67ef4150a0d2749c11e5d42
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a7c10705e29fb389f187f5e1079f175bc14c7c722484dd2f977530ef7573f25
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 2.7154196686751675e-06,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-175",
4
- "epoch": 0.38655346172430455,
5
  "eval_steps": 25,
6
- "global_step": 175,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1296,6 +1296,189 @@
1296
  "eval_samples_per_second": 23.985,
1297
  "eval_steps_per_second": 6.236,
1298
  "step": 175
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1299
  }
1300
  ],
1301
  "logging_steps": 1,
@@ -1319,12 +1502,12 @@
1319
  "should_evaluate": false,
1320
  "should_log": false,
1321
  "should_save": true,
1322
- "should_training_stop": false
1323
  },
1324
  "attributes": {}
1325
  }
1326
  },
1327
- "total_flos": 5.68683130781696e+17,
1328
  "train_batch_size": 1,
1329
  "trial_name": null,
1330
  "trial_params": null
 
1
  {
2
+ "best_metric": 2.660818836375256e-06,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-200",
4
+ "epoch": 0.4417753848277766,
5
  "eval_steps": 25,
6
+ "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1296
  "eval_samples_per_second": 23.985,
1297
  "eval_steps_per_second": 6.236,
1298
  "step": 175
1299
+ },
1300
+ {
1301
+ "epoch": 0.38876233864844345,
1302
+ "grad_norm": 0.0001819442113628611,
1303
+ "learning_rate": 1.3425421036992098e-05,
1304
+ "loss": 0.0,
1305
+ "step": 176
1306
+ },
1307
+ {
1308
+ "epoch": 0.3909712155725823,
1309
+ "grad_norm": 0.00019746804900933057,
1310
+ "learning_rate": 1.314922493421946e-05,
1311
+ "loss": 0.0,
1312
+ "step": 177
1313
+ },
1314
+ {
1315
+ "epoch": 0.3931800924967212,
1316
+ "grad_norm": 0.0002262179768877104,
1317
+ "learning_rate": 1.2884233295920353e-05,
1318
+ "loss": 0.0,
1319
+ "step": 178
1320
+ },
1321
+ {
1322
+ "epoch": 0.3953889694208601,
1323
+ "grad_norm": 0.00020046616555191576,
1324
+ "learning_rate": 1.2630517066764069e-05,
1325
+ "loss": 0.0,
1326
+ "step": 179
1327
+ },
1328
+ {
1329
+ "epoch": 0.39759784634499895,
1330
+ "grad_norm": 0.0002561356814112514,
1331
+ "learning_rate": 1.2388144172720251e-05,
1332
+ "loss": 0.0,
1333
+ "step": 180
1334
+ },
1335
+ {
1336
+ "epoch": 0.39980672326913785,
1337
+ "grad_norm": 0.0003955549036618322,
1338
+ "learning_rate": 1.2157179502873409e-05,
1339
+ "loss": 0.0,
1340
+ "step": 181
1341
+ },
1342
+ {
1343
+ "epoch": 0.40201560019327676,
1344
+ "grad_norm": 0.00018629009719006717,
1345
+ "learning_rate": 1.1937684892050604e-05,
1346
+ "loss": 0.0,
1347
+ "step": 182
1348
+ },
1349
+ {
1350
+ "epoch": 0.4042244771174156,
1351
+ "grad_norm": 9.03993786778301e-05,
1352
+ "learning_rate": 1.172971910426671e-05,
1353
+ "loss": 0.0,
1354
+ "step": 183
1355
+ },
1356
+ {
1357
+ "epoch": 0.4064333540415545,
1358
+ "grad_norm": 8.99579026736319e-05,
1359
+ "learning_rate": 1.1533337816991932e-05,
1360
+ "loss": 0.0,
1361
+ "step": 184
1362
+ },
1363
+ {
1364
+ "epoch": 0.40864223096569335,
1365
+ "grad_norm": 8.806282130535692e-05,
1366
+ "learning_rate": 1.1348593606245522e-05,
1367
+ "loss": 0.0,
1368
+ "step": 185
1369
+ },
1370
+ {
1371
+ "epoch": 0.41085110788983226,
1372
+ "grad_norm": 8.73383687576279e-05,
1373
+ "learning_rate": 1.1175535932519987e-05,
1374
+ "loss": 0.0,
1375
+ "step": 186
1376
+ },
1377
+ {
1378
+ "epoch": 0.41305998481397116,
1379
+ "grad_norm": 8.832193998387083e-05,
1380
+ "learning_rate": 1.1014211127539271e-05,
1381
+ "loss": 0.0,
1382
+ "step": 187
1383
+ },
1384
+ {
1385
+ "epoch": 0.41526886173811,
1386
+ "grad_norm": 0.00011703837662935257,
1387
+ "learning_rate": 1.0864662381854632e-05,
1388
+ "loss": 0.0,
1389
+ "step": 188
1390
+ },
1391
+ {
1392
+ "epoch": 0.4174777386622489,
1393
+ "grad_norm": 0.0002513094514142722,
1394
+ "learning_rate": 1.0726929733281515e-05,
1395
+ "loss": 0.0,
1396
+ "step": 189
1397
+ },
1398
+ {
1399
+ "epoch": 0.4196866155863878,
1400
+ "grad_norm": 0.00021101209858898073,
1401
+ "learning_rate": 1.0601050056180447e-05,
1402
+ "loss": 0.0,
1403
+ "step": 190
1404
+ },
1405
+ {
1406
+ "epoch": 0.42189549251052666,
1407
+ "grad_norm": 0.00020866327395197004,
1408
+ "learning_rate": 1.0487057051584856e-05,
1409
+ "loss": 0.0,
1410
+ "step": 191
1411
+ },
1412
+ {
1413
+ "epoch": 0.42410436943466556,
1414
+ "grad_norm": 0.00022013194393366575,
1415
+ "learning_rate": 1.0384981238178534e-05,
1416
+ "loss": 0.0,
1417
+ "step": 192
1418
+ },
1419
+ {
1420
+ "epoch": 0.42631324635880447,
1421
+ "grad_norm": 0.00032330441172234714,
1422
+ "learning_rate": 1.0294849944125004e-05,
1423
+ "loss": 0.0,
1424
+ "step": 193
1425
+ },
1426
+ {
1427
+ "epoch": 0.4285221232829433,
1428
+ "grad_norm": 0.00035217651748098433,
1429
+ "learning_rate": 1.0216687299751144e-05,
1430
+ "loss": 0.0,
1431
+ "step": 194
1432
+ },
1433
+ {
1434
+ "epoch": 0.4307310002070822,
1435
+ "grad_norm": 8.39560671010986e-05,
1436
+ "learning_rate": 1.0150514231086887e-05,
1437
+ "loss": 0.0,
1438
+ "step": 195
1439
+ },
1440
+ {
1441
+ "epoch": 0.4329398771312211,
1442
+ "grad_norm": 8.635565609438345e-05,
1443
+ "learning_rate": 1.0096348454262845e-05,
1444
+ "loss": 0.0,
1445
+ "step": 196
1446
+ },
1447
+ {
1448
+ "epoch": 0.43514875405535997,
1449
+ "grad_norm": 8.677168807480484e-05,
1450
+ "learning_rate": 1.0054204470767243e-05,
1451
+ "loss": 0.0,
1452
+ "step": 197
1453
+ },
1454
+ {
1455
+ "epoch": 0.43735763097949887,
1456
+ "grad_norm": 8.660169260110706e-05,
1457
+ "learning_rate": 1.0024093563563546e-05,
1458
+ "loss": 0.0,
1459
+ "step": 198
1460
+ },
1461
+ {
1462
+ "epoch": 0.43956650790363777,
1463
+ "grad_norm": 8.837666246108711e-05,
1464
+ "learning_rate": 1.000602379406972e-05,
1465
+ "loss": 0.0,
1466
+ "step": 199
1467
+ },
1468
+ {
1469
+ "epoch": 0.4417753848277766,
1470
+ "grad_norm": 8.881120447767898e-05,
1471
+ "learning_rate": 1e-05,
1472
+ "loss": 0.0,
1473
+ "step": 200
1474
+ },
1475
+ {
1476
+ "epoch": 0.4417753848277766,
1477
+ "eval_loss": 2.660818836375256e-06,
1478
+ "eval_runtime": 2.0833,
1479
+ "eval_samples_per_second": 24.001,
1480
+ "eval_steps_per_second": 6.24,
1481
+ "step": 200
1482
  }
1483
  ],
1484
  "logging_steps": 1,
 
1502
  "should_evaluate": false,
1503
  "should_log": false,
1504
  "should_save": true,
1505
+ "should_training_stop": true
1506
  },
1507
  "attributes": {}
1508
  }
1509
  },
1510
+ "total_flos": 6.49923578036224e+17,
1511
  "train_batch_size": 1,
1512
  "trial_name": null,
1513
  "trial_params": null