OsamaMo commited on
Commit
c994d38
·
verified ·
1 Parent(s): cbddbd5

Training in progress, step 2000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3001072512cf0094aa413adef722b38d30d55f1d695532e69f11d0e79e17410
3
  size 295488936
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd91de723bbd7ea7b7dfe87942ef4a89726bd5bdcfdd6abb72301f7a8513b562
3
  size 295488936
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f6d21674325a8aaab36b8cf4642a6d5958787b319ca784ac8dfd0a1718a3756
3
  size 591203178
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e3bb5fe5d5fa250dea8b06ba61d861c69e2a49b2e59bd1f61d15f4b5e735dbc
3
  size 591203178
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3fcb8b7132fdda989f7bbb14a5bf464435849629fe731ccbc64c4724068a57e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ec28ea0c416565eeac14a0e9c944f185ac250f4ed4bd15c84ff77ed78ba9301
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e579802271638ff75fe7ba64560b3e21e4f7e26236b794157498845ba12537a4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2285ab56c032a195010949adc0c02d4df3cdbb2c2798776a12286c09cf74f9be
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0714540907466952,
5
  "eval_steps": 100,
6
- "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1177,6 +1177,396 @@
1177
  "eval_news_finetune_val_samples_per_second": 1.396,
1178
  "eval_news_finetune_val_steps_per_second": 1.396,
1179
  "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1180
  }
1181
  ],
1182
  "logging_steps": 10,
@@ -1196,7 +1586,7 @@
1196
  "attributes": {}
1197
  }
1198
  },
1199
- "total_flos": 4.14970883106816e+16,
1200
  "train_batch_size": 1,
1201
  "trial_name": null,
1202
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.4287245444801715,
5
  "eval_steps": 100,
6
+ "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1177
  "eval_news_finetune_val_samples_per_second": 1.396,
1178
  "eval_news_finetune_val_steps_per_second": 1.396,
1179
  "step": 1500
1180
+ },
1181
+ {
1182
+ "epoch": 1.0785994998213648,
1183
+ "grad_norm": 1.7663507461547852,
1184
+ "learning_rate": 8.082020186215156e-05,
1185
+ "loss": 0.2407,
1186
+ "step": 1510
1187
+ },
1188
+ {
1189
+ "epoch": 1.0857449088960343,
1190
+ "grad_norm": 1.2081632614135742,
1191
+ "learning_rate": 8.049165944562316e-05,
1192
+ "loss": 0.2483,
1193
+ "step": 1520
1194
+ },
1195
+ {
1196
+ "epoch": 1.092890317970704,
1197
+ "grad_norm": 0.5045826435089111,
1198
+ "learning_rate": 8.016100750576621e-05,
1199
+ "loss": 0.2013,
1200
+ "step": 1530
1201
+ },
1202
+ {
1203
+ "epoch": 1.1000357270453733,
1204
+ "grad_norm": 1.4456278085708618,
1205
+ "learning_rate": 7.98282689182783e-05,
1206
+ "loss": 0.2034,
1207
+ "step": 1540
1208
+ },
1209
+ {
1210
+ "epoch": 1.107181136120043,
1211
+ "grad_norm": 1.1558668613433838,
1212
+ "learning_rate": 7.949346670321891e-05,
1213
+ "loss": 0.2386,
1214
+ "step": 1550
1215
+ },
1216
+ {
1217
+ "epoch": 1.1143265451947124,
1218
+ "grad_norm": 1.4196126461029053,
1219
+ "learning_rate": 7.915662402341664e-05,
1220
+ "loss": 0.2299,
1221
+ "step": 1560
1222
+ },
1223
+ {
1224
+ "epoch": 1.1214719542693818,
1225
+ "grad_norm": 0.9341222047805786,
1226
+ "learning_rate": 7.88177641828669e-05,
1227
+ "loss": 0.2105,
1228
+ "step": 1570
1229
+ },
1230
+ {
1231
+ "epoch": 1.1286173633440515,
1232
+ "grad_norm": 1.066001296043396,
1233
+ "learning_rate": 7.847691062511957e-05,
1234
+ "loss": 0.1925,
1235
+ "step": 1580
1236
+ },
1237
+ {
1238
+ "epoch": 1.135762772418721,
1239
+ "grad_norm": 0.7840182781219482,
1240
+ "learning_rate": 7.813408693165704e-05,
1241
+ "loss": 0.2425,
1242
+ "step": 1590
1243
+ },
1244
+ {
1245
+ "epoch": 1.1429081814933906,
1246
+ "grad_norm": 0.983668327331543,
1247
+ "learning_rate": 7.778931682026293e-05,
1248
+ "loss": 0.2014,
1249
+ "step": 1600
1250
+ },
1251
+ {
1252
+ "epoch": 1.1429081814933906,
1253
+ "eval_news_finetune_val_loss": 0.29564452171325684,
1254
+ "eval_news_finetune_val_runtime": 1003.001,
1255
+ "eval_news_finetune_val_samples_per_second": 1.396,
1256
+ "eval_news_finetune_val_steps_per_second": 1.396,
1257
+ "step": 1600
1258
+ },
1259
+ {
1260
+ "epoch": 1.15005359056806,
1261
+ "grad_norm": 1.63984215259552,
1262
+ "learning_rate": 7.744262414338099e-05,
1263
+ "loss": 0.2863,
1264
+ "step": 1610
1265
+ },
1266
+ {
1267
+ "epoch": 1.1571989996427297,
1268
+ "grad_norm": 0.9211621284484863,
1269
+ "learning_rate": 7.709403288646507e-05,
1270
+ "loss": 0.2175,
1271
+ "step": 1620
1272
+ },
1273
+ {
1274
+ "epoch": 1.164344408717399,
1275
+ "grad_norm": 1.3369996547698975,
1276
+ "learning_rate": 7.67435671663196e-05,
1277
+ "loss": 0.1893,
1278
+ "step": 1630
1279
+ },
1280
+ {
1281
+ "epoch": 1.1714898177920685,
1282
+ "grad_norm": 0.7532891631126404,
1283
+ "learning_rate": 7.63912512294312e-05,
1284
+ "loss": 0.2483,
1285
+ "step": 1640
1286
+ },
1287
+ {
1288
+ "epoch": 1.1786352268667382,
1289
+ "grad_norm": 1.0959442853927612,
1290
+ "learning_rate": 7.603710945029119e-05,
1291
+ "loss": 0.1888,
1292
+ "step": 1650
1293
+ },
1294
+ {
1295
+ "epoch": 1.1857806359414076,
1296
+ "grad_norm": 0.9019472599029541,
1297
+ "learning_rate": 7.568116632970922e-05,
1298
+ "loss": 0.2144,
1299
+ "step": 1660
1300
+ },
1301
+ {
1302
+ "epoch": 1.1929260450160772,
1303
+ "grad_norm": 1.1219818592071533,
1304
+ "learning_rate": 7.532344649311829e-05,
1305
+ "loss": 0.191,
1306
+ "step": 1670
1307
+ },
1308
+ {
1309
+ "epoch": 1.2000714540907467,
1310
+ "grad_norm": 1.0829100608825684,
1311
+ "learning_rate": 7.496397468887106e-05,
1312
+ "loss": 0.2762,
1313
+ "step": 1680
1314
+ },
1315
+ {
1316
+ "epoch": 1.2072168631654163,
1317
+ "grad_norm": 0.7855832576751709,
1318
+ "learning_rate": 7.460277578652759e-05,
1319
+ "loss": 0.157,
1320
+ "step": 1690
1321
+ },
1322
+ {
1323
+ "epoch": 1.2143622722400857,
1324
+ "grad_norm": 2.407999038696289,
1325
+ "learning_rate": 7.423987477513488e-05,
1326
+ "loss": 0.2627,
1327
+ "step": 1700
1328
+ },
1329
+ {
1330
+ "epoch": 1.2143622722400857,
1331
+ "eval_news_finetune_val_loss": 0.28248873353004456,
1332
+ "eval_news_finetune_val_runtime": 1003.1081,
1333
+ "eval_news_finetune_val_samples_per_second": 1.396,
1334
+ "eval_news_finetune_val_steps_per_second": 1.396,
1335
+ "step": 1700
1336
+ },
1337
+ {
1338
+ "epoch": 1.2215076813147552,
1339
+ "grad_norm": 1.5500895977020264,
1340
+ "learning_rate": 7.387529676149799e-05,
1341
+ "loss": 0.1477,
1342
+ "step": 1710
1343
+ },
1344
+ {
1345
+ "epoch": 1.2286530903894248,
1346
+ "grad_norm": 1.5599130392074585,
1347
+ "learning_rate": 7.350906696844307e-05,
1348
+ "loss": 0.1942,
1349
+ "step": 1720
1350
+ },
1351
+ {
1352
+ "epoch": 1.2357984994640943,
1353
+ "grad_norm": 1.6327091455459595,
1354
+ "learning_rate": 7.314121073307229e-05,
1355
+ "loss": 0.2,
1356
+ "step": 1730
1357
+ },
1358
+ {
1359
+ "epoch": 1.242943908538764,
1360
+ "grad_norm": 0.6044666767120361,
1361
+ "learning_rate": 7.277175350501111e-05,
1362
+ "loss": 0.185,
1363
+ "step": 1740
1364
+ },
1365
+ {
1366
+ "epoch": 1.2500893176134333,
1367
+ "grad_norm": 1.317089319229126,
1368
+ "learning_rate": 7.240072084464729e-05,
1369
+ "loss": 0.196,
1370
+ "step": 1750
1371
+ },
1372
+ {
1373
+ "epoch": 1.257234726688103,
1374
+ "grad_norm": 1.089105486869812,
1375
+ "learning_rate": 7.202813842136283e-05,
1376
+ "loss": 0.1322,
1377
+ "step": 1760
1378
+ },
1379
+ {
1380
+ "epoch": 1.2643801357627724,
1381
+ "grad_norm": 1.4972888231277466,
1382
+ "learning_rate": 7.165403201175787e-05,
1383
+ "loss": 0.2176,
1384
+ "step": 1770
1385
+ },
1386
+ {
1387
+ "epoch": 1.2715255448374418,
1388
+ "grad_norm": 1.4998830556869507,
1389
+ "learning_rate": 7.127842749786747e-05,
1390
+ "loss": 0.218,
1391
+ "step": 1780
1392
+ },
1393
+ {
1394
+ "epoch": 1.2786709539121115,
1395
+ "grad_norm": 0.9759517908096313,
1396
+ "learning_rate": 7.090135086537095e-05,
1397
+ "loss": 0.1653,
1398
+ "step": 1790
1399
+ },
1400
+ {
1401
+ "epoch": 1.285816362986781,
1402
+ "grad_norm": 0.9713583588600159,
1403
+ "learning_rate": 7.052282820179412e-05,
1404
+ "loss": 0.175,
1405
+ "step": 1800
1406
+ },
1407
+ {
1408
+ "epoch": 1.285816362986781,
1409
+ "eval_news_finetune_val_loss": 0.2936909794807434,
1410
+ "eval_news_finetune_val_runtime": 1003.12,
1411
+ "eval_news_finetune_val_samples_per_second": 1.396,
1412
+ "eval_news_finetune_val_steps_per_second": 1.396,
1413
+ "step": 1800
1414
+ },
1415
+ {
1416
+ "epoch": 1.2929617720614506,
1417
+ "grad_norm": 0.6328814625740051,
1418
+ "learning_rate": 7.014288569470446e-05,
1419
+ "loss": 0.1727,
1420
+ "step": 1810
1421
+ },
1422
+ {
1423
+ "epoch": 1.30010718113612,
1424
+ "grad_norm": 1.622104525566101,
1425
+ "learning_rate": 6.976154962989934e-05,
1426
+ "loss": 0.2363,
1427
+ "step": 1820
1428
+ },
1429
+ {
1430
+ "epoch": 1.3072525902107897,
1431
+ "grad_norm": 1.8254674673080444,
1432
+ "learning_rate": 6.937884638958757e-05,
1433
+ "loss": 0.1897,
1434
+ "step": 1830
1435
+ },
1436
+ {
1437
+ "epoch": 1.314397999285459,
1438
+ "grad_norm": 0.8813793063163757,
1439
+ "learning_rate": 6.899480245056396e-05,
1440
+ "loss": 0.2029,
1441
+ "step": 1840
1442
+ },
1443
+ {
1444
+ "epoch": 1.3215434083601285,
1445
+ "grad_norm": 0.7675999999046326,
1446
+ "learning_rate": 6.860944438237788e-05,
1447
+ "loss": 0.2025,
1448
+ "step": 1850
1449
+ },
1450
+ {
1451
+ "epoch": 1.3286888174347982,
1452
+ "grad_norm": 1.1973013877868652,
1453
+ "learning_rate": 6.82227988454948e-05,
1454
+ "loss": 0.2317,
1455
+ "step": 1860
1456
+ },
1457
+ {
1458
+ "epoch": 1.3358342265094676,
1459
+ "grad_norm": 0.7864009737968445,
1460
+ "learning_rate": 6.783489258945195e-05,
1461
+ "loss": 0.2318,
1462
+ "step": 1870
1463
+ },
1464
+ {
1465
+ "epoch": 1.3429796355841372,
1466
+ "grad_norm": 1.0866330862045288,
1467
+ "learning_rate": 6.74457524510077e-05,
1468
+ "loss": 0.1871,
1469
+ "step": 1880
1470
+ },
1471
+ {
1472
+ "epoch": 1.3501250446588067,
1473
+ "grad_norm": 0.8745126724243164,
1474
+ "learning_rate": 6.705540535228485e-05,
1475
+ "loss": 0.211,
1476
+ "step": 1890
1477
+ },
1478
+ {
1479
+ "epoch": 1.3572704537334763,
1480
+ "grad_norm": 1.3401581048965454,
1481
+ "learning_rate": 6.66638782989081e-05,
1482
+ "loss": 0.2307,
1483
+ "step": 1900
1484
+ },
1485
+ {
1486
+ "epoch": 1.3572704537334763,
1487
+ "eval_news_finetune_val_loss": 0.2787444591522217,
1488
+ "eval_news_finetune_val_runtime": 1002.9344,
1489
+ "eval_news_finetune_val_samples_per_second": 1.396,
1490
+ "eval_news_finetune_val_steps_per_second": 1.396,
1491
+ "step": 1900
1492
+ },
1493
+ {
1494
+ "epoch": 1.3644158628081458,
1495
+ "grad_norm": 0.6149284839630127,
1496
+ "learning_rate": 6.627119837813564e-05,
1497
+ "loss": 0.2128,
1498
+ "step": 1910
1499
+ },
1500
+ {
1501
+ "epoch": 1.3715612718828152,
1502
+ "grad_norm": 1.7847625017166138,
1503
+ "learning_rate": 6.587739275698525e-05,
1504
+ "loss": 0.1551,
1505
+ "step": 1920
1506
+ },
1507
+ {
1508
+ "epoch": 1.3787066809574848,
1509
+ "grad_norm": 1.1973716020584106,
1510
+ "learning_rate": 6.54824886803547e-05,
1511
+ "loss": 0.2335,
1512
+ "step": 1930
1513
+ },
1514
+ {
1515
+ "epoch": 1.3858520900321543,
1516
+ "grad_norm": 1.5757859945297241,
1517
+ "learning_rate": 6.508651346913687e-05,
1518
+ "loss": 0.1504,
1519
+ "step": 1940
1520
+ },
1521
+ {
1522
+ "epoch": 1.392997499106824,
1523
+ "grad_norm": 1.7269341945648193,
1524
+ "learning_rate": 6.468949451832968e-05,
1525
+ "loss": 0.2679,
1526
+ "step": 1950
1527
+ },
1528
+ {
1529
+ "epoch": 1.4001429081814933,
1530
+ "grad_norm": 1.6860129833221436,
1531
+ "learning_rate": 6.429145929514063e-05,
1532
+ "loss": 0.1942,
1533
+ "step": 1960
1534
+ },
1535
+ {
1536
+ "epoch": 1.407288317256163,
1537
+ "grad_norm": 1.1732631921768188,
1538
+ "learning_rate": 6.389243533708671e-05,
1539
+ "loss": 0.2025,
1540
+ "step": 1970
1541
+ },
1542
+ {
1543
+ "epoch": 1.4144337263308324,
1544
+ "grad_norm": 0.9073033332824707,
1545
+ "learning_rate": 6.349245025008912e-05,
1546
+ "loss": 0.1836,
1547
+ "step": 1980
1548
+ },
1549
+ {
1550
+ "epoch": 1.4215791354055018,
1551
+ "grad_norm": 1.133843183517456,
1552
+ "learning_rate": 6.309153170656342e-05,
1553
+ "loss": 0.1526,
1554
+ "step": 1990
1555
+ },
1556
+ {
1557
+ "epoch": 1.4287245444801715,
1558
+ "grad_norm": 2.656296968460083,
1559
+ "learning_rate": 6.268970744350515e-05,
1560
+ "loss": 0.1939,
1561
+ "step": 2000
1562
+ },
1563
+ {
1564
+ "epoch": 1.4287245444801715,
1565
+ "eval_news_finetune_val_loss": 0.27414408326148987,
1566
+ "eval_news_finetune_val_runtime": 1003.0949,
1567
+ "eval_news_finetune_val_samples_per_second": 1.396,
1568
+ "eval_news_finetune_val_steps_per_second": 1.396,
1569
+ "step": 2000
1570
  }
1571
  ],
1572
  "logging_steps": 10,
 
1586
  "attributes": {}
1587
  }
1588
  },
1589
+ "total_flos": 5.538125336973312e+16,
1590
  "train_batch_size": 1,
1591
  "trial_name": null,
1592
  "trial_params": null