Training in progress, step 200, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +189 -6
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 166182480
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5a33954a949e48ae07c864630d71b35f52ef96e34d857fa2e6fec7f98fe356da
|
3 |
size 166182480
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 332574358
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aa8f7d7ec182d15a1292055a6498657aec231fbd647506cb8eb0d6794f5cc8d4
|
3 |
size 332574358
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:761540c8c7a3a2bb0f8059fd740b1f4ae73e8861f5dea25483cd38099982f051
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b45d5c4a17baee47c3f9590614e8ee9a911e28a39e8689047f51888fe2f78fe4
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:52fac183069a95e291b801cee9c4186c65f0d4ebf8f8ae19810e6841974db7a9
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:60c584c4176c8942dcdae24cd5d8ff148bb5dc8fa31ceff16ead102aff46da43
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a7c10705e29fb389f187f5e1079f175bc14c7c722484dd2f977530ef7573f25
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 2.
|
3 |
-
"best_model_checkpoint": "miner_id_24/checkpoint-
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 25,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1296,6 +1296,189 @@
|
|
1296 |
"eval_samples_per_second": 23.985,
|
1297 |
"eval_steps_per_second": 6.236,
|
1298 |
"step": 175
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1299 |
}
|
1300 |
],
|
1301 |
"logging_steps": 1,
|
@@ -1319,12 +1502,12 @@
|
|
1319 |
"should_evaluate": false,
|
1320 |
"should_log": false,
|
1321 |
"should_save": true,
|
1322 |
-
"should_training_stop":
|
1323 |
},
|
1324 |
"attributes": {}
|
1325 |
}
|
1326 |
},
|
1327 |
-
"total_flos":
|
1328 |
"train_batch_size": 1,
|
1329 |
"trial_name": null,
|
1330 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 2.660818836375256e-06,
|
3 |
+
"best_model_checkpoint": "miner_id_24/checkpoint-200",
|
4 |
+
"epoch": 0.4417753848277766,
|
5 |
"eval_steps": 25,
|
6 |
+
"global_step": 200,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1296 |
"eval_samples_per_second": 23.985,
|
1297 |
"eval_steps_per_second": 6.236,
|
1298 |
"step": 175
|
1299 |
+
},
|
1300 |
+
{
|
1301 |
+
"epoch": 0.38876233864844345,
|
1302 |
+
"grad_norm": 0.0001819442113628611,
|
1303 |
+
"learning_rate": 1.3425421036992098e-05,
|
1304 |
+
"loss": 0.0,
|
1305 |
+
"step": 176
|
1306 |
+
},
|
1307 |
+
{
|
1308 |
+
"epoch": 0.3909712155725823,
|
1309 |
+
"grad_norm": 0.00019746804900933057,
|
1310 |
+
"learning_rate": 1.314922493421946e-05,
|
1311 |
+
"loss": 0.0,
|
1312 |
+
"step": 177
|
1313 |
+
},
|
1314 |
+
{
|
1315 |
+
"epoch": 0.3931800924967212,
|
1316 |
+
"grad_norm": 0.0002262179768877104,
|
1317 |
+
"learning_rate": 1.2884233295920353e-05,
|
1318 |
+
"loss": 0.0,
|
1319 |
+
"step": 178
|
1320 |
+
},
|
1321 |
+
{
|
1322 |
+
"epoch": 0.3953889694208601,
|
1323 |
+
"grad_norm": 0.00020046616555191576,
|
1324 |
+
"learning_rate": 1.2630517066764069e-05,
|
1325 |
+
"loss": 0.0,
|
1326 |
+
"step": 179
|
1327 |
+
},
|
1328 |
+
{
|
1329 |
+
"epoch": 0.39759784634499895,
|
1330 |
+
"grad_norm": 0.0002561356814112514,
|
1331 |
+
"learning_rate": 1.2388144172720251e-05,
|
1332 |
+
"loss": 0.0,
|
1333 |
+
"step": 180
|
1334 |
+
},
|
1335 |
+
{
|
1336 |
+
"epoch": 0.39980672326913785,
|
1337 |
+
"grad_norm": 0.0003955549036618322,
|
1338 |
+
"learning_rate": 1.2157179502873409e-05,
|
1339 |
+
"loss": 0.0,
|
1340 |
+
"step": 181
|
1341 |
+
},
|
1342 |
+
{
|
1343 |
+
"epoch": 0.40201560019327676,
|
1344 |
+
"grad_norm": 0.00018629009719006717,
|
1345 |
+
"learning_rate": 1.1937684892050604e-05,
|
1346 |
+
"loss": 0.0,
|
1347 |
+
"step": 182
|
1348 |
+
},
|
1349 |
+
{
|
1350 |
+
"epoch": 0.4042244771174156,
|
1351 |
+
"grad_norm": 9.03993786778301e-05,
|
1352 |
+
"learning_rate": 1.172971910426671e-05,
|
1353 |
+
"loss": 0.0,
|
1354 |
+
"step": 183
|
1355 |
+
},
|
1356 |
+
{
|
1357 |
+
"epoch": 0.4064333540415545,
|
1358 |
+
"grad_norm": 8.99579026736319e-05,
|
1359 |
+
"learning_rate": 1.1533337816991932e-05,
|
1360 |
+
"loss": 0.0,
|
1361 |
+
"step": 184
|
1362 |
+
},
|
1363 |
+
{
|
1364 |
+
"epoch": 0.40864223096569335,
|
1365 |
+
"grad_norm": 8.806282130535692e-05,
|
1366 |
+
"learning_rate": 1.1348593606245522e-05,
|
1367 |
+
"loss": 0.0,
|
1368 |
+
"step": 185
|
1369 |
+
},
|
1370 |
+
{
|
1371 |
+
"epoch": 0.41085110788983226,
|
1372 |
+
"grad_norm": 8.73383687576279e-05,
|
1373 |
+
"learning_rate": 1.1175535932519987e-05,
|
1374 |
+
"loss": 0.0,
|
1375 |
+
"step": 186
|
1376 |
+
},
|
1377 |
+
{
|
1378 |
+
"epoch": 0.41305998481397116,
|
1379 |
+
"grad_norm": 8.832193998387083e-05,
|
1380 |
+
"learning_rate": 1.1014211127539271e-05,
|
1381 |
+
"loss": 0.0,
|
1382 |
+
"step": 187
|
1383 |
+
},
|
1384 |
+
{
|
1385 |
+
"epoch": 0.41526886173811,
|
1386 |
+
"grad_norm": 0.00011703837662935257,
|
1387 |
+
"learning_rate": 1.0864662381854632e-05,
|
1388 |
+
"loss": 0.0,
|
1389 |
+
"step": 188
|
1390 |
+
},
|
1391 |
+
{
|
1392 |
+
"epoch": 0.4174777386622489,
|
1393 |
+
"grad_norm": 0.0002513094514142722,
|
1394 |
+
"learning_rate": 1.0726929733281515e-05,
|
1395 |
+
"loss": 0.0,
|
1396 |
+
"step": 189
|
1397 |
+
},
|
1398 |
+
{
|
1399 |
+
"epoch": 0.4196866155863878,
|
1400 |
+
"grad_norm": 0.00021101209858898073,
|
1401 |
+
"learning_rate": 1.0601050056180447e-05,
|
1402 |
+
"loss": 0.0,
|
1403 |
+
"step": 190
|
1404 |
+
},
|
1405 |
+
{
|
1406 |
+
"epoch": 0.42189549251052666,
|
1407 |
+
"grad_norm": 0.00020866327395197004,
|
1408 |
+
"learning_rate": 1.0487057051584856e-05,
|
1409 |
+
"loss": 0.0,
|
1410 |
+
"step": 191
|
1411 |
+
},
|
1412 |
+
{
|
1413 |
+
"epoch": 0.42410436943466556,
|
1414 |
+
"grad_norm": 0.00022013194393366575,
|
1415 |
+
"learning_rate": 1.0384981238178534e-05,
|
1416 |
+
"loss": 0.0,
|
1417 |
+
"step": 192
|
1418 |
+
},
|
1419 |
+
{
|
1420 |
+
"epoch": 0.42631324635880447,
|
1421 |
+
"grad_norm": 0.00032330441172234714,
|
1422 |
+
"learning_rate": 1.0294849944125004e-05,
|
1423 |
+
"loss": 0.0,
|
1424 |
+
"step": 193
|
1425 |
+
},
|
1426 |
+
{
|
1427 |
+
"epoch": 0.4285221232829433,
|
1428 |
+
"grad_norm": 0.00035217651748098433,
|
1429 |
+
"learning_rate": 1.0216687299751144e-05,
|
1430 |
+
"loss": 0.0,
|
1431 |
+
"step": 194
|
1432 |
+
},
|
1433 |
+
{
|
1434 |
+
"epoch": 0.4307310002070822,
|
1435 |
+
"grad_norm": 8.39560671010986e-05,
|
1436 |
+
"learning_rate": 1.0150514231086887e-05,
|
1437 |
+
"loss": 0.0,
|
1438 |
+
"step": 195
|
1439 |
+
},
|
1440 |
+
{
|
1441 |
+
"epoch": 0.4329398771312211,
|
1442 |
+
"grad_norm": 8.635565609438345e-05,
|
1443 |
+
"learning_rate": 1.0096348454262845e-05,
|
1444 |
+
"loss": 0.0,
|
1445 |
+
"step": 196
|
1446 |
+
},
|
1447 |
+
{
|
1448 |
+
"epoch": 0.43514875405535997,
|
1449 |
+
"grad_norm": 8.677168807480484e-05,
|
1450 |
+
"learning_rate": 1.0054204470767243e-05,
|
1451 |
+
"loss": 0.0,
|
1452 |
+
"step": 197
|
1453 |
+
},
|
1454 |
+
{
|
1455 |
+
"epoch": 0.43735763097949887,
|
1456 |
+
"grad_norm": 8.660169260110706e-05,
|
1457 |
+
"learning_rate": 1.0024093563563546e-05,
|
1458 |
+
"loss": 0.0,
|
1459 |
+
"step": 198
|
1460 |
+
},
|
1461 |
+
{
|
1462 |
+
"epoch": 0.43956650790363777,
|
1463 |
+
"grad_norm": 8.837666246108711e-05,
|
1464 |
+
"learning_rate": 1.000602379406972e-05,
|
1465 |
+
"loss": 0.0,
|
1466 |
+
"step": 199
|
1467 |
+
},
|
1468 |
+
{
|
1469 |
+
"epoch": 0.4417753848277766,
|
1470 |
+
"grad_norm": 8.881120447767898e-05,
|
1471 |
+
"learning_rate": 1e-05,
|
1472 |
+
"loss": 0.0,
|
1473 |
+
"step": 200
|
1474 |
+
},
|
1475 |
+
{
|
1476 |
+
"epoch": 0.4417753848277766,
|
1477 |
+
"eval_loss": 2.660818836375256e-06,
|
1478 |
+
"eval_runtime": 2.0833,
|
1479 |
+
"eval_samples_per_second": 24.001,
|
1480 |
+
"eval_steps_per_second": 6.24,
|
1481 |
+
"step": 200
|
1482 |
}
|
1483 |
],
|
1484 |
"logging_steps": 1,
|
|
|
1502 |
"should_evaluate": false,
|
1503 |
"should_log": false,
|
1504 |
"should_save": true,
|
1505 |
+
"should_training_stop": true
|
1506 |
},
|
1507 |
"attributes": {}
|
1508 |
}
|
1509 |
},
|
1510 |
+
"total_flos": 6.49923578036224e+17,
|
1511 |
"train_batch_size": 1,
|
1512 |
"trial_name": null,
|
1513 |
"trial_params": null
|