Training in progress, step 359, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 60010048
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1cd7a11dd960decfde159b9ffedcb277804a1627b5b44f99755257d42961884c
|
3 |
size 60010048
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 30428180
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:85eceb4a829aa4047fbf635b04070cf32f5480f395bbe1d6b5df070c2e3a1aac
|
3 |
size 30428180
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7dcd05dfbbdeba643c656ed11b36a8a6487d3151c9ac5ff333ebbd78351d6657
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2422,6 +2422,104 @@
|
|
2422 |
"learning_rate": 1.4393939393939396e-05,
|
2423 |
"loss": 1.1526,
|
2424 |
"step": 345
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2425 |
}
|
2426 |
],
|
2427 |
"logging_steps": 1,
|
@@ -2441,7 +2539,7 @@
|
|
2441 |
"attributes": {}
|
2442 |
}
|
2443 |
},
|
2444 |
-
"total_flos": 4.
|
2445 |
"train_batch_size": 4,
|
2446 |
"trial_name": null,
|
2447 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9849108367626886,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 359,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2422 |
"learning_rate": 1.4393939393939396e-05,
|
2423 |
"loss": 1.1526,
|
2424 |
"step": 345
|
2425 |
+
},
|
2426 |
+
{
|
2427 |
+
"epoch": 0.9492455418381345,
|
2428 |
+
"grad_norm": 0.14177238941192627,
|
2429 |
+
"learning_rate": 1.3636363636363637e-05,
|
2430 |
+
"loss": 1.132,
|
2431 |
+
"step": 346
|
2432 |
+
},
|
2433 |
+
{
|
2434 |
+
"epoch": 0.9519890260631001,
|
2435 |
+
"grad_norm": 0.13835884630680084,
|
2436 |
+
"learning_rate": 1.287878787878788e-05,
|
2437 |
+
"loss": 1.1599,
|
2438 |
+
"step": 347
|
2439 |
+
},
|
2440 |
+
{
|
2441 |
+
"epoch": 0.9547325102880658,
|
2442 |
+
"grad_norm": 0.14390669763088226,
|
2443 |
+
"learning_rate": 1.2121212121212122e-05,
|
2444 |
+
"loss": 1.15,
|
2445 |
+
"step": 348
|
2446 |
+
},
|
2447 |
+
{
|
2448 |
+
"epoch": 0.9574759945130316,
|
2449 |
+
"grad_norm": 0.14811821281909943,
|
2450 |
+
"learning_rate": 1.1363636363636365e-05,
|
2451 |
+
"loss": 1.0759,
|
2452 |
+
"step": 349
|
2453 |
+
},
|
2454 |
+
{
|
2455 |
+
"epoch": 0.9602194787379973,
|
2456 |
+
"grad_norm": 0.14959345757961273,
|
2457 |
+
"learning_rate": 1.0606060606060607e-05,
|
2458 |
+
"loss": 1.126,
|
2459 |
+
"step": 350
|
2460 |
+
},
|
2461 |
+
{
|
2462 |
+
"epoch": 0.9629629629629629,
|
2463 |
+
"grad_norm": 0.14656995236873627,
|
2464 |
+
"learning_rate": 9.848484848484848e-06,
|
2465 |
+
"loss": 1.1341,
|
2466 |
+
"step": 351
|
2467 |
+
},
|
2468 |
+
{
|
2469 |
+
"epoch": 0.9657064471879286,
|
2470 |
+
"grad_norm": 0.14695106446743011,
|
2471 |
+
"learning_rate": 9.090909090909091e-06,
|
2472 |
+
"loss": 1.1259,
|
2473 |
+
"step": 352
|
2474 |
+
},
|
2475 |
+
{
|
2476 |
+
"epoch": 0.9684499314128944,
|
2477 |
+
"grad_norm": 0.14155460894107819,
|
2478 |
+
"learning_rate": 8.333333333333334e-06,
|
2479 |
+
"loss": 1.1503,
|
2480 |
+
"step": 353
|
2481 |
+
},
|
2482 |
+
{
|
2483 |
+
"epoch": 0.9711934156378601,
|
2484 |
+
"grad_norm": 0.1382407397031784,
|
2485 |
+
"learning_rate": 7.5757575757575764e-06,
|
2486 |
+
"loss": 1.1417,
|
2487 |
+
"step": 354
|
2488 |
+
},
|
2489 |
+
{
|
2490 |
+
"epoch": 0.9739368998628258,
|
2491 |
+
"grad_norm": 0.14089229702949524,
|
2492 |
+
"learning_rate": 6.818181818181818e-06,
|
2493 |
+
"loss": 1.1551,
|
2494 |
+
"step": 355
|
2495 |
+
},
|
2496 |
+
{
|
2497 |
+
"epoch": 0.9766803840877915,
|
2498 |
+
"grad_norm": 0.14886945486068726,
|
2499 |
+
"learning_rate": 6.060606060606061e-06,
|
2500 |
+
"loss": 1.0973,
|
2501 |
+
"step": 356
|
2502 |
+
},
|
2503 |
+
{
|
2504 |
+
"epoch": 0.9794238683127572,
|
2505 |
+
"grad_norm": 0.1485728621482849,
|
2506 |
+
"learning_rate": 5.303030303030304e-06,
|
2507 |
+
"loss": 1.1028,
|
2508 |
+
"step": 357
|
2509 |
+
},
|
2510 |
+
{
|
2511 |
+
"epoch": 0.9821673525377229,
|
2512 |
+
"grad_norm": 0.1496025174856186,
|
2513 |
+
"learning_rate": 4.5454545454545455e-06,
|
2514 |
+
"loss": 1.0941,
|
2515 |
+
"step": 358
|
2516 |
+
},
|
2517 |
+
{
|
2518 |
+
"epoch": 0.9849108367626886,
|
2519 |
+
"grad_norm": 0.1394403725862503,
|
2520 |
+
"learning_rate": 3.7878787878787882e-06,
|
2521 |
+
"loss": 1.1452,
|
2522 |
+
"step": 359
|
2523 |
}
|
2524 |
],
|
2525 |
"logging_steps": 1,
|
|
|
2539 |
"attributes": {}
|
2540 |
}
|
2541 |
},
|
2542 |
+
"total_flos": 4.3742608796698214e+17,
|
2543 |
"train_batch_size": 4,
|
2544 |
"trial_name": null,
|
2545 |
"trial_params": null
|