Training in progress, step 349, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 73911112
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6778c35d7237cda41f150fe226d7ad2b42a18571675e22e19b37130222805000
|
3 |
size 73911112
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 37431220
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0a4ae9caa1f7c350f9c7e85a253ed14dc3b50d5c7f44e3ac1655fbd5849f58f
|
3 |
size 37431220
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7ca002fadce970e2797672d78741b69114f9a264197843fa3e3305d06f80f89f
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2422,6 +2422,34 @@
|
|
2422 |
"learning_rate": 1.4393939393939396e-05,
|
2423 |
"loss": 1.1708,
|
2424 |
"step": 345
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2425 |
}
|
2426 |
],
|
2427 |
"logging_steps": 1,
|
@@ -2441,7 +2469,7 @@
|
|
2441 |
"attributes": {}
|
2442 |
}
|
2443 |
},
|
2444 |
-
"total_flos": 4.
|
2445 |
"train_batch_size": 4,
|
2446 |
"trial_name": null,
|
2447 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9574759945130316,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 349,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2422 |
"learning_rate": 1.4393939393939396e-05,
|
2423 |
"loss": 1.1708,
|
2424 |
"step": 345
|
2425 |
+
},
|
2426 |
+
{
|
2427 |
+
"epoch": 0.9492455418381345,
|
2428 |
+
"grad_norm": 0.15814326703548431,
|
2429 |
+
"learning_rate": 1.3636363636363637e-05,
|
2430 |
+
"loss": 1.1695,
|
2431 |
+
"step": 346
|
2432 |
+
},
|
2433 |
+
{
|
2434 |
+
"epoch": 0.9519890260631001,
|
2435 |
+
"grad_norm": 0.15282317996025085,
|
2436 |
+
"learning_rate": 1.287878787878788e-05,
|
2437 |
+
"loss": 1.2195,
|
2438 |
+
"step": 347
|
2439 |
+
},
|
2440 |
+
{
|
2441 |
+
"epoch": 0.9547325102880658,
|
2442 |
+
"grad_norm": 0.16384905576705933,
|
2443 |
+
"learning_rate": 1.2121212121212122e-05,
|
2444 |
+
"loss": 1.1989,
|
2445 |
+
"step": 348
|
2446 |
+
},
|
2447 |
+
{
|
2448 |
+
"epoch": 0.9574759945130316,
|
2449 |
+
"grad_norm": 0.16359828412532806,
|
2450 |
+
"learning_rate": 1.1363636363636365e-05,
|
2451 |
+
"loss": 1.0967,
|
2452 |
+
"step": 349
|
2453 |
}
|
2454 |
],
|
2455 |
"logging_steps": 1,
|
|
|
2469 |
"attributes": {}
|
2470 |
}
|
2471 |
},
|
2472 |
+
"total_flos": 4.323960827284808e+17,
|
2473 |
"train_batch_size": 4,
|
2474 |
"trial_name": null,
|
2475 |
"trial_params": null
|