Training in progress, step 500, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 323014168
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e4f480d1202a174c70b1202204e7b19ce30e680e9aa677d4a6aa9b51470f4816
|
3 |
size 323014168
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 165484738
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0bed13abc506cd54f99481dbc28a31b45e3993ee70918e050ddaa7b4666bf34a
|
3 |
size 165484738
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:de2a2fb86838d3020d5803839893bd1dcef4db60ee5326a49eb5f9bfb377bf78
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dd4448a479fe2c3c13bb81ad3c5c2101e846d955cf940ee0558a49a098dd9051
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "miner_id_24/checkpoint-
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -3237,6 +3237,364 @@
|
|
3237 |
"eval_samples_per_second": 2.928,
|
3238 |
"eval_steps_per_second": 2.928,
|
3239 |
"step": 450
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3240 |
}
|
3241 |
],
|
3242 |
"logging_steps": 1,
|
@@ -3265,7 +3623,7 @@
|
|
3265 |
"attributes": {}
|
3266 |
}
|
3267 |
},
|
3268 |
-
"total_flos":
|
3269 |
"train_batch_size": 1,
|
3270 |
"trial_name": null,
|
3271 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.372147798538208,
|
3 |
+
"best_model_checkpoint": "miner_id_24/checkpoint-500",
|
4 |
+
"epoch": 0.8487163165711861,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
3237 |
"eval_samples_per_second": 2.928,
|
3238 |
"eval_steps_per_second": 2.928,
|
3239 |
"step": 450
|
3240 |
+
},
|
3241 |
+
{
|
3242 |
+
"epoch": 0.7655421175472098,
|
3243 |
+
"grad_norm": 0.35101303458213806,
|
3244 |
+
"learning_rate": 0.00017557832889489357,
|
3245 |
+
"loss": 1.093,
|
3246 |
+
"step": 451
|
3247 |
+
},
|
3248 |
+
{
|
3249 |
+
"epoch": 0.7672395501803522,
|
3250 |
+
"grad_norm": 0.23341582715511322,
|
3251 |
+
"learning_rate": 0.0001751091735662596,
|
3252 |
+
"loss": 0.7267,
|
3253 |
+
"step": 452
|
3254 |
+
},
|
3255 |
+
{
|
3256 |
+
"epoch": 0.7689369828134945,
|
3257 |
+
"grad_norm": 0.25353920459747314,
|
3258 |
+
"learning_rate": 0.00017463976538852654,
|
3259 |
+
"loss": 0.8581,
|
3260 |
+
"step": 453
|
3261 |
+
},
|
3262 |
+
{
|
3263 |
+
"epoch": 0.7706344154466369,
|
3264 |
+
"grad_norm": 0.21722197532653809,
|
3265 |
+
"learning_rate": 0.00017417010908862962,
|
3266 |
+
"loss": 0.7378,
|
3267 |
+
"step": 454
|
3268 |
+
},
|
3269 |
+
{
|
3270 |
+
"epoch": 0.7723318480797793,
|
3271 |
+
"grad_norm": 0.22062261402606964,
|
3272 |
+
"learning_rate": 0.00017370020939600248,
|
3273 |
+
"loss": 0.7099,
|
3274 |
+
"step": 455
|
3275 |
+
},
|
3276 |
+
{
|
3277 |
+
"epoch": 0.7740292807129217,
|
3278 |
+
"grad_norm": 0.1780662089586258,
|
3279 |
+
"learning_rate": 0.00017323007104252984,
|
3280 |
+
"loss": 0.4919,
|
3281 |
+
"step": 456
|
3282 |
+
},
|
3283 |
+
{
|
3284 |
+
"epoch": 0.775726713346064,
|
3285 |
+
"grad_norm": 0.20236726105213165,
|
3286 |
+
"learning_rate": 0.00017275969876249974,
|
3287 |
+
"loss": 0.5919,
|
3288 |
+
"step": 457
|
3289 |
+
},
|
3290 |
+
{
|
3291 |
+
"epoch": 0.7774241459792065,
|
3292 |
+
"grad_norm": 0.22523203492164612,
|
3293 |
+
"learning_rate": 0.00017228909729255574,
|
3294 |
+
"loss": 0.7686,
|
3295 |
+
"step": 458
|
3296 |
+
},
|
3297 |
+
{
|
3298 |
+
"epoch": 0.7791215786123489,
|
3299 |
+
"grad_norm": 0.23676562309265137,
|
3300 |
+
"learning_rate": 0.00017181827137164953,
|
3301 |
+
"loss": 0.8026,
|
3302 |
+
"step": 459
|
3303 |
+
},
|
3304 |
+
{
|
3305 |
+
"epoch": 0.7808190112454912,
|
3306 |
+
"grad_norm": 0.21583965420722961,
|
3307 |
+
"learning_rate": 0.00017134722574099276,
|
3308 |
+
"loss": 0.7097,
|
3309 |
+
"step": 460
|
3310 |
+
},
|
3311 |
+
{
|
3312 |
+
"epoch": 0.7825164438786336,
|
3313 |
+
"grad_norm": 0.2776244282722473,
|
3314 |
+
"learning_rate": 0.0001708759651440098,
|
3315 |
+
"loss": 0.9476,
|
3316 |
+
"step": 461
|
3317 |
+
},
|
3318 |
+
{
|
3319 |
+
"epoch": 0.784213876511776,
|
3320 |
+
"grad_norm": 0.2028312236070633,
|
3321 |
+
"learning_rate": 0.00017040449432628962,
|
3322 |
+
"loss": 0.6013,
|
3323 |
+
"step": 462
|
3324 |
+
},
|
3325 |
+
{
|
3326 |
+
"epoch": 0.7859113091449184,
|
3327 |
+
"grad_norm": 0.2275046855211258,
|
3328 |
+
"learning_rate": 0.0001699328180355381,
|
3329 |
+
"loss": 0.7551,
|
3330 |
+
"step": 463
|
3331 |
+
},
|
3332 |
+
{
|
3333 |
+
"epoch": 0.7876087417780607,
|
3334 |
+
"grad_norm": 0.20202623307704926,
|
3335 |
+
"learning_rate": 0.00016946094102153025,
|
3336 |
+
"loss": 0.4759,
|
3337 |
+
"step": 464
|
3338 |
+
},
|
3339 |
+
{
|
3340 |
+
"epoch": 0.7893061744112031,
|
3341 |
+
"grad_norm": 0.17477299273014069,
|
3342 |
+
"learning_rate": 0.00016898886803606237,
|
3343 |
+
"loss": 0.4537,
|
3344 |
+
"step": 465
|
3345 |
+
},
|
3346 |
+
{
|
3347 |
+
"epoch": 0.7910036070443455,
|
3348 |
+
"grad_norm": 0.08333531022071838,
|
3349 |
+
"learning_rate": 0.0001685166038329042,
|
3350 |
+
"loss": 0.1224,
|
3351 |
+
"step": 466
|
3352 |
+
},
|
3353 |
+
{
|
3354 |
+
"epoch": 0.7927010396774878,
|
3355 |
+
"grad_norm": 0.21820639073848724,
|
3356 |
+
"learning_rate": 0.000168044153167751,
|
3357 |
+
"loss": 0.5658,
|
3358 |
+
"step": 467
|
3359 |
+
},
|
3360 |
+
{
|
3361 |
+
"epoch": 0.7943984723106302,
|
3362 |
+
"grad_norm": 0.14012931287288666,
|
3363 |
+
"learning_rate": 0.00016757152079817573,
|
3364 |
+
"loss": 0.2818,
|
3365 |
+
"step": 468
|
3366 |
+
},
|
3367 |
+
{
|
3368 |
+
"epoch": 0.7960959049437726,
|
3369 |
+
"grad_norm": 0.1741451919078827,
|
3370 |
+
"learning_rate": 0.00016709871148358108,
|
3371 |
+
"loss": 0.3492,
|
3372 |
+
"step": 469
|
3373 |
+
},
|
3374 |
+
{
|
3375 |
+
"epoch": 0.797793337576915,
|
3376 |
+
"grad_norm": 0.1527792066335678,
|
3377 |
+
"learning_rate": 0.00016662572998515164,
|
3378 |
+
"loss": 0.2187,
|
3379 |
+
"step": 470
|
3380 |
+
},
|
3381 |
+
{
|
3382 |
+
"epoch": 0.7994907702100573,
|
3383 |
+
"grad_norm": 0.1383330523967743,
|
3384 |
+
"learning_rate": 0.00016615258106580585,
|
3385 |
+
"loss": 0.2405,
|
3386 |
+
"step": 471
|
3387 |
+
},
|
3388 |
+
{
|
3389 |
+
"epoch": 0.8011882028431997,
|
3390 |
+
"grad_norm": 0.13245010375976562,
|
3391 |
+
"learning_rate": 0.000165679269490148,
|
3392 |
+
"loss": 0.2295,
|
3393 |
+
"step": 472
|
3394 |
+
},
|
3395 |
+
{
|
3396 |
+
"epoch": 0.8028856354763421,
|
3397 |
+
"grad_norm": 0.13676372170448303,
|
3398 |
+
"learning_rate": 0.0001652058000244205,
|
3399 |
+
"loss": 0.2516,
|
3400 |
+
"step": 473
|
3401 |
+
},
|
3402 |
+
{
|
3403 |
+
"epoch": 0.8045830681094844,
|
3404 |
+
"grad_norm": 0.07976588606834412,
|
3405 |
+
"learning_rate": 0.00016473217743645556,
|
3406 |
+
"loss": 0.0916,
|
3407 |
+
"step": 474
|
3408 |
+
},
|
3409 |
+
{
|
3410 |
+
"epoch": 0.8062805007426268,
|
3411 |
+
"grad_norm": 0.11341172456741333,
|
3412 |
+
"learning_rate": 0.00016425840649562736,
|
3413 |
+
"loss": 0.152,
|
3414 |
+
"step": 475
|
3415 |
+
},
|
3416 |
+
{
|
3417 |
+
"epoch": 0.8079779333757692,
|
3418 |
+
"grad_norm": 0.12954847514629364,
|
3419 |
+
"learning_rate": 0.00016378449197280412,
|
3420 |
+
"loss": 0.1525,
|
3421 |
+
"step": 476
|
3422 |
+
},
|
3423 |
+
{
|
3424 |
+
"epoch": 0.8096753660089115,
|
3425 |
+
"grad_norm": 0.08243954181671143,
|
3426 |
+
"learning_rate": 0.0001633104386402997,
|
3427 |
+
"loss": 0.0708,
|
3428 |
+
"step": 477
|
3429 |
+
},
|
3430 |
+
{
|
3431 |
+
"epoch": 0.8113727986420539,
|
3432 |
+
"grad_norm": 0.0030563257168978453,
|
3433 |
+
"learning_rate": 0.00016283625127182596,
|
3434 |
+
"loss": 0.0001,
|
3435 |
+
"step": 478
|
3436 |
+
},
|
3437 |
+
{
|
3438 |
+
"epoch": 0.8130702312751963,
|
3439 |
+
"grad_norm": 0.0008315025479532778,
|
3440 |
+
"learning_rate": 0.00016236193464244444,
|
3441 |
+
"loss": 0.0,
|
3442 |
+
"step": 479
|
3443 |
+
},
|
3444 |
+
{
|
3445 |
+
"epoch": 0.8147676639083387,
|
3446 |
+
"grad_norm": 0.006789859849959612,
|
3447 |
+
"learning_rate": 0.00016188749352851825,
|
3448 |
+
"loss": 0.0002,
|
3449 |
+
"step": 480
|
3450 |
+
},
|
3451 |
+
{
|
3452 |
+
"epoch": 0.816465096541481,
|
3453 |
+
"grad_norm": 0.009863720275461674,
|
3454 |
+
"learning_rate": 0.00016141293270766424,
|
3455 |
+
"loss": 0.0002,
|
3456 |
+
"step": 481
|
3457 |
+
},
|
3458 |
+
{
|
3459 |
+
"epoch": 0.8181625291746234,
|
3460 |
+
"grad_norm": 0.059226732701063156,
|
3461 |
+
"learning_rate": 0.00016093825695870462,
|
3462 |
+
"loss": 0.0008,
|
3463 |
+
"step": 482
|
3464 |
+
},
|
3465 |
+
{
|
3466 |
+
"epoch": 0.8198599618077658,
|
3467 |
+
"grad_norm": 0.0056890202686190605,
|
3468 |
+
"learning_rate": 0.00016046347106161876,
|
3469 |
+
"loss": 0.0003,
|
3470 |
+
"step": 483
|
3471 |
+
},
|
3472 |
+
{
|
3473 |
+
"epoch": 0.8215573944409081,
|
3474 |
+
"grad_norm": 0.0004136976203881204,
|
3475 |
+
"learning_rate": 0.0001599885797974956,
|
3476 |
+
"loss": 0.0,
|
3477 |
+
"step": 484
|
3478 |
+
},
|
3479 |
+
{
|
3480 |
+
"epoch": 0.8232548270740505,
|
3481 |
+
"grad_norm": 0.016998767852783203,
|
3482 |
+
"learning_rate": 0.00015951358794848465,
|
3483 |
+
"loss": 0.0004,
|
3484 |
+
"step": 485
|
3485 |
+
},
|
3486 |
+
{
|
3487 |
+
"epoch": 0.8249522597071929,
|
3488 |
+
"grad_norm": 0.0017255417769774795,
|
3489 |
+
"learning_rate": 0.00015903850029774878,
|
3490 |
+
"loss": 0.0001,
|
3491 |
+
"step": 486
|
3492 |
+
},
|
3493 |
+
{
|
3494 |
+
"epoch": 0.8266496923403353,
|
3495 |
+
"grad_norm": 0.0012270875740796328,
|
3496 |
+
"learning_rate": 0.0001585633216294152,
|
3497 |
+
"loss": 0.0001,
|
3498 |
+
"step": 487
|
3499 |
+
},
|
3500 |
+
{
|
3501 |
+
"epoch": 0.8283471249734776,
|
3502 |
+
"grad_norm": 0.0009051132365129888,
|
3503 |
+
"learning_rate": 0.0001580880567285279,
|
3504 |
+
"loss": 0.0,
|
3505 |
+
"step": 488
|
3506 |
+
},
|
3507 |
+
{
|
3508 |
+
"epoch": 0.83004455760662,
|
3509 |
+
"grad_norm": 0.0009166182717308402,
|
3510 |
+
"learning_rate": 0.00015761271038099912,
|
3511 |
+
"loss": 0.0001,
|
3512 |
+
"step": 489
|
3513 |
+
},
|
3514 |
+
{
|
3515 |
+
"epoch": 0.8317419902397624,
|
3516 |
+
"grad_norm": 0.002149962354451418,
|
3517 |
+
"learning_rate": 0.00015713728737356137,
|
3518 |
+
"loss": 0.0001,
|
3519 |
+
"step": 490
|
3520 |
+
},
|
3521 |
+
{
|
3522 |
+
"epoch": 0.8334394228729047,
|
3523 |
+
"grad_norm": 0.008419407531619072,
|
3524 |
+
"learning_rate": 0.00015666179249371892,
|
3525 |
+
"loss": 0.0004,
|
3526 |
+
"step": 491
|
3527 |
+
},
|
3528 |
+
{
|
3529 |
+
"epoch": 0.8351368555060471,
|
3530 |
+
"grad_norm": 0.0006536226137541234,
|
3531 |
+
"learning_rate": 0.00015618623052970006,
|
3532 |
+
"loss": 0.0,
|
3533 |
+
"step": 492
|
3534 |
+
},
|
3535 |
+
{
|
3536 |
+
"epoch": 0.8368342881391895,
|
3537 |
+
"grad_norm": 0.035275768488645554,
|
3538 |
+
"learning_rate": 0.0001557106062704085,
|
3539 |
+
"loss": 0.0064,
|
3540 |
+
"step": 493
|
3541 |
+
},
|
3542 |
+
{
|
3543 |
+
"epoch": 0.8385317207723318,
|
3544 |
+
"grad_norm": 0.002518226159736514,
|
3545 |
+
"learning_rate": 0.00015523492450537517,
|
3546 |
+
"loss": 0.0001,
|
3547 |
+
"step": 494
|
3548 |
+
},
|
3549 |
+
{
|
3550 |
+
"epoch": 0.8402291534054742,
|
3551 |
+
"grad_norm": 0.00048825182602740824,
|
3552 |
+
"learning_rate": 0.00015475919002471016,
|
3553 |
+
"loss": 0.0,
|
3554 |
+
"step": 495
|
3555 |
+
},
|
3556 |
+
{
|
3557 |
+
"epoch": 0.8419265860386166,
|
3558 |
+
"grad_norm": 0.007141390815377235,
|
3559 |
+
"learning_rate": 0.0001542834076190544,
|
3560 |
+
"loss": 0.0001,
|
3561 |
+
"step": 496
|
3562 |
+
},
|
3563 |
+
{
|
3564 |
+
"epoch": 0.843624018671759,
|
3565 |
+
"grad_norm": 0.048018842935562134,
|
3566 |
+
"learning_rate": 0.00015380758207953155,
|
3567 |
+
"loss": 0.0004,
|
3568 |
+
"step": 497
|
3569 |
+
},
|
3570 |
+
{
|
3571 |
+
"epoch": 0.8453214513049013,
|
3572 |
+
"grad_norm": 0.0009412519866600633,
|
3573 |
+
"learning_rate": 0.0001533317181976994,
|
3574 |
+
"loss": 0.0,
|
3575 |
+
"step": 498
|
3576 |
+
},
|
3577 |
+
{
|
3578 |
+
"epoch": 0.8470188839380437,
|
3579 |
+
"grad_norm": 0.031883303076028824,
|
3580 |
+
"learning_rate": 0.00015285582076550198,
|
3581 |
+
"loss": 0.0037,
|
3582 |
+
"step": 499
|
3583 |
+
},
|
3584 |
+
{
|
3585 |
+
"epoch": 0.8487163165711861,
|
3586 |
+
"grad_norm": 0.016174526885151863,
|
3587 |
+
"learning_rate": 0.00015237989457522118,
|
3588 |
+
"loss": 0.0007,
|
3589 |
+
"step": 500
|
3590 |
+
},
|
3591 |
+
{
|
3592 |
+
"epoch": 0.8487163165711861,
|
3593 |
+
"eval_loss": 0.372147798538208,
|
3594 |
+
"eval_runtime": 65.9897,
|
3595 |
+
"eval_samples_per_second": 2.925,
|
3596 |
+
"eval_steps_per_second": 2.925,
|
3597 |
+
"step": 500
|
3598 |
}
|
3599 |
],
|
3600 |
"logging_steps": 1,
|
|
|
3623 |
"attributes": {}
|
3624 |
}
|
3625 |
},
|
3626 |
+
"total_flos": 3.2402043214390886e+17,
|
3627 |
"train_batch_size": 1,
|
3628 |
"trial_name": null,
|
3629 |
"trial_params": null
|