bert-base-uncased-mean-400 / trainer_state.json
sobamchan's picture
Upload folder using huggingface_hub
7349fd9 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.09176416609314063,
"eval_steps": 5,
"global_step": 400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0011470520761642578,
"eval_loss": 2.755366325378418,
"eval_runtime": 14.0088,
"eval_samples_per_second": 469.989,
"eval_steps_per_second": 3.712,
"step": 5
},
{
"epoch": 0.0022941041523285156,
"eval_loss": 2.750617027282715,
"eval_runtime": 14.0654,
"eval_samples_per_second": 468.099,
"eval_steps_per_second": 3.697,
"step": 10
},
{
"epoch": 0.0034411562284927736,
"eval_loss": 2.7424304485321045,
"eval_runtime": 14.1186,
"eval_samples_per_second": 466.336,
"eval_steps_per_second": 3.683,
"step": 15
},
{
"epoch": 0.004588208304657031,
"eval_loss": 2.730889081954956,
"eval_runtime": 14.2451,
"eval_samples_per_second": 462.193,
"eval_steps_per_second": 3.65,
"step": 20
},
{
"epoch": 0.005735260380821289,
"eval_loss": 2.7159576416015625,
"eval_runtime": 14.245,
"eval_samples_per_second": 462.199,
"eval_steps_per_second": 3.65,
"step": 25
},
{
"epoch": 0.006882312456985547,
"eval_loss": 2.6975343227386475,
"eval_runtime": 14.3448,
"eval_samples_per_second": 458.981,
"eval_steps_per_second": 3.625,
"step": 30
},
{
"epoch": 0.008029364533149804,
"eval_loss": 2.675693988800049,
"eval_runtime": 16.965,
"eval_samples_per_second": 388.094,
"eval_steps_per_second": 3.065,
"step": 35
},
{
"epoch": 0.009176416609314062,
"eval_loss": 2.650242328643799,
"eval_runtime": 14.2414,
"eval_samples_per_second": 462.315,
"eval_steps_per_second": 3.651,
"step": 40
},
{
"epoch": 0.01032346868547832,
"eval_loss": 2.6213560104370117,
"eval_runtime": 14.4168,
"eval_samples_per_second": 456.689,
"eval_steps_per_second": 3.607,
"step": 45
},
{
"epoch": 0.011470520761642579,
"eval_loss": 2.5892951488494873,
"eval_runtime": 14.3332,
"eval_samples_per_second": 459.353,
"eval_steps_per_second": 3.628,
"step": 50
},
{
"epoch": 0.012617572837806837,
"eval_loss": 2.5538384914398193,
"eval_runtime": 14.4067,
"eval_samples_per_second": 457.01,
"eval_steps_per_second": 3.609,
"step": 55
},
{
"epoch": 0.013764624913971095,
"eval_loss": 2.51452374458313,
"eval_runtime": 14.4948,
"eval_samples_per_second": 454.233,
"eval_steps_per_second": 3.588,
"step": 60
},
{
"epoch": 0.014911676990135353,
"eval_loss": 2.4726295471191406,
"eval_runtime": 14.3162,
"eval_samples_per_second": 459.898,
"eval_steps_per_second": 3.632,
"step": 65
},
{
"epoch": 0.01605872906629961,
"eval_loss": 2.428196668624878,
"eval_runtime": 14.3204,
"eval_samples_per_second": 459.763,
"eval_steps_per_second": 3.631,
"step": 70
},
{
"epoch": 0.017205781142463867,
"eval_loss": 2.379453659057617,
"eval_runtime": 14.3796,
"eval_samples_per_second": 457.872,
"eval_steps_per_second": 3.616,
"step": 75
},
{
"epoch": 0.018352833218628125,
"eval_loss": 2.327221393585205,
"eval_runtime": 14.3415,
"eval_samples_per_second": 459.089,
"eval_steps_per_second": 3.626,
"step": 80
},
{
"epoch": 0.019499885294792383,
"eval_loss": 2.2712388038635254,
"eval_runtime": 14.462,
"eval_samples_per_second": 455.263,
"eval_steps_per_second": 3.596,
"step": 85
},
{
"epoch": 0.02064693737095664,
"eval_loss": 2.2120068073272705,
"eval_runtime": 14.334,
"eval_samples_per_second": 459.326,
"eval_steps_per_second": 3.628,
"step": 90
},
{
"epoch": 0.0217939894471209,
"eval_loss": 2.1501331329345703,
"eval_runtime": 14.3506,
"eval_samples_per_second": 458.796,
"eval_steps_per_second": 3.624,
"step": 95
},
{
"epoch": 0.022941041523285157,
"grad_norm": 19.895355224609375,
"learning_rate": 7.645259938837921e-07,
"loss": 3.6197,
"step": 100
},
{
"epoch": 0.022941041523285157,
"eval_loss": 2.086596727371216,
"eval_runtime": 16.6465,
"eval_samples_per_second": 395.519,
"eval_steps_per_second": 3.124,
"step": 100
},
{
"epoch": 0.024088093599449415,
"eval_loss": 2.0223236083984375,
"eval_runtime": 14.471,
"eval_samples_per_second": 454.979,
"eval_steps_per_second": 3.593,
"step": 105
},
{
"epoch": 0.025235145675613673,
"eval_loss": 1.9571231603622437,
"eval_runtime": 14.3783,
"eval_samples_per_second": 457.913,
"eval_steps_per_second": 3.617,
"step": 110
},
{
"epoch": 0.02638219775177793,
"eval_loss": 1.890655517578125,
"eval_runtime": 14.288,
"eval_samples_per_second": 460.805,
"eval_steps_per_second": 3.639,
"step": 115
},
{
"epoch": 0.02752924982794219,
"eval_loss": 1.823920488357544,
"eval_runtime": 14.3764,
"eval_samples_per_second": 457.972,
"eval_steps_per_second": 3.617,
"step": 120
},
{
"epoch": 0.028676301904106447,
"eval_loss": 1.758331298828125,
"eval_runtime": 14.2666,
"eval_samples_per_second": 461.499,
"eval_steps_per_second": 3.645,
"step": 125
},
{
"epoch": 0.029823353980270705,
"eval_loss": 1.6937522888183594,
"eval_runtime": 14.2522,
"eval_samples_per_second": 461.963,
"eval_steps_per_second": 3.649,
"step": 130
},
{
"epoch": 0.030970406056434963,
"eval_loss": 1.6316019296646118,
"eval_runtime": 16.3104,
"eval_samples_per_second": 403.67,
"eval_steps_per_second": 3.188,
"step": 135
},
{
"epoch": 0.03211745813259922,
"eval_loss": 1.571895718574524,
"eval_runtime": 14.2422,
"eval_samples_per_second": 462.288,
"eval_steps_per_second": 3.651,
"step": 140
},
{
"epoch": 0.033264510208763476,
"eval_loss": 1.5148202180862427,
"eval_runtime": 14.3624,
"eval_samples_per_second": 458.418,
"eval_steps_per_second": 3.621,
"step": 145
},
{
"epoch": 0.034411562284927734,
"eval_loss": 1.459762454032898,
"eval_runtime": 14.3035,
"eval_samples_per_second": 460.308,
"eval_steps_per_second": 3.635,
"step": 150
},
{
"epoch": 0.03555861436109199,
"eval_loss": 1.4081143140792847,
"eval_runtime": 14.2988,
"eval_samples_per_second": 460.46,
"eval_steps_per_second": 3.637,
"step": 155
},
{
"epoch": 0.03670566643725625,
"eval_loss": 1.3612124919891357,
"eval_runtime": 14.2569,
"eval_samples_per_second": 461.812,
"eval_steps_per_second": 3.647,
"step": 160
},
{
"epoch": 0.03785271851342051,
"eval_loss": 1.318212866783142,
"eval_runtime": 14.282,
"eval_samples_per_second": 460.999,
"eval_steps_per_second": 3.641,
"step": 165
},
{
"epoch": 0.038999770589584766,
"eval_loss": 1.2802687883377075,
"eval_runtime": 14.4058,
"eval_samples_per_second": 457.038,
"eval_steps_per_second": 3.61,
"step": 170
},
{
"epoch": 0.040146822665749024,
"eval_loss": 1.246294617652893,
"eval_runtime": 14.2804,
"eval_samples_per_second": 461.051,
"eval_steps_per_second": 3.641,
"step": 175
},
{
"epoch": 0.04129387474191328,
"eval_loss": 1.2160167694091797,
"eval_runtime": 14.3449,
"eval_samples_per_second": 458.977,
"eval_steps_per_second": 3.625,
"step": 180
},
{
"epoch": 0.04244092681807754,
"eval_loss": 1.189509630203247,
"eval_runtime": 14.2361,
"eval_samples_per_second": 462.486,
"eval_steps_per_second": 3.653,
"step": 185
},
{
"epoch": 0.0435879788942418,
"eval_loss": 1.1653709411621094,
"eval_runtime": 14.3748,
"eval_samples_per_second": 458.025,
"eval_steps_per_second": 3.617,
"step": 190
},
{
"epoch": 0.044735030970406056,
"eval_loss": 1.143513798713684,
"eval_runtime": 14.25,
"eval_samples_per_second": 462.035,
"eval_steps_per_second": 3.649,
"step": 195
},
{
"epoch": 0.045882083046570314,
"grad_norm": 8.192963600158691,
"learning_rate": 1.5290519877675841e-06,
"loss": 2.292,
"step": 200
},
{
"epoch": 0.045882083046570314,
"eval_loss": 1.1239805221557617,
"eval_runtime": 16.3012,
"eval_samples_per_second": 403.896,
"eval_steps_per_second": 3.19,
"step": 200
},
{
"epoch": 0.04702913512273457,
"eval_loss": 1.1064891815185547,
"eval_runtime": 14.2673,
"eval_samples_per_second": 461.476,
"eval_steps_per_second": 3.645,
"step": 205
},
{
"epoch": 0.04817618719889883,
"eval_loss": 1.0907284021377563,
"eval_runtime": 14.2601,
"eval_samples_per_second": 461.707,
"eval_steps_per_second": 3.647,
"step": 210
},
{
"epoch": 0.04932323927506309,
"eval_loss": 1.076059341430664,
"eval_runtime": 14.2845,
"eval_samples_per_second": 460.919,
"eval_steps_per_second": 3.64,
"step": 215
},
{
"epoch": 0.050470291351227346,
"eval_loss": 1.062280535697937,
"eval_runtime": 14.217,
"eval_samples_per_second": 463.109,
"eval_steps_per_second": 3.658,
"step": 220
},
{
"epoch": 0.051617343427391604,
"eval_loss": 1.0492550134658813,
"eval_runtime": 14.2233,
"eval_samples_per_second": 462.901,
"eval_steps_per_second": 3.656,
"step": 225
},
{
"epoch": 0.05276439550355586,
"eval_loss": 1.0374095439910889,
"eval_runtime": 14.5693,
"eval_samples_per_second": 451.909,
"eval_steps_per_second": 3.569,
"step": 230
},
{
"epoch": 0.05391144757972012,
"eval_loss": 1.0259910821914673,
"eval_runtime": 15.9876,
"eval_samples_per_second": 411.819,
"eval_steps_per_second": 3.253,
"step": 235
},
{
"epoch": 0.05505849965588438,
"eval_loss": 1.0147359371185303,
"eval_runtime": 14.2101,
"eval_samples_per_second": 463.332,
"eval_steps_per_second": 3.659,
"step": 240
},
{
"epoch": 0.056205551732048636,
"eval_loss": 1.0042893886566162,
"eval_runtime": 14.2547,
"eval_samples_per_second": 461.882,
"eval_steps_per_second": 3.648,
"step": 245
},
{
"epoch": 0.057352603808212894,
"eval_loss": 0.9941452741622925,
"eval_runtime": 14.2125,
"eval_samples_per_second": 463.253,
"eval_steps_per_second": 3.659,
"step": 250
},
{
"epoch": 0.05849965588437715,
"eval_loss": 0.9848644733428955,
"eval_runtime": 14.2628,
"eval_samples_per_second": 461.621,
"eval_steps_per_second": 3.646,
"step": 255
},
{
"epoch": 0.05964670796054141,
"eval_loss": 0.9763049483299255,
"eval_runtime": 14.2014,
"eval_samples_per_second": 463.617,
"eval_steps_per_second": 3.662,
"step": 260
},
{
"epoch": 0.06079376003670567,
"eval_loss": 0.9682185649871826,
"eval_runtime": 16.4598,
"eval_samples_per_second": 400.004,
"eval_steps_per_second": 3.159,
"step": 265
},
{
"epoch": 0.06194081211286993,
"eval_loss": 0.9602033495903015,
"eval_runtime": 14.3432,
"eval_samples_per_second": 459.031,
"eval_steps_per_second": 3.625,
"step": 270
},
{
"epoch": 0.06308786418903418,
"eval_loss": 0.952538251876831,
"eval_runtime": 14.2143,
"eval_samples_per_second": 463.195,
"eval_steps_per_second": 3.658,
"step": 275
},
{
"epoch": 0.06423491626519844,
"eval_loss": 0.9450673460960388,
"eval_runtime": 14.2195,
"eval_samples_per_second": 463.025,
"eval_steps_per_second": 3.657,
"step": 280
},
{
"epoch": 0.0653819683413627,
"eval_loss": 0.937529981136322,
"eval_runtime": 14.2726,
"eval_samples_per_second": 461.303,
"eval_steps_per_second": 3.643,
"step": 285
},
{
"epoch": 0.06652902041752695,
"eval_loss": 0.930277407169342,
"eval_runtime": 14.5409,
"eval_samples_per_second": 452.791,
"eval_steps_per_second": 3.576,
"step": 290
},
{
"epoch": 0.06767607249369122,
"eval_loss": 0.9230740666389465,
"eval_runtime": 14.2552,
"eval_samples_per_second": 461.868,
"eval_steps_per_second": 3.648,
"step": 295
},
{
"epoch": 0.06882312456985547,
"grad_norm": 7.243612766265869,
"learning_rate": 2.2935779816513764e-06,
"loss": 1.5711,
"step": 300
},
{
"epoch": 0.06882312456985547,
"eval_loss": 0.915981650352478,
"eval_runtime": 16.3997,
"eval_samples_per_second": 401.471,
"eval_steps_per_second": 3.171,
"step": 300
},
{
"epoch": 0.06997017664601973,
"eval_loss": 0.9087598323822021,
"eval_runtime": 14.1993,
"eval_samples_per_second": 463.685,
"eval_steps_per_second": 3.662,
"step": 305
},
{
"epoch": 0.07111722872218398,
"eval_loss": 0.9022247791290283,
"eval_runtime": 14.3064,
"eval_samples_per_second": 460.214,
"eval_steps_per_second": 3.635,
"step": 310
},
{
"epoch": 0.07226428079834825,
"eval_loss": 0.8950537443161011,
"eval_runtime": 14.2087,
"eval_samples_per_second": 463.377,
"eval_steps_per_second": 3.66,
"step": 315
},
{
"epoch": 0.0734113328745125,
"eval_loss": 0.8874984383583069,
"eval_runtime": 14.2295,
"eval_samples_per_second": 462.699,
"eval_steps_per_second": 3.654,
"step": 320
},
{
"epoch": 0.07455838495067676,
"eval_loss": 0.8809635043144226,
"eval_runtime": 14.2266,
"eval_samples_per_second": 462.796,
"eval_steps_per_second": 3.655,
"step": 325
},
{
"epoch": 0.07570543702684102,
"eval_loss": 0.8745627403259277,
"eval_runtime": 14.2942,
"eval_samples_per_second": 460.608,
"eval_steps_per_second": 3.638,
"step": 330
},
{
"epoch": 0.07685248910300528,
"eval_loss": 0.8683872818946838,
"eval_runtime": 14.1413,
"eval_samples_per_second": 465.586,
"eval_steps_per_second": 3.677,
"step": 335
},
{
"epoch": 0.07799954117916953,
"eval_loss": 0.8624699115753174,
"eval_runtime": 14.1694,
"eval_samples_per_second": 464.662,
"eval_steps_per_second": 3.67,
"step": 340
},
{
"epoch": 0.0791465932553338,
"eval_loss": 0.8569262027740479,
"eval_runtime": 14.2132,
"eval_samples_per_second": 463.231,
"eval_steps_per_second": 3.659,
"step": 345
},
{
"epoch": 0.08029364533149805,
"eval_loss": 0.8515614867210388,
"eval_runtime": 14.3011,
"eval_samples_per_second": 460.383,
"eval_steps_per_second": 3.636,
"step": 350
},
{
"epoch": 0.08144069740766231,
"eval_loss": 0.8465690612792969,
"eval_runtime": 14.2483,
"eval_samples_per_second": 462.09,
"eval_steps_per_second": 3.65,
"step": 355
},
{
"epoch": 0.08258774948382656,
"eval_loss": 0.8418980836868286,
"eval_runtime": 14.1978,
"eval_samples_per_second": 463.735,
"eval_steps_per_second": 3.663,
"step": 360
},
{
"epoch": 0.08373480155999083,
"eval_loss": 0.836972177028656,
"eval_runtime": 16.4774,
"eval_samples_per_second": 399.577,
"eval_steps_per_second": 3.156,
"step": 365
},
{
"epoch": 0.08488185363615508,
"eval_loss": 0.8320812582969666,
"eval_runtime": 14.3098,
"eval_samples_per_second": 460.103,
"eval_steps_per_second": 3.634,
"step": 370
},
{
"epoch": 0.08602890571231935,
"eval_loss": 0.8273819088935852,
"eval_runtime": 14.2755,
"eval_samples_per_second": 461.209,
"eval_steps_per_second": 3.643,
"step": 375
},
{
"epoch": 0.0871759577884836,
"eval_loss": 0.8223117589950562,
"eval_runtime": 14.1567,
"eval_samples_per_second": 465.081,
"eval_steps_per_second": 3.673,
"step": 380
},
{
"epoch": 0.08832300986464786,
"eval_loss": 0.8169983625411987,
"eval_runtime": 14.1912,
"eval_samples_per_second": 463.948,
"eval_steps_per_second": 3.664,
"step": 385
},
{
"epoch": 0.08947006194081211,
"eval_loss": 0.811504602432251,
"eval_runtime": 14.1791,
"eval_samples_per_second": 464.347,
"eval_steps_per_second": 3.667,
"step": 390
},
{
"epoch": 0.09061711401697638,
"eval_loss": 0.8054670095443726,
"eval_runtime": 14.2016,
"eval_samples_per_second": 463.611,
"eval_steps_per_second": 3.662,
"step": 395
},
{
"epoch": 0.09176416609314063,
"grad_norm": 5.852241516113281,
"learning_rate": 3.0581039755351682e-06,
"loss": 1.3859,
"step": 400
},
{
"epoch": 0.09176416609314063,
"eval_loss": 0.8003625273704529,
"eval_runtime": 16.3667,
"eval_samples_per_second": 402.281,
"eval_steps_per_second": 3.177,
"step": 400
}
],
"logging_steps": 100,
"max_steps": 13077,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 5,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 128,
"trial_name": null,
"trial_params": null
}