{
"best_metric": 0.9276220745449292,
"best_model_checkpoint": "mobilenet_v2_1.0_224-finetuned-ISIC-dec2024test\\checkpoint-2430",
"epoch": 4.998459167950694,
"eval_steps": 500,
"global_step": 2430,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02054442732408834,
"grad_norm": 23.202468872070312,
"learning_rate": 2.05761316872428e-06,
"loss": 2.2149,
"step": 10
},
{
"epoch": 0.04108885464817668,
"grad_norm": 19.670244216918945,
"learning_rate": 4.11522633744856e-06,
"loss": 2.2202,
"step": 20
},
{
"epoch": 0.061633281972265024,
"grad_norm": 18.64865493774414,
"learning_rate": 6.172839506172839e-06,
"loss": 2.1212,
"step": 30
},
{
"epoch": 0.08217770929635336,
"grad_norm": 20.16490936279297,
"learning_rate": 8.23045267489712e-06,
"loss": 2.0225,
"step": 40
},
{
"epoch": 0.1027221366204417,
"grad_norm": 17.26641845703125,
"learning_rate": 1.02880658436214e-05,
"loss": 1.9166,
"step": 50
},
{
"epoch": 0.12326656394453005,
"grad_norm": 13.308320045471191,
"learning_rate": 1.2345679012345678e-05,
"loss": 1.7424,
"step": 60
},
{
"epoch": 0.14381099126861838,
"grad_norm": 14.259194374084473,
"learning_rate": 1.440329218106996e-05,
"loss": 1.5742,
"step": 70
},
{
"epoch": 0.16435541859270672,
"grad_norm": 9.573338508605957,
"learning_rate": 1.646090534979424e-05,
"loss": 1.4589,
"step": 80
},
{
"epoch": 0.18489984591679506,
"grad_norm": 7.806981086730957,
"learning_rate": 1.8518518518518518e-05,
"loss": 1.2895,
"step": 90
},
{
"epoch": 0.2054442732408834,
"grad_norm": 8.471112251281738,
"learning_rate": 2.05761316872428e-05,
"loss": 1.1936,
"step": 100
},
{
"epoch": 0.22598870056497175,
"grad_norm": 7.020042419433594,
"learning_rate": 2.2633744855967078e-05,
"loss": 0.9931,
"step": 110
},
{
"epoch": 0.2465331278890601,
"grad_norm": 8.448640823364258,
"learning_rate": 2.4691358024691357e-05,
"loss": 1.0736,
"step": 120
},
{
"epoch": 0.2670775552131484,
"grad_norm": 6.455481052398682,
"learning_rate": 2.6748971193415638e-05,
"loss": 1.0588,
"step": 130
},
{
"epoch": 0.28762198253723675,
"grad_norm": 7.331775665283203,
"learning_rate": 2.880658436213992e-05,
"loss": 1.0402,
"step": 140
},
{
"epoch": 0.3081664098613251,
"grad_norm": 6.955947399139404,
"learning_rate": 3.08641975308642e-05,
"loss": 0.9195,
"step": 150
},
{
"epoch": 0.32871083718541344,
"grad_norm": 7.389803886413574,
"learning_rate": 3.292181069958848e-05,
"loss": 1.0719,
"step": 160
},
{
"epoch": 0.3492552645095018,
"grad_norm": 7.89853572845459,
"learning_rate": 3.497942386831276e-05,
"loss": 1.0268,
"step": 170
},
{
"epoch": 0.3697996918335901,
"grad_norm": 6.920297622680664,
"learning_rate": 3.7037037037037037e-05,
"loss": 0.8495,
"step": 180
},
{
"epoch": 0.39034411915767847,
"grad_norm": 6.326653480529785,
"learning_rate": 3.909465020576132e-05,
"loss": 0.8852,
"step": 190
},
{
"epoch": 0.4108885464817668,
"grad_norm": 6.971718788146973,
"learning_rate": 4.11522633744856e-05,
"loss": 0.8968,
"step": 200
},
{
"epoch": 0.43143297380585516,
"grad_norm": 5.848041534423828,
"learning_rate": 4.3209876543209875e-05,
"loss": 0.9205,
"step": 210
},
{
"epoch": 0.4519774011299435,
"grad_norm": 8.545123100280762,
"learning_rate": 4.5267489711934157e-05,
"loss": 1.0261,
"step": 220
},
{
"epoch": 0.47252182845403184,
"grad_norm": 6.959014892578125,
"learning_rate": 4.732510288065844e-05,
"loss": 0.8751,
"step": 230
},
{
"epoch": 0.4930662557781202,
"grad_norm": 8.537935256958008,
"learning_rate": 4.938271604938271e-05,
"loss": 0.9604,
"step": 240
},
{
"epoch": 0.5136106831022085,
"grad_norm": 7.854872226715088,
"learning_rate": 4.983996342021033e-05,
"loss": 0.9493,
"step": 250
},
{
"epoch": 0.5341551104262968,
"grad_norm": 8.308143615722656,
"learning_rate": 4.9611339734796525e-05,
"loss": 0.9021,
"step": 260
},
{
"epoch": 0.5546995377503852,
"grad_norm": 8.60777759552002,
"learning_rate": 4.938271604938271e-05,
"loss": 0.9398,
"step": 270
},
{
"epoch": 0.5752439650744735,
"grad_norm": 6.065791606903076,
"learning_rate": 4.9154092363968915e-05,
"loss": 0.8961,
"step": 280
},
{
"epoch": 0.5957883923985619,
"grad_norm": 7.975262641906738,
"learning_rate": 4.89254686785551e-05,
"loss": 0.9541,
"step": 290
},
{
"epoch": 0.6163328197226502,
"grad_norm": 8.56069278717041,
"learning_rate": 4.86968449931413e-05,
"loss": 1.0714,
"step": 300
},
{
"epoch": 0.6368772470467385,
"grad_norm": 7.618618488311768,
"learning_rate": 4.8468221307727485e-05,
"loss": 0.9913,
"step": 310
},
{
"epoch": 0.6574216743708269,
"grad_norm": 5.433694839477539,
"learning_rate": 4.823959762231367e-05,
"loss": 0.8266,
"step": 320
},
{
"epoch": 0.6779661016949152,
"grad_norm": 6.71955680847168,
"learning_rate": 4.801097393689987e-05,
"loss": 0.9065,
"step": 330
},
{
"epoch": 0.6985105290190036,
"grad_norm": 7.317810535430908,
"learning_rate": 4.7782350251486056e-05,
"loss": 0.8529,
"step": 340
},
{
"epoch": 0.7190549563430919,
"grad_norm": 7.955277919769287,
"learning_rate": 4.755372656607225e-05,
"loss": 0.9454,
"step": 350
},
{
"epoch": 0.7395993836671803,
"grad_norm": 8.274344444274902,
"learning_rate": 4.732510288065844e-05,
"loss": 0.9462,
"step": 360
},
{
"epoch": 0.7601438109912686,
"grad_norm": 6.541558265686035,
"learning_rate": 4.709647919524463e-05,
"loss": 0.8823,
"step": 370
},
{
"epoch": 0.7806882383153569,
"grad_norm": 7.624207019805908,
"learning_rate": 4.686785550983082e-05,
"loss": 0.9984,
"step": 380
},
{
"epoch": 0.8012326656394453,
"grad_norm": 7.345012187957764,
"learning_rate": 4.6639231824417016e-05,
"loss": 0.9381,
"step": 390
},
{
"epoch": 0.8217770929635336,
"grad_norm": 7.9643096923828125,
"learning_rate": 4.6410608139003203e-05,
"loss": 0.9472,
"step": 400
},
{
"epoch": 0.842321520287622,
"grad_norm": 6.939286231994629,
"learning_rate": 4.618198445358939e-05,
"loss": 0.9275,
"step": 410
},
{
"epoch": 0.8628659476117103,
"grad_norm": 8.748644828796387,
"learning_rate": 4.5953360768175586e-05,
"loss": 1.0875,
"step": 420
},
{
"epoch": 0.8834103749357987,
"grad_norm": 6.044397354125977,
"learning_rate": 4.5724737082761774e-05,
"loss": 1.0232,
"step": 430
},
{
"epoch": 0.903954802259887,
"grad_norm": 10.523336410522461,
"learning_rate": 4.549611339734797e-05,
"loss": 0.9283,
"step": 440
},
{
"epoch": 0.9244992295839753,
"grad_norm": 5.524984836578369,
"learning_rate": 4.5267489711934157e-05,
"loss": 0.9606,
"step": 450
},
{
"epoch": 0.9450436569080637,
"grad_norm": 7.313499450683594,
"learning_rate": 4.503886602652035e-05,
"loss": 0.8652,
"step": 460
},
{
"epoch": 0.965588084232152,
"grad_norm": 6.494114398956299,
"learning_rate": 4.481024234110654e-05,
"loss": 0.9293,
"step": 470
},
{
"epoch": 0.9861325115562404,
"grad_norm": 6.250232219696045,
"learning_rate": 4.4581618655692734e-05,
"loss": 0.9055,
"step": 480
},
{
"epoch": 0.9984591679506933,
"eval_accuracy": 0.9195319271886738,
"eval_loss": 0.19548115134239197,
"eval_runtime": 1093.5686,
"eval_samples_per_second": 6.33,
"eval_steps_per_second": 0.198,
"step": 486
},
{
"epoch": 1.0082177709296354,
"grad_norm": 5.486711025238037,
"learning_rate": 4.435299497027892e-05,
"loss": 0.8596,
"step": 490
},
{
"epoch": 1.0287621982537236,
"grad_norm": 7.477694988250732,
"learning_rate": 4.412437128486511e-05,
"loss": 0.8668,
"step": 500
},
{
"epoch": 1.049306625577812,
"grad_norm": 5.820909023284912,
"learning_rate": 4.3895747599451304e-05,
"loss": 0.9094,
"step": 510
},
{
"epoch": 1.0698510529019003,
"grad_norm": 8.668384552001953,
"learning_rate": 4.366712391403749e-05,
"loss": 0.8712,
"step": 520
},
{
"epoch": 1.0903954802259888,
"grad_norm": 6.3633575439453125,
"learning_rate": 4.343850022862369e-05,
"loss": 0.8525,
"step": 530
},
{
"epoch": 1.110939907550077,
"grad_norm": 9.032384872436523,
"learning_rate": 4.3209876543209875e-05,
"loss": 0.7651,
"step": 540
},
{
"epoch": 1.1314843348741654,
"grad_norm": 8.188101768493652,
"learning_rate": 4.298125285779607e-05,
"loss": 0.9195,
"step": 550
},
{
"epoch": 1.1520287621982537,
"grad_norm": 6.771944046020508,
"learning_rate": 4.2752629172382264e-05,
"loss": 0.9012,
"step": 560
},
{
"epoch": 1.1725731895223421,
"grad_norm": 6.4685187339782715,
"learning_rate": 4.252400548696845e-05,
"loss": 0.9641,
"step": 570
},
{
"epoch": 1.1931176168464304,
"grad_norm": 6.535536289215088,
"learning_rate": 4.229538180155465e-05,
"loss": 0.8175,
"step": 580
},
{
"epoch": 1.2136620441705188,
"grad_norm": 7.542140960693359,
"learning_rate": 4.2066758116140835e-05,
"loss": 0.9377,
"step": 590
},
{
"epoch": 1.234206471494607,
"grad_norm": 5.927305221557617,
"learning_rate": 4.183813443072703e-05,
"loss": 0.9509,
"step": 600
},
{
"epoch": 1.2547508988186955,
"grad_norm": 7.140214920043945,
"learning_rate": 4.160951074531322e-05,
"loss": 0.8999,
"step": 610
},
{
"epoch": 1.2752953261427837,
"grad_norm": 7.2755231857299805,
"learning_rate": 4.138088705989941e-05,
"loss": 0.9626,
"step": 620
},
{
"epoch": 1.2958397534668722,
"grad_norm": 5.402399063110352,
"learning_rate": 4.11522633744856e-05,
"loss": 0.7817,
"step": 630
},
{
"epoch": 1.3163841807909604,
"grad_norm": 5.163234710693359,
"learning_rate": 4.092363968907179e-05,
"loss": 0.8986,
"step": 640
},
{
"epoch": 1.3369286081150489,
"grad_norm": 7.63501501083374,
"learning_rate": 4.069501600365798e-05,
"loss": 0.7566,
"step": 650
},
{
"epoch": 1.357473035439137,
"grad_norm": 7.454900741577148,
"learning_rate": 4.046639231824417e-05,
"loss": 0.8702,
"step": 660
},
{
"epoch": 1.3780174627632253,
"grad_norm": 6.798664569854736,
"learning_rate": 4.0237768632830365e-05,
"loss": 0.9139,
"step": 670
},
{
"epoch": 1.3985618900873138,
"grad_norm": 6.475697040557861,
"learning_rate": 4.000914494741655e-05,
"loss": 0.8935,
"step": 680
},
{
"epoch": 1.4191063174114023,
"grad_norm": 7.091508865356445,
"learning_rate": 3.978052126200275e-05,
"loss": 0.8321,
"step": 690
},
{
"epoch": 1.4396507447354905,
"grad_norm": 6.339083671569824,
"learning_rate": 3.9551897576588936e-05,
"loss": 0.803,
"step": 700
},
{
"epoch": 1.4601951720595787,
"grad_norm": 7.827945709228516,
"learning_rate": 3.932327389117513e-05,
"loss": 0.9727,
"step": 710
},
{
"epoch": 1.4807395993836672,
"grad_norm": 7.140174865722656,
"learning_rate": 3.909465020576132e-05,
"loss": 0.7649,
"step": 720
},
{
"epoch": 1.5012840267077556,
"grad_norm": 6.504294395446777,
"learning_rate": 3.8866026520347506e-05,
"loss": 0.8051,
"step": 730
},
{
"epoch": 1.5218284540318439,
"grad_norm": 7.512494087219238,
"learning_rate": 3.86374028349337e-05,
"loss": 0.9029,
"step": 740
},
{
"epoch": 1.542372881355932,
"grad_norm": 6.313861846923828,
"learning_rate": 3.840877914951989e-05,
"loss": 0.8271,
"step": 750
},
{
"epoch": 1.5629173086800205,
"grad_norm": 7.215080738067627,
"learning_rate": 3.8180155464106083e-05,
"loss": 0.9254,
"step": 760
},
{
"epoch": 1.583461736004109,
"grad_norm": 6.017473220825195,
"learning_rate": 3.795153177869227e-05,
"loss": 0.7945,
"step": 770
},
{
"epoch": 1.6040061633281972,
"grad_norm": 6.04453706741333,
"learning_rate": 3.7722908093278466e-05,
"loss": 0.8443,
"step": 780
},
{
"epoch": 1.6245505906522855,
"grad_norm": 7.159928798675537,
"learning_rate": 3.7494284407864654e-05,
"loss": 0.9719,
"step": 790
},
{
"epoch": 1.645095017976374,
"grad_norm": 6.73528528213501,
"learning_rate": 3.726566072245085e-05,
"loss": 0.8892,
"step": 800
},
{
"epoch": 1.6656394453004624,
"grad_norm": 7.667166709899902,
"learning_rate": 3.7037037037037037e-05,
"loss": 0.8208,
"step": 810
},
{
"epoch": 1.6861838726245506,
"grad_norm": 9.423199653625488,
"learning_rate": 3.6808413351623224e-05,
"loss": 0.8314,
"step": 820
},
{
"epoch": 1.7067282999486388,
"grad_norm": 8.343061447143555,
"learning_rate": 3.657978966620942e-05,
"loss": 0.8463,
"step": 830
},
{
"epoch": 1.7272727272727273,
"grad_norm": 5.084173679351807,
"learning_rate": 3.635116598079561e-05,
"loss": 0.8542,
"step": 840
},
{
"epoch": 1.7478171545968157,
"grad_norm": 6.383463382720947,
"learning_rate": 3.612254229538181e-05,
"loss": 0.8588,
"step": 850
},
{
"epoch": 1.768361581920904,
"grad_norm": 5.376101016998291,
"learning_rate": 3.5893918609967996e-05,
"loss": 0.752,
"step": 860
},
{
"epoch": 1.7889060092449922,
"grad_norm": 7.792232036590576,
"learning_rate": 3.566529492455419e-05,
"loss": 0.7482,
"step": 870
},
{
"epoch": 1.8094504365690807,
"grad_norm": 7.509520053863525,
"learning_rate": 3.543667123914038e-05,
"loss": 0.8904,
"step": 880
},
{
"epoch": 1.8299948638931691,
"grad_norm": 5.035109996795654,
"learning_rate": 3.520804755372657e-05,
"loss": 0.7929,
"step": 890
},
{
"epoch": 1.8505392912172574,
"grad_norm": 6.189474105834961,
"learning_rate": 3.497942386831276e-05,
"loss": 0.824,
"step": 900
},
{
"epoch": 1.8710837185413456,
"grad_norm": 5.561000347137451,
"learning_rate": 3.475080018289895e-05,
"loss": 0.8891,
"step": 910
},
{
"epoch": 1.891628145865434,
"grad_norm": 5.997035026550293,
"learning_rate": 3.4522176497485144e-05,
"loss": 0.8758,
"step": 920
},
{
"epoch": 1.9121725731895225,
"grad_norm": 8.139898300170898,
"learning_rate": 3.429355281207133e-05,
"loss": 0.8804,
"step": 930
},
{
"epoch": 1.9327170005136107,
"grad_norm": 5.594916820526123,
"learning_rate": 3.406492912665753e-05,
"loss": 0.8034,
"step": 940
},
{
"epoch": 1.953261427837699,
"grad_norm": 6.618235111236572,
"learning_rate": 3.3836305441243715e-05,
"loss": 0.8684,
"step": 950
},
{
"epoch": 1.9738058551617874,
"grad_norm": 5.163496017456055,
"learning_rate": 3.360768175582991e-05,
"loss": 0.8248,
"step": 960
},
{
"epoch": 1.9943502824858759,
"grad_norm": 6.3719916343688965,
"learning_rate": 3.33790580704161e-05,
"loss": 0.8797,
"step": 970
},
{
"epoch": 1.9984591679506933,
"eval_accuracy": 0.9137532505056342,
"eval_loss": 0.20739668607711792,
"eval_runtime": 1009.1763,
"eval_samples_per_second": 6.859,
"eval_steps_per_second": 0.215,
"step": 972
},
{
"epoch": 2.0164355418592708,
"grad_norm": 4.454899787902832,
"learning_rate": 3.3150434385002285e-05,
"loss": 0.7782,
"step": 980
},
{
"epoch": 2.036979969183359,
"grad_norm": 6.4835686683654785,
"learning_rate": 3.292181069958848e-05,
"loss": 0.9306,
"step": 990
},
{
"epoch": 2.0575243965074472,
"grad_norm": 7.925194263458252,
"learning_rate": 3.269318701417467e-05,
"loss": 0.8513,
"step": 1000
},
{
"epoch": 2.078068823831536,
"grad_norm": 6.563453197479248,
"learning_rate": 3.246456332876086e-05,
"loss": 0.7011,
"step": 1010
},
{
"epoch": 2.098613251155624,
"grad_norm": 7.153360366821289,
"learning_rate": 3.223593964334705e-05,
"loss": 0.8101,
"step": 1020
},
{
"epoch": 2.1191576784797124,
"grad_norm": 6.468135356903076,
"learning_rate": 3.2007315957933245e-05,
"loss": 0.9004,
"step": 1030
},
{
"epoch": 2.1397021058038006,
"grad_norm": 7.5966796875,
"learning_rate": 3.177869227251943e-05,
"loss": 0.7842,
"step": 1040
},
{
"epoch": 2.1602465331278893,
"grad_norm": 6.29899787902832,
"learning_rate": 3.155006858710563e-05,
"loss": 0.7849,
"step": 1050
},
{
"epoch": 2.1807909604519775,
"grad_norm": 7.601044654846191,
"learning_rate": 3.1321444901691816e-05,
"loss": 0.8189,
"step": 1060
},
{
"epoch": 2.2013353877760657,
"grad_norm": 4.997586727142334,
"learning_rate": 3.1092821216278004e-05,
"loss": 0.9379,
"step": 1070
},
{
"epoch": 2.221879815100154,
"grad_norm": 6.758198261260986,
"learning_rate": 3.08641975308642e-05,
"loss": 0.8441,
"step": 1080
},
{
"epoch": 2.242424242424242,
"grad_norm": 8.121203422546387,
"learning_rate": 3.0635573845450386e-05,
"loss": 0.8064,
"step": 1090
},
{
"epoch": 2.262968669748331,
"grad_norm": 6.168539047241211,
"learning_rate": 3.0406950160036577e-05,
"loss": 0.8648,
"step": 1100
},
{
"epoch": 2.283513097072419,
"grad_norm": 8.698349952697754,
"learning_rate": 3.017832647462277e-05,
"loss": 0.9455,
"step": 1110
},
{
"epoch": 2.3040575243965074,
"grad_norm": 6.176644802093506,
"learning_rate": 2.994970278920896e-05,
"loss": 0.7805,
"step": 1120
},
{
"epoch": 2.324601951720596,
"grad_norm": 6.344507694244385,
"learning_rate": 2.972107910379515e-05,
"loss": 0.6886,
"step": 1130
},
{
"epoch": 2.3451463790446843,
"grad_norm": 9.571560859680176,
"learning_rate": 2.949245541838135e-05,
"loss": 0.8937,
"step": 1140
},
{
"epoch": 2.3656908063687725,
"grad_norm": 7.252879619598389,
"learning_rate": 2.926383173296754e-05,
"loss": 0.831,
"step": 1150
},
{
"epoch": 2.3862352336928607,
"grad_norm": 8.151792526245117,
"learning_rate": 2.903520804755373e-05,
"loss": 0.8442,
"step": 1160
},
{
"epoch": 2.406779661016949,
"grad_norm": 7.087300777435303,
"learning_rate": 2.880658436213992e-05,
"loss": 0.7779,
"step": 1170
},
{
"epoch": 2.4273240883410376,
"grad_norm": 6.533515930175781,
"learning_rate": 2.857796067672611e-05,
"loss": 0.8222,
"step": 1180
},
{
"epoch": 2.447868515665126,
"grad_norm": 7.180200576782227,
"learning_rate": 2.8349336991312303e-05,
"loss": 0.8127,
"step": 1190
},
{
"epoch": 2.468412942989214,
"grad_norm": 6.345178604125977,
"learning_rate": 2.8120713305898494e-05,
"loss": 0.8132,
"step": 1200
},
{
"epoch": 2.4889573703133023,
"grad_norm": 7.64429235458374,
"learning_rate": 2.7892089620484685e-05,
"loss": 0.9019,
"step": 1210
},
{
"epoch": 2.509501797637391,
"grad_norm": 8.274768829345703,
"learning_rate": 2.7663465935070876e-05,
"loss": 0.8992,
"step": 1220
},
{
"epoch": 2.5300462249614792,
"grad_norm": 6.977597713470459,
"learning_rate": 2.7434842249657068e-05,
"loss": 0.8527,
"step": 1230
},
{
"epoch": 2.5505906522855675,
"grad_norm": 6.692368030548096,
"learning_rate": 2.720621856424326e-05,
"loss": 0.8021,
"step": 1240
},
{
"epoch": 2.5711350796096557,
"grad_norm": 6.690873622894287,
"learning_rate": 2.6977594878829447e-05,
"loss": 0.8583,
"step": 1250
},
{
"epoch": 2.5916795069337444,
"grad_norm": 6.140628337860107,
"learning_rate": 2.6748971193415638e-05,
"loss": 0.859,
"step": 1260
},
{
"epoch": 2.6122239342578326,
"grad_norm": 6.804861545562744,
"learning_rate": 2.652034750800183e-05,
"loss": 0.7306,
"step": 1270
},
{
"epoch": 2.632768361581921,
"grad_norm": 7.271435260772705,
"learning_rate": 2.629172382258802e-05,
"loss": 0.8166,
"step": 1280
},
{
"epoch": 2.653312788906009,
"grad_norm": 6.456128120422363,
"learning_rate": 2.6063100137174212e-05,
"loss": 0.9386,
"step": 1290
},
{
"epoch": 2.6738572162300978,
"grad_norm": 7.159631252288818,
"learning_rate": 2.5834476451760403e-05,
"loss": 0.8064,
"step": 1300
},
{
"epoch": 2.694401643554186,
"grad_norm": 6.154369831085205,
"learning_rate": 2.5605852766346595e-05,
"loss": 0.8017,
"step": 1310
},
{
"epoch": 2.714946070878274,
"grad_norm": 6.905427932739258,
"learning_rate": 2.5377229080932786e-05,
"loss": 0.8414,
"step": 1320
},
{
"epoch": 2.7354904982023625,
"grad_norm": 7.776165962219238,
"learning_rate": 2.5148605395518977e-05,
"loss": 0.8297,
"step": 1330
},
{
"epoch": 2.7560349255264507,
"grad_norm": 6.178536415100098,
"learning_rate": 2.4919981710105165e-05,
"loss": 0.7867,
"step": 1340
},
{
"epoch": 2.7765793528505394,
"grad_norm": 5.276233196258545,
"learning_rate": 2.4691358024691357e-05,
"loss": 0.8698,
"step": 1350
},
{
"epoch": 2.7971237801746276,
"grad_norm": 5.217291355133057,
"learning_rate": 2.446273433927755e-05,
"loss": 0.8277,
"step": 1360
},
{
"epoch": 2.817668207498716,
"grad_norm": 5.9258856773376465,
"learning_rate": 2.4234110653863743e-05,
"loss": 0.8079,
"step": 1370
},
{
"epoch": 2.8382126348228045,
"grad_norm": 5.345384120941162,
"learning_rate": 2.4005486968449934e-05,
"loss": 0.7356,
"step": 1380
},
{
"epoch": 2.8587570621468927,
"grad_norm": 6.879024505615234,
"learning_rate": 2.3776863283036125e-05,
"loss": 0.8116,
"step": 1390
},
{
"epoch": 2.879301489470981,
"grad_norm": 5.867737770080566,
"learning_rate": 2.3548239597622316e-05,
"loss": 0.7428,
"step": 1400
},
{
"epoch": 2.899845916795069,
"grad_norm": 6.256878852844238,
"learning_rate": 2.3319615912208508e-05,
"loss": 0.8134,
"step": 1410
},
{
"epoch": 2.9203903441191574,
"grad_norm": 6.346487045288086,
"learning_rate": 2.3090992226794696e-05,
"loss": 0.6877,
"step": 1420
},
{
"epoch": 2.940934771443246,
"grad_norm": 5.782416820526123,
"learning_rate": 2.2862368541380887e-05,
"loss": 0.8478,
"step": 1430
},
{
"epoch": 2.9614791987673343,
"grad_norm": 6.417751312255859,
"learning_rate": 2.2633744855967078e-05,
"loss": 0.7668,
"step": 1440
},
{
"epoch": 2.9820236260914226,
"grad_norm": 6.743643760681152,
"learning_rate": 2.240512117055327e-05,
"loss": 0.8144,
"step": 1450
},
{
"epoch": 2.9984591679506933,
"eval_accuracy": 0.9263218722912453,
"eval_loss": 0.17973794043064117,
"eval_runtime": 1009.1137,
"eval_samples_per_second": 6.859,
"eval_steps_per_second": 0.215,
"step": 1458
},
{
"epoch": 3.0041088854648175,
"grad_norm": 6.051700592041016,
"learning_rate": 2.217649748513946e-05,
"loss": 0.9408,
"step": 1460
},
{
"epoch": 3.024653312788906,
"grad_norm": 8.463972091674805,
"learning_rate": 2.1947873799725652e-05,
"loss": 0.7778,
"step": 1470
},
{
"epoch": 3.0451977401129944,
"grad_norm": 6.033344268798828,
"learning_rate": 2.1719250114311843e-05,
"loss": 0.7648,
"step": 1480
},
{
"epoch": 3.0657421674370826,
"grad_norm": 6.287738800048828,
"learning_rate": 2.1490626428898035e-05,
"loss": 0.7507,
"step": 1490
},
{
"epoch": 3.086286594761171,
"grad_norm": 6.505873680114746,
"learning_rate": 2.1262002743484226e-05,
"loss": 0.7668,
"step": 1500
},
{
"epoch": 3.1068310220852595,
"grad_norm": 5.928491115570068,
"learning_rate": 2.1033379058070417e-05,
"loss": 0.8387,
"step": 1510
},
{
"epoch": 3.1273754494093478,
"grad_norm": 8.137348175048828,
"learning_rate": 2.080475537265661e-05,
"loss": 0.7546,
"step": 1520
},
{
"epoch": 3.147919876733436,
"grad_norm": 6.729381561279297,
"learning_rate": 2.05761316872428e-05,
"loss": 0.7086,
"step": 1530
},
{
"epoch": 3.168464304057524,
"grad_norm": 6.623689651489258,
"learning_rate": 2.034750800182899e-05,
"loss": 0.9436,
"step": 1540
},
{
"epoch": 3.189008731381613,
"grad_norm": 7.943443298339844,
"learning_rate": 2.0118884316415183e-05,
"loss": 0.8645,
"step": 1550
},
{
"epoch": 3.209553158705701,
"grad_norm": 5.353769302368164,
"learning_rate": 1.9890260631001374e-05,
"loss": 0.7909,
"step": 1560
},
{
"epoch": 3.2300975860297894,
"grad_norm": 6.193889141082764,
"learning_rate": 1.9661636945587565e-05,
"loss": 0.7595,
"step": 1570
},
{
"epoch": 3.2506420133538776,
"grad_norm": 8.73640251159668,
"learning_rate": 1.9433013260173753e-05,
"loss": 0.729,
"step": 1580
},
{
"epoch": 3.2711864406779663,
"grad_norm": 6.345188617706299,
"learning_rate": 1.9204389574759944e-05,
"loss": 0.6933,
"step": 1590
},
{
"epoch": 3.2917308680020545,
"grad_norm": 9.154464721679688,
"learning_rate": 1.8975765889346136e-05,
"loss": 0.8427,
"step": 1600
},
{
"epoch": 3.3122752953261427,
"grad_norm": 7.497635841369629,
"learning_rate": 1.8747142203932327e-05,
"loss": 0.7922,
"step": 1610
},
{
"epoch": 3.332819722650231,
"grad_norm": 6.4137468338012695,
"learning_rate": 1.8518518518518518e-05,
"loss": 0.7468,
"step": 1620
},
{
"epoch": 3.3533641499743196,
"grad_norm": 8.157144546508789,
"learning_rate": 1.828989483310471e-05,
"loss": 0.7347,
"step": 1630
},
{
"epoch": 3.373908577298408,
"grad_norm": 5.25002908706665,
"learning_rate": 1.8061271147690904e-05,
"loss": 0.7833,
"step": 1640
},
{
"epoch": 3.394453004622496,
"grad_norm": 6.77322244644165,
"learning_rate": 1.7832647462277096e-05,
"loss": 0.7639,
"step": 1650
},
{
"epoch": 3.4149974319465843,
"grad_norm": 6.466352462768555,
"learning_rate": 1.7604023776863283e-05,
"loss": 0.6913,
"step": 1660
},
{
"epoch": 3.435541859270673,
"grad_norm": 6.149074077606201,
"learning_rate": 1.7375400091449475e-05,
"loss": 0.85,
"step": 1670
},
{
"epoch": 3.4560862865947612,
"grad_norm": 8.840483665466309,
"learning_rate": 1.7146776406035666e-05,
"loss": 0.8483,
"step": 1680
},
{
"epoch": 3.4766307139188495,
"grad_norm": 7.109951019287109,
"learning_rate": 1.6918152720621857e-05,
"loss": 0.8669,
"step": 1690
},
{
"epoch": 3.4971751412429377,
"grad_norm": 7.431482315063477,
"learning_rate": 1.668952903520805e-05,
"loss": 0.7942,
"step": 1700
},
{
"epoch": 3.517719568567026,
"grad_norm": 8.127689361572266,
"learning_rate": 1.646090534979424e-05,
"loss": 0.8801,
"step": 1710
},
{
"epoch": 3.5382639958911146,
"grad_norm": 5.593295574188232,
"learning_rate": 1.623228166438043e-05,
"loss": 0.797,
"step": 1720
},
{
"epoch": 3.558808423215203,
"grad_norm": 6.143307685852051,
"learning_rate": 1.6003657978966623e-05,
"loss": 0.8404,
"step": 1730
},
{
"epoch": 3.579352850539291,
"grad_norm": 7.268124103546143,
"learning_rate": 1.5775034293552814e-05,
"loss": 0.7545,
"step": 1740
},
{
"epoch": 3.5998972778633798,
"grad_norm": 6.966310501098633,
"learning_rate": 1.5546410608139002e-05,
"loss": 0.7656,
"step": 1750
},
{
"epoch": 3.620441705187468,
"grad_norm": 7.398248672485352,
"learning_rate": 1.5317786922725193e-05,
"loss": 0.8126,
"step": 1760
},
{
"epoch": 3.6409861325115562,
"grad_norm": 7.494919776916504,
"learning_rate": 1.5089163237311384e-05,
"loss": 0.908,
"step": 1770
},
{
"epoch": 3.6615305598356445,
"grad_norm": 5.857889175415039,
"learning_rate": 1.4860539551897576e-05,
"loss": 0.7003,
"step": 1780
},
{
"epoch": 3.6820749871597327,
"grad_norm": 8.693001747131348,
"learning_rate": 1.463191586648377e-05,
"loss": 0.8321,
"step": 1790
},
{
"epoch": 3.7026194144838214,
"grad_norm": 8.051487922668457,
"learning_rate": 1.440329218106996e-05,
"loss": 0.8759,
"step": 1800
},
{
"epoch": 3.7231638418079096,
"grad_norm": 5.1894612312316895,
"learning_rate": 1.4174668495656151e-05,
"loss": 0.7325,
"step": 1810
},
{
"epoch": 3.743708269131998,
"grad_norm": 5.104062557220459,
"learning_rate": 1.3946044810242343e-05,
"loss": 0.7732,
"step": 1820
},
{
"epoch": 3.7642526964560865,
"grad_norm": 7.708363056182861,
"learning_rate": 1.3717421124828534e-05,
"loss": 0.8328,
"step": 1830
},
{
"epoch": 3.7847971237801747,
"grad_norm": 6.15858268737793,
"learning_rate": 1.3488797439414723e-05,
"loss": 0.738,
"step": 1840
},
{
"epoch": 3.805341551104263,
"grad_norm": 7.959890365600586,
"learning_rate": 1.3260173754000915e-05,
"loss": 0.818,
"step": 1850
},
{
"epoch": 3.825885978428351,
"grad_norm": 7.602783679962158,
"learning_rate": 1.3031550068587106e-05,
"loss": 0.9028,
"step": 1860
},
{
"epoch": 3.8464304057524394,
"grad_norm": 7.417806625366211,
"learning_rate": 1.2802926383173297e-05,
"loss": 0.8158,
"step": 1870
},
{
"epoch": 3.866974833076528,
"grad_norm": 6.925180435180664,
"learning_rate": 1.2574302697759489e-05,
"loss": 0.7867,
"step": 1880
},
{
"epoch": 3.8875192604006163,
"grad_norm": 4.716423034667969,
"learning_rate": 1.2345679012345678e-05,
"loss": 0.6969,
"step": 1890
},
{
"epoch": 3.9080636877247046,
"grad_norm": 5.976194381713867,
"learning_rate": 1.2117055326931871e-05,
"loss": 0.7292,
"step": 1900
},
{
"epoch": 3.9286081150487933,
"grad_norm": 6.812644958496094,
"learning_rate": 1.1888431641518063e-05,
"loss": 0.7845,
"step": 1910
},
{
"epoch": 3.9491525423728815,
"grad_norm": 7.749550819396973,
"learning_rate": 1.1659807956104254e-05,
"loss": 0.8008,
"step": 1920
},
{
"epoch": 3.9696969696969697,
"grad_norm": 7.299574375152588,
"learning_rate": 1.1431184270690443e-05,
"loss": 0.7896,
"step": 1930
},
{
"epoch": 3.990241397021058,
"grad_norm": 7.994142055511475,
"learning_rate": 1.1202560585276635e-05,
"loss": 0.9243,
"step": 1940
},
{
"epoch": 3.9984591679506933,
"eval_accuracy": 0.9232880670326495,
"eval_loss": 0.18616917729377747,
"eval_runtime": 1074.4755,
"eval_samples_per_second": 6.442,
"eval_steps_per_second": 0.202,
"step": 1944
},
{
"epoch": 4.012326656394453,
"grad_norm": 6.763792991638184,
"learning_rate": 1.0973936899862826e-05,
"loss": 0.7882,
"step": 1950
},
{
"epoch": 4.0328710837185415,
"grad_norm": 7.702907085418701,
"learning_rate": 1.0745313214449017e-05,
"loss": 0.8705,
"step": 1960
},
{
"epoch": 4.05341551104263,
"grad_norm": 6.545944690704346,
"learning_rate": 1.0516689529035209e-05,
"loss": 0.8295,
"step": 1970
},
{
"epoch": 4.073959938366718,
"grad_norm": 7.43347692489624,
"learning_rate": 1.02880658436214e-05,
"loss": 0.7686,
"step": 1980
},
{
"epoch": 4.094504365690806,
"grad_norm": 6.285999298095703,
"learning_rate": 1.0059442158207591e-05,
"loss": 0.7991,
"step": 1990
},
{
"epoch": 4.1150487930148945,
"grad_norm": 8.586403846740723,
"learning_rate": 9.830818472793783e-06,
"loss": 0.7933,
"step": 2000
},
{
"epoch": 4.135593220338983,
"grad_norm": 7.343191623687744,
"learning_rate": 9.602194787379972e-06,
"loss": 0.7762,
"step": 2010
},
{
"epoch": 4.156137647663072,
"grad_norm": 5.3163933753967285,
"learning_rate": 9.373571101966163e-06,
"loss": 0.7508,
"step": 2020
},
{
"epoch": 4.17668207498716,
"grad_norm": 6.618367671966553,
"learning_rate": 9.144947416552355e-06,
"loss": 0.671,
"step": 2030
},
{
"epoch": 4.197226502311248,
"grad_norm": 4.874975681304932,
"learning_rate": 8.916323731138548e-06,
"loss": 0.713,
"step": 2040
},
{
"epoch": 4.2177709296353365,
"grad_norm": 6.649152755737305,
"learning_rate": 8.687700045724737e-06,
"loss": 0.8196,
"step": 2050
},
{
"epoch": 4.238315356959425,
"grad_norm": 6.106600284576416,
"learning_rate": 8.459076360310929e-06,
"loss": 0.744,
"step": 2060
},
{
"epoch": 4.258859784283513,
"grad_norm": 7.9351630210876465,
"learning_rate": 8.23045267489712e-06,
"loss": 0.7915,
"step": 2070
},
{
"epoch": 4.279404211607601,
"grad_norm": 8.719624519348145,
"learning_rate": 8.001828989483311e-06,
"loss": 0.8307,
"step": 2080
},
{
"epoch": 4.299948638931689,
"grad_norm": 8.237951278686523,
"learning_rate": 7.773205304069501e-06,
"loss": 0.7875,
"step": 2090
},
{
"epoch": 4.320493066255779,
"grad_norm": 7.945969581604004,
"learning_rate": 7.544581618655692e-06,
"loss": 0.7283,
"step": 2100
},
{
"epoch": 4.341037493579867,
"grad_norm": 9.574934959411621,
"learning_rate": 7.315957933241885e-06,
"loss": 0.8088,
"step": 2110
},
{
"epoch": 4.361581920903955,
"grad_norm": 7.645279884338379,
"learning_rate": 7.087334247828076e-06,
"loss": 0.77,
"step": 2120
},
{
"epoch": 4.382126348228043,
"grad_norm": 7.663369178771973,
"learning_rate": 6.858710562414267e-06,
"loss": 0.7608,
"step": 2130
},
{
"epoch": 4.4026707755521315,
"grad_norm": 7.651998996734619,
"learning_rate": 6.630086877000457e-06,
"loss": 0.7465,
"step": 2140
},
{
"epoch": 4.42321520287622,
"grad_norm": 6.678437232971191,
"learning_rate": 6.401463191586649e-06,
"loss": 0.725,
"step": 2150
},
{
"epoch": 4.443759630200308,
"grad_norm": 7.033961772918701,
"learning_rate": 6.172839506172839e-06,
"loss": 0.7678,
"step": 2160
},
{
"epoch": 4.464304057524396,
"grad_norm": 7.408419609069824,
"learning_rate": 5.944215820759031e-06,
"loss": 0.7689,
"step": 2170
},
{
"epoch": 4.484848484848484,
"grad_norm": 8.51754379272461,
"learning_rate": 5.715592135345222e-06,
"loss": 0.852,
"step": 2180
},
{
"epoch": 4.5053929121725735,
"grad_norm": 5.929790019989014,
"learning_rate": 5.486968449931413e-06,
"loss": 0.7294,
"step": 2190
},
{
"epoch": 4.525937339496662,
"grad_norm": 7.980464935302734,
"learning_rate": 5.258344764517604e-06,
"loss": 0.8615,
"step": 2200
},
{
"epoch": 4.54648176682075,
"grad_norm": 9.191452026367188,
"learning_rate": 5.029721079103796e-06,
"loss": 0.9008,
"step": 2210
},
{
"epoch": 4.567026194144838,
"grad_norm": 8.33234977722168,
"learning_rate": 4.801097393689986e-06,
"loss": 0.8745,
"step": 2220
},
{
"epoch": 4.5875706214689265,
"grad_norm": 6.941461086273193,
"learning_rate": 4.572473708276177e-06,
"loss": 0.7385,
"step": 2230
},
{
"epoch": 4.608115048793015,
"grad_norm": 4.740252494812012,
"learning_rate": 4.343850022862369e-06,
"loss": 0.7224,
"step": 2240
},
{
"epoch": 4.628659476117103,
"grad_norm": 7.316972255706787,
"learning_rate": 4.11522633744856e-06,
"loss": 0.8351,
"step": 2250
},
{
"epoch": 4.649203903441192,
"grad_norm": 6.795185565948486,
"learning_rate": 3.8866026520347504e-06,
"loss": 0.7554,
"step": 2260
},
{
"epoch": 4.66974833076528,
"grad_norm": 8.930145263671875,
"learning_rate": 3.6579789666209426e-06,
"loss": 0.8177,
"step": 2270
},
{
"epoch": 4.6902927580893685,
"grad_norm": 7.089832782745361,
"learning_rate": 3.4293552812071335e-06,
"loss": 0.7476,
"step": 2280
},
{
"epoch": 4.710837185413457,
"grad_norm": 6.567149639129639,
"learning_rate": 3.2007315957933243e-06,
"loss": 0.841,
"step": 2290
},
{
"epoch": 4.731381612737545,
"grad_norm": 6.558457851409912,
"learning_rate": 2.9721079103795156e-06,
"loss": 0.7575,
"step": 2300
},
{
"epoch": 4.751926040061633,
"grad_norm": 8.95292854309082,
"learning_rate": 2.7434842249657065e-06,
"loss": 0.7493,
"step": 2310
},
{
"epoch": 4.7724704673857214,
"grad_norm": 7.489271640777588,
"learning_rate": 2.514860539551898e-06,
"loss": 0.716,
"step": 2320
},
{
"epoch": 4.79301489470981,
"grad_norm": 7.381133079528809,
"learning_rate": 2.2862368541380887e-06,
"loss": 0.8227,
"step": 2330
},
{
"epoch": 4.813559322033898,
"grad_norm": 8.651927947998047,
"learning_rate": 2.05761316872428e-06,
"loss": 0.7768,
"step": 2340
},
{
"epoch": 4.834103749357987,
"grad_norm": 7.568137168884277,
"learning_rate": 1.8289894833104713e-06,
"loss": 0.7768,
"step": 2350
},
{
"epoch": 4.854648176682075,
"grad_norm": 8.011246681213379,
"learning_rate": 1.6003657978966622e-06,
"loss": 0.7113,
"step": 2360
},
{
"epoch": 4.8751926040061635,
"grad_norm": 8.442760467529297,
"learning_rate": 1.3717421124828533e-06,
"loss": 0.7428,
"step": 2370
},
{
"epoch": 4.895737031330252,
"grad_norm": 5.148819446563721,
"learning_rate": 1.1431184270690443e-06,
"loss": 0.7326,
"step": 2380
},
{
"epoch": 4.91628145865434,
"grad_norm": 6.452014923095703,
"learning_rate": 9.144947416552356e-07,
"loss": 0.8117,
"step": 2390
},
{
"epoch": 4.936825885978428,
"grad_norm": 5.830743789672852,
"learning_rate": 6.858710562414266e-07,
"loss": 0.7118,
"step": 2400
},
{
"epoch": 4.957370313302516,
"grad_norm": 6.933480739593506,
"learning_rate": 4.572473708276178e-07,
"loss": 0.7462,
"step": 2410
},
{
"epoch": 4.977914740626605,
"grad_norm": 6.512324810028076,
"learning_rate": 2.286236854138089e-07,
"loss": 0.7691,
"step": 2420
},
{
"epoch": 4.998459167950694,
"grad_norm": 6.746326446533203,
"learning_rate": 0.0,
"loss": 0.8199,
"step": 2430
},
{
"epoch": 4.998459167950694,
"eval_accuracy": 0.9276220745449292,
"eval_loss": 0.17630332708358765,
"eval_runtime": 1096.6155,
"eval_samples_per_second": 6.312,
"eval_steps_per_second": 0.198,
"step": 2430
},
{
"epoch": 4.998459167950694,
"step": 2430,
"total_flos": 8.175904340280607e+17,
"train_loss": 0.8756545659445931,
"train_runtime": 61216.2023,
"train_samples_per_second": 5.088,
"train_steps_per_second": 0.04
}
],
"logging_steps": 10,
"max_steps": 2430,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8.175904340280607e+17,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}