{
  "best_metric": 0.9276220745449292,
  "best_model_checkpoint": "mobilenet_v2_1.0_224-finetuned-ISIC-dec2024test\\checkpoint-2430",
  "epoch": 4.998459167950694,
  "eval_steps": 500,
  "global_step": 2430,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02054442732408834,
      "grad_norm": 23.202468872070312,
      "learning_rate": 2.05761316872428e-06,
      "loss": 2.2149,
      "step": 10
    },
    {
      "epoch": 0.04108885464817668,
      "grad_norm": 19.670244216918945,
      "learning_rate": 4.11522633744856e-06,
      "loss": 2.2202,
      "step": 20
    },
    {
      "epoch": 0.061633281972265024,
      "grad_norm": 18.64865493774414,
      "learning_rate": 6.172839506172839e-06,
      "loss": 2.1212,
      "step": 30
    },
    {
      "epoch": 0.08217770929635336,
      "grad_norm": 20.16490936279297,
      "learning_rate": 8.23045267489712e-06,
      "loss": 2.0225,
      "step": 40
    },
    {
      "epoch": 0.1027221366204417,
      "grad_norm": 17.26641845703125,
      "learning_rate": 1.02880658436214e-05,
      "loss": 1.9166,
      "step": 50
    },
    {
      "epoch": 0.12326656394453005,
      "grad_norm": 13.308320045471191,
      "learning_rate": 1.2345679012345678e-05,
      "loss": 1.7424,
      "step": 60
    },
    {
      "epoch": 0.14381099126861838,
      "grad_norm": 14.259194374084473,
      "learning_rate": 1.440329218106996e-05,
      "loss": 1.5742,
      "step": 70
    },
    {
      "epoch": 0.16435541859270672,
      "grad_norm": 9.573338508605957,
      "learning_rate": 1.646090534979424e-05,
      "loss": 1.4589,
      "step": 80
    },
    {
      "epoch": 0.18489984591679506,
      "grad_norm": 7.806981086730957,
      "learning_rate": 1.8518518518518518e-05,
      "loss": 1.2895,
      "step": 90
    },
    {
      "epoch": 0.2054442732408834,
      "grad_norm": 8.471112251281738,
      "learning_rate": 2.05761316872428e-05,
      "loss": 1.1936,
      "step": 100
    },
    {
      "epoch": 0.22598870056497175,
      "grad_norm": 7.020042419433594,
      "learning_rate": 2.2633744855967078e-05,
      "loss": 0.9931,
      "step": 110
    },
    {
      "epoch": 0.2465331278890601,
      "grad_norm": 8.448640823364258,
      "learning_rate": 2.4691358024691357e-05,
      "loss": 1.0736,
      "step": 120
    },
    {
      "epoch": 0.2670775552131484,
      "grad_norm": 6.455481052398682,
      "learning_rate": 2.6748971193415638e-05,
      "loss": 1.0588,
      "step": 130
    },
    {
      "epoch": 0.28762198253723675,
      "grad_norm": 7.331775665283203,
      "learning_rate": 2.880658436213992e-05,
      "loss": 1.0402,
      "step": 140
    },
    {
      "epoch": 0.3081664098613251,
      "grad_norm": 6.955947399139404,
      "learning_rate": 3.08641975308642e-05,
      "loss": 0.9195,
      "step": 150
    },
    {
      "epoch": 0.32871083718541344,
      "grad_norm": 7.389803886413574,
      "learning_rate": 3.292181069958848e-05,
      "loss": 1.0719,
      "step": 160
    },
    {
      "epoch": 0.3492552645095018,
      "grad_norm": 7.89853572845459,
      "learning_rate": 3.497942386831276e-05,
      "loss": 1.0268,
      "step": 170
    },
    {
      "epoch": 0.3697996918335901,
      "grad_norm": 6.920297622680664,
      "learning_rate": 3.7037037037037037e-05,
      "loss": 0.8495,
      "step": 180
    },
    {
      "epoch": 0.39034411915767847,
      "grad_norm": 6.326653480529785,
      "learning_rate": 3.909465020576132e-05,
      "loss": 0.8852,
      "step": 190
    },
    {
      "epoch": 0.4108885464817668,
      "grad_norm": 6.971718788146973,
      "learning_rate": 4.11522633744856e-05,
      "loss": 0.8968,
      "step": 200
    },
    {
      "epoch": 0.43143297380585516,
      "grad_norm": 5.848041534423828,
      "learning_rate": 4.3209876543209875e-05,
      "loss": 0.9205,
      "step": 210
    },
    {
      "epoch": 0.4519774011299435,
      "grad_norm": 8.545123100280762,
      "learning_rate": 4.5267489711934157e-05,
      "loss": 1.0261,
      "step": 220
    },
    {
      "epoch": 0.47252182845403184,
      "grad_norm": 6.959014892578125,
      "learning_rate": 4.732510288065844e-05,
      "loss": 0.8751,
      "step": 230
    },
    {
      "epoch": 0.4930662557781202,
      "grad_norm": 8.537935256958008,
      "learning_rate": 4.938271604938271e-05,
      "loss": 0.9604,
      "step": 240
    },
    {
      "epoch": 0.5136106831022085,
      "grad_norm": 7.854872226715088,
      "learning_rate": 4.983996342021033e-05,
      "loss": 0.9493,
      "step": 250
    },
    {
      "epoch": 0.5341551104262968,
      "grad_norm": 8.308143615722656,
      "learning_rate": 4.9611339734796525e-05,
      "loss": 0.9021,
      "step": 260
    },
    {
      "epoch": 0.5546995377503852,
      "grad_norm": 8.60777759552002,
      "learning_rate": 4.938271604938271e-05,
      "loss": 0.9398,
      "step": 270
    },
    {
      "epoch": 0.5752439650744735,
      "grad_norm": 6.065791606903076,
      "learning_rate": 4.9154092363968915e-05,
      "loss": 0.8961,
      "step": 280
    },
    {
      "epoch": 0.5957883923985619,
      "grad_norm": 7.975262641906738,
      "learning_rate": 4.89254686785551e-05,
      "loss": 0.9541,
      "step": 290
    },
    {
      "epoch": 0.6163328197226502,
      "grad_norm": 8.56069278717041,
      "learning_rate": 4.86968449931413e-05,
      "loss": 1.0714,
      "step": 300
    },
    {
      "epoch": 0.6368772470467385,
      "grad_norm": 7.618618488311768,
      "learning_rate": 4.8468221307727485e-05,
      "loss": 0.9913,
      "step": 310
    },
    {
      "epoch": 0.6574216743708269,
      "grad_norm": 5.433694839477539,
      "learning_rate": 4.823959762231367e-05,
      "loss": 0.8266,
      "step": 320
    },
    {
      "epoch": 0.6779661016949152,
      "grad_norm": 6.71955680847168,
      "learning_rate": 4.801097393689987e-05,
      "loss": 0.9065,
      "step": 330
    },
    {
      "epoch": 0.6985105290190036,
      "grad_norm": 7.317810535430908,
      "learning_rate": 4.7782350251486056e-05,
      "loss": 0.8529,
      "step": 340
    },
    {
      "epoch": 0.7190549563430919,
      "grad_norm": 7.955277919769287,
      "learning_rate": 4.755372656607225e-05,
      "loss": 0.9454,
      "step": 350
    },
    {
      "epoch": 0.7395993836671803,
      "grad_norm": 8.274344444274902,
      "learning_rate": 4.732510288065844e-05,
      "loss": 0.9462,
      "step": 360
    },
    {
      "epoch": 0.7601438109912686,
      "grad_norm": 6.541558265686035,
      "learning_rate": 4.709647919524463e-05,
      "loss": 0.8823,
      "step": 370
    },
    {
      "epoch": 0.7806882383153569,
      "grad_norm": 7.624207019805908,
      "learning_rate": 4.686785550983082e-05,
      "loss": 0.9984,
      "step": 380
    },
    {
      "epoch": 0.8012326656394453,
      "grad_norm": 7.345012187957764,
      "learning_rate": 4.6639231824417016e-05,
      "loss": 0.9381,
      "step": 390
    },
    {
      "epoch": 0.8217770929635336,
      "grad_norm": 7.9643096923828125,
      "learning_rate": 4.6410608139003203e-05,
      "loss": 0.9472,
      "step": 400
    },
    {
      "epoch": 0.842321520287622,
      "grad_norm": 6.939286231994629,
      "learning_rate": 4.618198445358939e-05,
      "loss": 0.9275,
      "step": 410
    },
    {
      "epoch": 0.8628659476117103,
      "grad_norm": 8.748644828796387,
      "learning_rate": 4.5953360768175586e-05,
      "loss": 1.0875,
      "step": 420
    },
    {
      "epoch": 0.8834103749357987,
      "grad_norm": 6.044397354125977,
      "learning_rate": 4.5724737082761774e-05,
      "loss": 1.0232,
      "step": 430
    },
    {
      "epoch": 0.903954802259887,
      "grad_norm": 10.523336410522461,
      "learning_rate": 4.549611339734797e-05,
      "loss": 0.9283,
      "step": 440
    },
    {
      "epoch": 0.9244992295839753,
      "grad_norm": 5.524984836578369,
      "learning_rate": 4.5267489711934157e-05,
      "loss": 0.9606,
      "step": 450
    },
    {
      "epoch": 0.9450436569080637,
      "grad_norm": 7.313499450683594,
      "learning_rate": 4.503886602652035e-05,
      "loss": 0.8652,
      "step": 460
    },
    {
      "epoch": 0.965588084232152,
      "grad_norm": 6.494114398956299,
      "learning_rate": 4.481024234110654e-05,
      "loss": 0.9293,
      "step": 470
    },
    {
      "epoch": 0.9861325115562404,
      "grad_norm": 6.250232219696045,
      "learning_rate": 4.4581618655692734e-05,
      "loss": 0.9055,
      "step": 480
    },
    {
      "epoch": 0.9984591679506933,
      "eval_accuracy": 0.9195319271886738,
      "eval_loss": 0.19548115134239197,
      "eval_runtime": 1093.5686,
      "eval_samples_per_second": 6.33,
      "eval_steps_per_second": 0.198,
      "step": 486
    },
    {
      "epoch": 1.0082177709296354,
      "grad_norm": 5.486711025238037,
      "learning_rate": 4.435299497027892e-05,
      "loss": 0.8596,
      "step": 490
    },
    {
      "epoch": 1.0287621982537236,
      "grad_norm": 7.477694988250732,
      "learning_rate": 4.412437128486511e-05,
      "loss": 0.8668,
      "step": 500
    },
    {
      "epoch": 1.049306625577812,
      "grad_norm": 5.820909023284912,
      "learning_rate": 4.3895747599451304e-05,
      "loss": 0.9094,
      "step": 510
    },
    {
      "epoch": 1.0698510529019003,
      "grad_norm": 8.668384552001953,
      "learning_rate": 4.366712391403749e-05,
      "loss": 0.8712,
      "step": 520
    },
    {
      "epoch": 1.0903954802259888,
      "grad_norm": 6.3633575439453125,
      "learning_rate": 4.343850022862369e-05,
      "loss": 0.8525,
      "step": 530
    },
    {
      "epoch": 1.110939907550077,
      "grad_norm": 9.032384872436523,
      "learning_rate": 4.3209876543209875e-05,
      "loss": 0.7651,
      "step": 540
    },
    {
      "epoch": 1.1314843348741654,
      "grad_norm": 8.188101768493652,
      "learning_rate": 4.298125285779607e-05,
      "loss": 0.9195,
      "step": 550
    },
    {
      "epoch": 1.1520287621982537,
      "grad_norm": 6.771944046020508,
      "learning_rate": 4.2752629172382264e-05,
      "loss": 0.9012,
      "step": 560
    },
    {
      "epoch": 1.1725731895223421,
      "grad_norm": 6.4685187339782715,
      "learning_rate": 4.252400548696845e-05,
      "loss": 0.9641,
      "step": 570
    },
    {
      "epoch": 1.1931176168464304,
      "grad_norm": 6.535536289215088,
      "learning_rate": 4.229538180155465e-05,
      "loss": 0.8175,
      "step": 580
    },
    {
      "epoch": 1.2136620441705188,
      "grad_norm": 7.542140960693359,
      "learning_rate": 4.2066758116140835e-05,
      "loss": 0.9377,
      "step": 590
    },
    {
      "epoch": 1.234206471494607,
      "grad_norm": 5.927305221557617,
      "learning_rate": 4.183813443072703e-05,
      "loss": 0.9509,
      "step": 600
    },
    {
      "epoch": 1.2547508988186955,
      "grad_norm": 7.140214920043945,
      "learning_rate": 4.160951074531322e-05,
      "loss": 0.8999,
      "step": 610
    },
    {
      "epoch": 1.2752953261427837,
      "grad_norm": 7.2755231857299805,
      "learning_rate": 4.138088705989941e-05,
      "loss": 0.9626,
      "step": 620
    },
    {
      "epoch": 1.2958397534668722,
      "grad_norm": 5.402399063110352,
      "learning_rate": 4.11522633744856e-05,
      "loss": 0.7817,
      "step": 630
    },
    {
      "epoch": 1.3163841807909604,
      "grad_norm": 5.163234710693359,
      "learning_rate": 4.092363968907179e-05,
      "loss": 0.8986,
      "step": 640
    },
    {
      "epoch": 1.3369286081150489,
      "grad_norm": 7.63501501083374,
      "learning_rate": 4.069501600365798e-05,
      "loss": 0.7566,
      "step": 650
    },
    {
      "epoch": 1.357473035439137,
      "grad_norm": 7.454900741577148,
      "learning_rate": 4.046639231824417e-05,
      "loss": 0.8702,
      "step": 660
    },
    {
      "epoch": 1.3780174627632253,
      "grad_norm": 6.798664569854736,
      "learning_rate": 4.0237768632830365e-05,
      "loss": 0.9139,
      "step": 670
    },
    {
      "epoch": 1.3985618900873138,
      "grad_norm": 6.475697040557861,
      "learning_rate": 4.000914494741655e-05,
      "loss": 0.8935,
      "step": 680
    },
    {
      "epoch": 1.4191063174114023,
      "grad_norm": 7.091508865356445,
      "learning_rate": 3.978052126200275e-05,
      "loss": 0.8321,
      "step": 690
    },
    {
      "epoch": 1.4396507447354905,
      "grad_norm": 6.339083671569824,
      "learning_rate": 3.9551897576588936e-05,
      "loss": 0.803,
      "step": 700
    },
    {
      "epoch": 1.4601951720595787,
      "grad_norm": 7.827945709228516,
      "learning_rate": 3.932327389117513e-05,
      "loss": 0.9727,
      "step": 710
    },
    {
      "epoch": 1.4807395993836672,
      "grad_norm": 7.140174865722656,
      "learning_rate": 3.909465020576132e-05,
      "loss": 0.7649,
      "step": 720
    },
    {
      "epoch": 1.5012840267077556,
      "grad_norm": 6.504294395446777,
      "learning_rate": 3.8866026520347506e-05,
      "loss": 0.8051,
      "step": 730
    },
    {
      "epoch": 1.5218284540318439,
      "grad_norm": 7.512494087219238,
      "learning_rate": 3.86374028349337e-05,
      "loss": 0.9029,
      "step": 740
    },
    {
      "epoch": 1.542372881355932,
      "grad_norm": 6.313861846923828,
      "learning_rate": 3.840877914951989e-05,
      "loss": 0.8271,
      "step": 750
    },
    {
      "epoch": 1.5629173086800205,
      "grad_norm": 7.215080738067627,
      "learning_rate": 3.8180155464106083e-05,
      "loss": 0.9254,
      "step": 760
    },
    {
      "epoch": 1.583461736004109,
      "grad_norm": 6.017473220825195,
      "learning_rate": 3.795153177869227e-05,
      "loss": 0.7945,
      "step": 770
    },
    {
      "epoch": 1.6040061633281972,
      "grad_norm": 6.04453706741333,
      "learning_rate": 3.7722908093278466e-05,
      "loss": 0.8443,
      "step": 780
    },
    {
      "epoch": 1.6245505906522855,
      "grad_norm": 7.159928798675537,
      "learning_rate": 3.7494284407864654e-05,
      "loss": 0.9719,
      "step": 790
    },
    {
      "epoch": 1.645095017976374,
      "grad_norm": 6.73528528213501,
      "learning_rate": 3.726566072245085e-05,
      "loss": 0.8892,
      "step": 800
    },
    {
      "epoch": 1.6656394453004624,
      "grad_norm": 7.667166709899902,
      "learning_rate": 3.7037037037037037e-05,
      "loss": 0.8208,
      "step": 810
    },
    {
      "epoch": 1.6861838726245506,
      "grad_norm": 9.423199653625488,
      "learning_rate": 3.6808413351623224e-05,
      "loss": 0.8314,
      "step": 820
    },
    {
      "epoch": 1.7067282999486388,
      "grad_norm": 8.343061447143555,
      "learning_rate": 3.657978966620942e-05,
      "loss": 0.8463,
      "step": 830
    },
    {
      "epoch": 1.7272727272727273,
      "grad_norm": 5.084173679351807,
      "learning_rate": 3.635116598079561e-05,
      "loss": 0.8542,
      "step": 840
    },
    {
      "epoch": 1.7478171545968157,
      "grad_norm": 6.383463382720947,
      "learning_rate": 3.612254229538181e-05,
      "loss": 0.8588,
      "step": 850
    },
    {
      "epoch": 1.768361581920904,
      "grad_norm": 5.376101016998291,
      "learning_rate": 3.5893918609967996e-05,
      "loss": 0.752,
      "step": 860
    },
    {
      "epoch": 1.7889060092449922,
      "grad_norm": 7.792232036590576,
      "learning_rate": 3.566529492455419e-05,
      "loss": 0.7482,
      "step": 870
    },
    {
      "epoch": 1.8094504365690807,
      "grad_norm": 7.509520053863525,
      "learning_rate": 3.543667123914038e-05,
      "loss": 0.8904,
      "step": 880
    },
    {
      "epoch": 1.8299948638931691,
      "grad_norm": 5.035109996795654,
      "learning_rate": 3.520804755372657e-05,
      "loss": 0.7929,
      "step": 890
    },
    {
      "epoch": 1.8505392912172574,
      "grad_norm": 6.189474105834961,
      "learning_rate": 3.497942386831276e-05,
      "loss": 0.824,
      "step": 900
    },
    {
      "epoch": 1.8710837185413456,
      "grad_norm": 5.561000347137451,
      "learning_rate": 3.475080018289895e-05,
      "loss": 0.8891,
      "step": 910
    },
    {
      "epoch": 1.891628145865434,
      "grad_norm": 5.997035026550293,
      "learning_rate": 3.4522176497485144e-05,
      "loss": 0.8758,
      "step": 920
    },
    {
      "epoch": 1.9121725731895225,
      "grad_norm": 8.139898300170898,
      "learning_rate": 3.429355281207133e-05,
      "loss": 0.8804,
      "step": 930
    },
    {
      "epoch": 1.9327170005136107,
      "grad_norm": 5.594916820526123,
      "learning_rate": 3.406492912665753e-05,
      "loss": 0.8034,
      "step": 940
    },
    {
      "epoch": 1.953261427837699,
      "grad_norm": 6.618235111236572,
      "learning_rate": 3.3836305441243715e-05,
      "loss": 0.8684,
      "step": 950
    },
    {
      "epoch": 1.9738058551617874,
      "grad_norm": 5.163496017456055,
      "learning_rate": 3.360768175582991e-05,
      "loss": 0.8248,
      "step": 960
    },
    {
      "epoch": 1.9943502824858759,
      "grad_norm": 6.3719916343688965,
      "learning_rate": 3.33790580704161e-05,
      "loss": 0.8797,
      "step": 970
    },
    {
      "epoch": 1.9984591679506933,
      "eval_accuracy": 0.9137532505056342,
      "eval_loss": 0.20739668607711792,
      "eval_runtime": 1009.1763,
      "eval_samples_per_second": 6.859,
      "eval_steps_per_second": 0.215,
      "step": 972
    },
    {
      "epoch": 2.0164355418592708,
      "grad_norm": 4.454899787902832,
      "learning_rate": 3.3150434385002285e-05,
      "loss": 0.7782,
      "step": 980
    },
    {
      "epoch": 2.036979969183359,
      "grad_norm": 6.4835686683654785,
      "learning_rate": 3.292181069958848e-05,
      "loss": 0.9306,
      "step": 990
    },
    {
      "epoch": 2.0575243965074472,
      "grad_norm": 7.925194263458252,
      "learning_rate": 3.269318701417467e-05,
      "loss": 0.8513,
      "step": 1000
    },
    {
      "epoch": 2.078068823831536,
      "grad_norm": 6.563453197479248,
      "learning_rate": 3.246456332876086e-05,
      "loss": 0.7011,
      "step": 1010
    },
    {
      "epoch": 2.098613251155624,
      "grad_norm": 7.153360366821289,
      "learning_rate": 3.223593964334705e-05,
      "loss": 0.8101,
      "step": 1020
    },
    {
      "epoch": 2.1191576784797124,
      "grad_norm": 6.468135356903076,
      "learning_rate": 3.2007315957933245e-05,
      "loss": 0.9004,
      "step": 1030
    },
    {
      "epoch": 2.1397021058038006,
      "grad_norm": 7.5966796875,
      "learning_rate": 3.177869227251943e-05,
      "loss": 0.7842,
      "step": 1040
    },
    {
      "epoch": 2.1602465331278893,
      "grad_norm": 6.29899787902832,
      "learning_rate": 3.155006858710563e-05,
      "loss": 0.7849,
      "step": 1050
    },
    {
      "epoch": 2.1807909604519775,
      "grad_norm": 7.601044654846191,
      "learning_rate": 3.1321444901691816e-05,
      "loss": 0.8189,
      "step": 1060
    },
    {
      "epoch": 2.2013353877760657,
      "grad_norm": 4.997586727142334,
      "learning_rate": 3.1092821216278004e-05,
      "loss": 0.9379,
      "step": 1070
    },
    {
      "epoch": 2.221879815100154,
      "grad_norm": 6.758198261260986,
      "learning_rate": 3.08641975308642e-05,
      "loss": 0.8441,
      "step": 1080
    },
    {
      "epoch": 2.242424242424242,
      "grad_norm": 8.121203422546387,
      "learning_rate": 3.0635573845450386e-05,
      "loss": 0.8064,
      "step": 1090
    },
    {
      "epoch": 2.262968669748331,
      "grad_norm": 6.168539047241211,
      "learning_rate": 3.0406950160036577e-05,
      "loss": 0.8648,
      "step": 1100
    },
    {
      "epoch": 2.283513097072419,
      "grad_norm": 8.698349952697754,
      "learning_rate": 3.017832647462277e-05,
      "loss": 0.9455,
      "step": 1110
    },
    {
      "epoch": 2.3040575243965074,
      "grad_norm": 6.176644802093506,
      "learning_rate": 2.994970278920896e-05,
      "loss": 0.7805,
      "step": 1120
    },
    {
      "epoch": 2.324601951720596,
      "grad_norm": 6.344507694244385,
      "learning_rate": 2.972107910379515e-05,
      "loss": 0.6886,
      "step": 1130
    },
    {
      "epoch": 2.3451463790446843,
      "grad_norm": 9.571560859680176,
      "learning_rate": 2.949245541838135e-05,
      "loss": 0.8937,
      "step": 1140
    },
    {
      "epoch": 2.3656908063687725,
      "grad_norm": 7.252879619598389,
      "learning_rate": 2.926383173296754e-05,
      "loss": 0.831,
      "step": 1150
    },
    {
      "epoch": 2.3862352336928607,
      "grad_norm": 8.151792526245117,
      "learning_rate": 2.903520804755373e-05,
      "loss": 0.8442,
      "step": 1160
    },
    {
      "epoch": 2.406779661016949,
      "grad_norm": 7.087300777435303,
      "learning_rate": 2.880658436213992e-05,
      "loss": 0.7779,
      "step": 1170
    },
    {
      "epoch": 2.4273240883410376,
      "grad_norm": 6.533515930175781,
      "learning_rate": 2.857796067672611e-05,
      "loss": 0.8222,
      "step": 1180
    },
    {
      "epoch": 2.447868515665126,
      "grad_norm": 7.180200576782227,
      "learning_rate": 2.8349336991312303e-05,
      "loss": 0.8127,
      "step": 1190
    },
    {
      "epoch": 2.468412942989214,
      "grad_norm": 6.345178604125977,
      "learning_rate": 2.8120713305898494e-05,
      "loss": 0.8132,
      "step": 1200
    },
    {
      "epoch": 2.4889573703133023,
      "grad_norm": 7.64429235458374,
      "learning_rate": 2.7892089620484685e-05,
      "loss": 0.9019,
      "step": 1210
    },
    {
      "epoch": 2.509501797637391,
      "grad_norm": 8.274768829345703,
      "learning_rate": 2.7663465935070876e-05,
      "loss": 0.8992,
      "step": 1220
    },
    {
      "epoch": 2.5300462249614792,
      "grad_norm": 6.977597713470459,
      "learning_rate": 2.7434842249657068e-05,
      "loss": 0.8527,
      "step": 1230
    },
    {
      "epoch": 2.5505906522855675,
      "grad_norm": 6.692368030548096,
      "learning_rate": 2.720621856424326e-05,
      "loss": 0.8021,
      "step": 1240
    },
    {
      "epoch": 2.5711350796096557,
      "grad_norm": 6.690873622894287,
      "learning_rate": 2.6977594878829447e-05,
      "loss": 0.8583,
      "step": 1250
    },
    {
      "epoch": 2.5916795069337444,
      "grad_norm": 6.140628337860107,
      "learning_rate": 2.6748971193415638e-05,
      "loss": 0.859,
      "step": 1260
    },
    {
      "epoch": 2.6122239342578326,
      "grad_norm": 6.804861545562744,
      "learning_rate": 2.652034750800183e-05,
      "loss": 0.7306,
      "step": 1270
    },
    {
      "epoch": 2.632768361581921,
      "grad_norm": 7.271435260772705,
      "learning_rate": 2.629172382258802e-05,
      "loss": 0.8166,
      "step": 1280
    },
    {
      "epoch": 2.653312788906009,
      "grad_norm": 6.456128120422363,
      "learning_rate": 2.6063100137174212e-05,
      "loss": 0.9386,
      "step": 1290
    },
    {
      "epoch": 2.6738572162300978,
      "grad_norm": 7.159631252288818,
      "learning_rate": 2.5834476451760403e-05,
      "loss": 0.8064,
      "step": 1300
    },
    {
      "epoch": 2.694401643554186,
      "grad_norm": 6.154369831085205,
      "learning_rate": 2.5605852766346595e-05,
      "loss": 0.8017,
      "step": 1310
    },
    {
      "epoch": 2.714946070878274,
      "grad_norm": 6.905427932739258,
      "learning_rate": 2.5377229080932786e-05,
      "loss": 0.8414,
      "step": 1320
    },
    {
      "epoch": 2.7354904982023625,
      "grad_norm": 7.776165962219238,
      "learning_rate": 2.5148605395518977e-05,
      "loss": 0.8297,
      "step": 1330
    },
    {
      "epoch": 2.7560349255264507,
      "grad_norm": 6.178536415100098,
      "learning_rate": 2.4919981710105165e-05,
      "loss": 0.7867,
      "step": 1340
    },
    {
      "epoch": 2.7765793528505394,
      "grad_norm": 5.276233196258545,
      "learning_rate": 2.4691358024691357e-05,
      "loss": 0.8698,
      "step": 1350
    },
    {
      "epoch": 2.7971237801746276,
      "grad_norm": 5.217291355133057,
      "learning_rate": 2.446273433927755e-05,
      "loss": 0.8277,
      "step": 1360
    },
    {
      "epoch": 2.817668207498716,
      "grad_norm": 5.9258856773376465,
      "learning_rate": 2.4234110653863743e-05,
      "loss": 0.8079,
      "step": 1370
    },
    {
      "epoch": 2.8382126348228045,
      "grad_norm": 5.345384120941162,
      "learning_rate": 2.4005486968449934e-05,
      "loss": 0.7356,
      "step": 1380
    },
    {
      "epoch": 2.8587570621468927,
      "grad_norm": 6.879024505615234,
      "learning_rate": 2.3776863283036125e-05,
      "loss": 0.8116,
      "step": 1390
    },
    {
      "epoch": 2.879301489470981,
      "grad_norm": 5.867737770080566,
      "learning_rate": 2.3548239597622316e-05,
      "loss": 0.7428,
      "step": 1400
    },
    {
      "epoch": 2.899845916795069,
      "grad_norm": 6.256878852844238,
      "learning_rate": 2.3319615912208508e-05,
      "loss": 0.8134,
      "step": 1410
    },
    {
      "epoch": 2.9203903441191574,
      "grad_norm": 6.346487045288086,
      "learning_rate": 2.3090992226794696e-05,
      "loss": 0.6877,
      "step": 1420
    },
    {
      "epoch": 2.940934771443246,
      "grad_norm": 5.782416820526123,
      "learning_rate": 2.2862368541380887e-05,
      "loss": 0.8478,
      "step": 1430
    },
    {
      "epoch": 2.9614791987673343,
      "grad_norm": 6.417751312255859,
      "learning_rate": 2.2633744855967078e-05,
      "loss": 0.7668,
      "step": 1440
    },
    {
      "epoch": 2.9820236260914226,
      "grad_norm": 6.743643760681152,
      "learning_rate": 2.240512117055327e-05,
      "loss": 0.8144,
      "step": 1450
    },
    {
      "epoch": 2.9984591679506933,
      "eval_accuracy": 0.9263218722912453,
      "eval_loss": 0.17973794043064117,
      "eval_runtime": 1009.1137,
      "eval_samples_per_second": 6.859,
      "eval_steps_per_second": 0.215,
      "step": 1458
    },
    {
      "epoch": 3.0041088854648175,
      "grad_norm": 6.051700592041016,
      "learning_rate": 2.217649748513946e-05,
      "loss": 0.9408,
      "step": 1460
    },
    {
      "epoch": 3.024653312788906,
      "grad_norm": 8.463972091674805,
      "learning_rate": 2.1947873799725652e-05,
      "loss": 0.7778,
      "step": 1470
    },
    {
      "epoch": 3.0451977401129944,
      "grad_norm": 6.033344268798828,
      "learning_rate": 2.1719250114311843e-05,
      "loss": 0.7648,
      "step": 1480
    },
    {
      "epoch": 3.0657421674370826,
      "grad_norm": 6.287738800048828,
      "learning_rate": 2.1490626428898035e-05,
      "loss": 0.7507,
      "step": 1490
    },
    {
      "epoch": 3.086286594761171,
      "grad_norm": 6.505873680114746,
      "learning_rate": 2.1262002743484226e-05,
      "loss": 0.7668,
      "step": 1500
    },
    {
      "epoch": 3.1068310220852595,
      "grad_norm": 5.928491115570068,
      "learning_rate": 2.1033379058070417e-05,
      "loss": 0.8387,
      "step": 1510
    },
    {
      "epoch": 3.1273754494093478,
      "grad_norm": 8.137348175048828,
      "learning_rate": 2.080475537265661e-05,
      "loss": 0.7546,
      "step": 1520
    },
    {
      "epoch": 3.147919876733436,
      "grad_norm": 6.729381561279297,
      "learning_rate": 2.05761316872428e-05,
      "loss": 0.7086,
      "step": 1530
    },
    {
      "epoch": 3.168464304057524,
      "grad_norm": 6.623689651489258,
      "learning_rate": 2.034750800182899e-05,
      "loss": 0.9436,
      "step": 1540
    },
    {
      "epoch": 3.189008731381613,
      "grad_norm": 7.943443298339844,
      "learning_rate": 2.0118884316415183e-05,
      "loss": 0.8645,
      "step": 1550
    },
    {
      "epoch": 3.209553158705701,
      "grad_norm": 5.353769302368164,
      "learning_rate": 1.9890260631001374e-05,
      "loss": 0.7909,
      "step": 1560
    },
    {
      "epoch": 3.2300975860297894,
      "grad_norm": 6.193889141082764,
      "learning_rate": 1.9661636945587565e-05,
      "loss": 0.7595,
      "step": 1570
    },
    {
      "epoch": 3.2506420133538776,
      "grad_norm": 8.73640251159668,
      "learning_rate": 1.9433013260173753e-05,
      "loss": 0.729,
      "step": 1580
    },
    {
      "epoch": 3.2711864406779663,
      "grad_norm": 6.345188617706299,
      "learning_rate": 1.9204389574759944e-05,
      "loss": 0.6933,
      "step": 1590
    },
    {
      "epoch": 3.2917308680020545,
      "grad_norm": 9.154464721679688,
      "learning_rate": 1.8975765889346136e-05,
      "loss": 0.8427,
      "step": 1600
    },
    {
      "epoch": 3.3122752953261427,
      "grad_norm": 7.497635841369629,
      "learning_rate": 1.8747142203932327e-05,
      "loss": 0.7922,
      "step": 1610
    },
    {
      "epoch": 3.332819722650231,
      "grad_norm": 6.4137468338012695,
      "learning_rate": 1.8518518518518518e-05,
      "loss": 0.7468,
      "step": 1620
    },
    {
      "epoch": 3.3533641499743196,
      "grad_norm": 8.157144546508789,
      "learning_rate": 1.828989483310471e-05,
      "loss": 0.7347,
      "step": 1630
    },
    {
      "epoch": 3.373908577298408,
      "grad_norm": 5.25002908706665,
      "learning_rate": 1.8061271147690904e-05,
      "loss": 0.7833,
      "step": 1640
    },
    {
      "epoch": 3.394453004622496,
      "grad_norm": 6.77322244644165,
      "learning_rate": 1.7832647462277096e-05,
      "loss": 0.7639,
      "step": 1650
    },
    {
      "epoch": 3.4149974319465843,
      "grad_norm": 6.466352462768555,
      "learning_rate": 1.7604023776863283e-05,
      "loss": 0.6913,
      "step": 1660
    },
    {
      "epoch": 3.435541859270673,
      "grad_norm": 6.149074077606201,
      "learning_rate": 1.7375400091449475e-05,
      "loss": 0.85,
      "step": 1670
    },
    {
      "epoch": 3.4560862865947612,
      "grad_norm": 8.840483665466309,
      "learning_rate": 1.7146776406035666e-05,
      "loss": 0.8483,
      "step": 1680
    },
    {
      "epoch": 3.4766307139188495,
      "grad_norm": 7.109951019287109,
      "learning_rate": 1.6918152720621857e-05,
      "loss": 0.8669,
      "step": 1690
    },
    {
      "epoch": 3.4971751412429377,
      "grad_norm": 7.431482315063477,
      "learning_rate": 1.668952903520805e-05,
      "loss": 0.7942,
      "step": 1700
    },
    {
      "epoch": 3.517719568567026,
      "grad_norm": 8.127689361572266,
      "learning_rate": 1.646090534979424e-05,
      "loss": 0.8801,
      "step": 1710
    },
    {
      "epoch": 3.5382639958911146,
      "grad_norm": 5.593295574188232,
      "learning_rate": 1.623228166438043e-05,
      "loss": 0.797,
      "step": 1720
    },
    {
      "epoch": 3.558808423215203,
      "grad_norm": 6.143307685852051,
      "learning_rate": 1.6003657978966623e-05,
      "loss": 0.8404,
      "step": 1730
    },
    {
      "epoch": 3.579352850539291,
      "grad_norm": 7.268124103546143,
      "learning_rate": 1.5775034293552814e-05,
      "loss": 0.7545,
      "step": 1740
    },
    {
      "epoch": 3.5998972778633798,
      "grad_norm": 6.966310501098633,
      "learning_rate": 1.5546410608139002e-05,
      "loss": 0.7656,
      "step": 1750
    },
    {
      "epoch": 3.620441705187468,
      "grad_norm": 7.398248672485352,
      "learning_rate": 1.5317786922725193e-05,
      "loss": 0.8126,
      "step": 1760
    },
    {
      "epoch": 3.6409861325115562,
      "grad_norm": 7.494919776916504,
      "learning_rate": 1.5089163237311384e-05,
      "loss": 0.908,
      "step": 1770
    },
    {
      "epoch": 3.6615305598356445,
      "grad_norm": 5.857889175415039,
      "learning_rate": 1.4860539551897576e-05,
      "loss": 0.7003,
      "step": 1780
    },
    {
      "epoch": 3.6820749871597327,
      "grad_norm": 8.693001747131348,
      "learning_rate": 1.463191586648377e-05,
      "loss": 0.8321,
      "step": 1790
    },
    {
      "epoch": 3.7026194144838214,
      "grad_norm": 8.051487922668457,
      "learning_rate": 1.440329218106996e-05,
      "loss": 0.8759,
      "step": 1800
    },
    {
      "epoch": 3.7231638418079096,
      "grad_norm": 5.1894612312316895,
      "learning_rate": 1.4174668495656151e-05,
      "loss": 0.7325,
      "step": 1810
    },
    {
      "epoch": 3.743708269131998,
      "grad_norm": 5.104062557220459,
      "learning_rate": 1.3946044810242343e-05,
      "loss": 0.7732,
      "step": 1820
    },
    {
      "epoch": 3.7642526964560865,
      "grad_norm": 7.708363056182861,
      "learning_rate": 1.3717421124828534e-05,
      "loss": 0.8328,
      "step": 1830
    },
    {
      "epoch": 3.7847971237801747,
      "grad_norm": 6.15858268737793,
      "learning_rate": 1.3488797439414723e-05,
      "loss": 0.738,
      "step": 1840
    },
    {
      "epoch": 3.805341551104263,
      "grad_norm": 7.959890365600586,
      "learning_rate": 1.3260173754000915e-05,
      "loss": 0.818,
      "step": 1850
    },
    {
      "epoch": 3.825885978428351,
      "grad_norm": 7.602783679962158,
      "learning_rate": 1.3031550068587106e-05,
      "loss": 0.9028,
      "step": 1860
    },
    {
      "epoch": 3.8464304057524394,
      "grad_norm": 7.417806625366211,
      "learning_rate": 1.2802926383173297e-05,
      "loss": 0.8158,
      "step": 1870
    },
    {
      "epoch": 3.866974833076528,
      "grad_norm": 6.925180435180664,
      "learning_rate": 1.2574302697759489e-05,
      "loss": 0.7867,
      "step": 1880
    },
    {
      "epoch": 3.8875192604006163,
      "grad_norm": 4.716423034667969,
      "learning_rate": 1.2345679012345678e-05,
      "loss": 0.6969,
      "step": 1890
    },
    {
      "epoch": 3.9080636877247046,
      "grad_norm": 5.976194381713867,
      "learning_rate": 1.2117055326931871e-05,
      "loss": 0.7292,
      "step": 1900
    },
    {
      "epoch": 3.9286081150487933,
      "grad_norm": 6.812644958496094,
      "learning_rate": 1.1888431641518063e-05,
      "loss": 0.7845,
      "step": 1910
    },
    {
      "epoch": 3.9491525423728815,
      "grad_norm": 7.749550819396973,
      "learning_rate": 1.1659807956104254e-05,
      "loss": 0.8008,
      "step": 1920
    },
    {
      "epoch": 3.9696969696969697,
      "grad_norm": 7.299574375152588,
      "learning_rate": 1.1431184270690443e-05,
      "loss": 0.7896,
      "step": 1930
    },
    {
      "epoch": 3.990241397021058,
      "grad_norm": 7.994142055511475,
      "learning_rate": 1.1202560585276635e-05,
      "loss": 0.9243,
      "step": 1940
    },
    {
      "epoch": 3.9984591679506933,
      "eval_accuracy": 0.9232880670326495,
      "eval_loss": 0.18616917729377747,
      "eval_runtime": 1074.4755,
      "eval_samples_per_second": 6.442,
      "eval_steps_per_second": 0.202,
      "step": 1944
    },
    {
      "epoch": 4.012326656394453,
      "grad_norm": 6.763792991638184,
      "learning_rate": 1.0973936899862826e-05,
      "loss": 0.7882,
      "step": 1950
    },
    {
      "epoch": 4.0328710837185415,
      "grad_norm": 7.702907085418701,
      "learning_rate": 1.0745313214449017e-05,
      "loss": 0.8705,
      "step": 1960
    },
    {
      "epoch": 4.05341551104263,
      "grad_norm": 6.545944690704346,
      "learning_rate": 1.0516689529035209e-05,
      "loss": 0.8295,
      "step": 1970
    },
    {
      "epoch": 4.073959938366718,
      "grad_norm": 7.43347692489624,
      "learning_rate": 1.02880658436214e-05,
      "loss": 0.7686,
      "step": 1980
    },
    {
      "epoch": 4.094504365690806,
      "grad_norm": 6.285999298095703,
      "learning_rate": 1.0059442158207591e-05,
      "loss": 0.7991,
      "step": 1990
    },
    {
      "epoch": 4.1150487930148945,
      "grad_norm": 8.586403846740723,
      "learning_rate": 9.830818472793783e-06,
      "loss": 0.7933,
      "step": 2000
    },
    {
      "epoch": 4.135593220338983,
      "grad_norm": 7.343191623687744,
      "learning_rate": 9.602194787379972e-06,
      "loss": 0.7762,
      "step": 2010
    },
    {
      "epoch": 4.156137647663072,
      "grad_norm": 5.3163933753967285,
      "learning_rate": 9.373571101966163e-06,
      "loss": 0.7508,
      "step": 2020
    },
    {
      "epoch": 4.17668207498716,
      "grad_norm": 6.618367671966553,
      "learning_rate": 9.144947416552355e-06,
      "loss": 0.671,
      "step": 2030
    },
    {
      "epoch": 4.197226502311248,
      "grad_norm": 4.874975681304932,
      "learning_rate": 8.916323731138548e-06,
      "loss": 0.713,
      "step": 2040
    },
    {
      "epoch": 4.2177709296353365,
      "grad_norm": 6.649152755737305,
      "learning_rate": 8.687700045724737e-06,
      "loss": 0.8196,
      "step": 2050
    },
    {
      "epoch": 4.238315356959425,
      "grad_norm": 6.106600284576416,
      "learning_rate": 8.459076360310929e-06,
      "loss": 0.744,
      "step": 2060
    },
    {
      "epoch": 4.258859784283513,
      "grad_norm": 7.9351630210876465,
      "learning_rate": 8.23045267489712e-06,
      "loss": 0.7915,
      "step": 2070
    },
    {
      "epoch": 4.279404211607601,
      "grad_norm": 8.719624519348145,
      "learning_rate": 8.001828989483311e-06,
      "loss": 0.8307,
      "step": 2080
    },
    {
      "epoch": 4.299948638931689,
      "grad_norm": 8.237951278686523,
      "learning_rate": 7.773205304069501e-06,
      "loss": 0.7875,
      "step": 2090
    },
    {
      "epoch": 4.320493066255779,
      "grad_norm": 7.945969581604004,
      "learning_rate": 7.544581618655692e-06,
      "loss": 0.7283,
      "step": 2100
    },
    {
      "epoch": 4.341037493579867,
      "grad_norm": 9.574934959411621,
      "learning_rate": 7.315957933241885e-06,
      "loss": 0.8088,
      "step": 2110
    },
    {
      "epoch": 4.361581920903955,
      "grad_norm": 7.645279884338379,
      "learning_rate": 7.087334247828076e-06,
      "loss": 0.77,
      "step": 2120
    },
    {
      "epoch": 4.382126348228043,
      "grad_norm": 7.663369178771973,
      "learning_rate": 6.858710562414267e-06,
      "loss": 0.7608,
      "step": 2130
    },
    {
      "epoch": 4.4026707755521315,
      "grad_norm": 7.651998996734619,
      "learning_rate": 6.630086877000457e-06,
      "loss": 0.7465,
      "step": 2140
    },
    {
      "epoch": 4.42321520287622,
      "grad_norm": 6.678437232971191,
      "learning_rate": 6.401463191586649e-06,
      "loss": 0.725,
      "step": 2150
    },
    {
      "epoch": 4.443759630200308,
      "grad_norm": 7.033961772918701,
      "learning_rate": 6.172839506172839e-06,
      "loss": 0.7678,
      "step": 2160
    },
    {
      "epoch": 4.464304057524396,
      "grad_norm": 7.408419609069824,
      "learning_rate": 5.944215820759031e-06,
      "loss": 0.7689,
      "step": 2170
    },
    {
      "epoch": 4.484848484848484,
      "grad_norm": 8.51754379272461,
      "learning_rate": 5.715592135345222e-06,
      "loss": 0.852,
      "step": 2180
    },
    {
      "epoch": 4.5053929121725735,
      "grad_norm": 5.929790019989014,
      "learning_rate": 5.486968449931413e-06,
      "loss": 0.7294,
      "step": 2190
    },
    {
      "epoch": 4.525937339496662,
      "grad_norm": 7.980464935302734,
      "learning_rate": 5.258344764517604e-06,
      "loss": 0.8615,
      "step": 2200
    },
    {
      "epoch": 4.54648176682075,
      "grad_norm": 9.191452026367188,
      "learning_rate": 5.029721079103796e-06,
      "loss": 0.9008,
      "step": 2210
    },
    {
      "epoch": 4.567026194144838,
      "grad_norm": 8.33234977722168,
      "learning_rate": 4.801097393689986e-06,
      "loss": 0.8745,
      "step": 2220
    },
    {
      "epoch": 4.5875706214689265,
      "grad_norm": 6.941461086273193,
      "learning_rate": 4.572473708276177e-06,
      "loss": 0.7385,
      "step": 2230
    },
    {
      "epoch": 4.608115048793015,
      "grad_norm": 4.740252494812012,
      "learning_rate": 4.343850022862369e-06,
      "loss": 0.7224,
      "step": 2240
    },
    {
      "epoch": 4.628659476117103,
      "grad_norm": 7.316972255706787,
      "learning_rate": 4.11522633744856e-06,
      "loss": 0.8351,
      "step": 2250
    },
    {
      "epoch": 4.649203903441192,
      "grad_norm": 6.795185565948486,
      "learning_rate": 3.8866026520347504e-06,
      "loss": 0.7554,
      "step": 2260
    },
    {
      "epoch": 4.66974833076528,
      "grad_norm": 8.930145263671875,
      "learning_rate": 3.6579789666209426e-06,
      "loss": 0.8177,
      "step": 2270
    },
    {
      "epoch": 4.6902927580893685,
      "grad_norm": 7.089832782745361,
      "learning_rate": 3.4293552812071335e-06,
      "loss": 0.7476,
      "step": 2280
    },
    {
      "epoch": 4.710837185413457,
      "grad_norm": 6.567149639129639,
      "learning_rate": 3.2007315957933243e-06,
      "loss": 0.841,
      "step": 2290
    },
    {
      "epoch": 4.731381612737545,
      "grad_norm": 6.558457851409912,
      "learning_rate": 2.9721079103795156e-06,
      "loss": 0.7575,
      "step": 2300
    },
    {
      "epoch": 4.751926040061633,
      "grad_norm": 8.95292854309082,
      "learning_rate": 2.7434842249657065e-06,
      "loss": 0.7493,
      "step": 2310
    },
    {
      "epoch": 4.7724704673857214,
      "grad_norm": 7.489271640777588,
      "learning_rate": 2.514860539551898e-06,
      "loss": 0.716,
      "step": 2320
    },
    {
      "epoch": 4.79301489470981,
      "grad_norm": 7.381133079528809,
      "learning_rate": 2.2862368541380887e-06,
      "loss": 0.8227,
      "step": 2330
    },
    {
      "epoch": 4.813559322033898,
      "grad_norm": 8.651927947998047,
      "learning_rate": 2.05761316872428e-06,
      "loss": 0.7768,
      "step": 2340
    },
    {
      "epoch": 4.834103749357987,
      "grad_norm": 7.568137168884277,
      "learning_rate": 1.8289894833104713e-06,
      "loss": 0.7768,
      "step": 2350
    },
    {
      "epoch": 4.854648176682075,
      "grad_norm": 8.011246681213379,
      "learning_rate": 1.6003657978966622e-06,
      "loss": 0.7113,
      "step": 2360
    },
    {
      "epoch": 4.8751926040061635,
      "grad_norm": 8.442760467529297,
      "learning_rate": 1.3717421124828533e-06,
      "loss": 0.7428,
      "step": 2370
    },
    {
      "epoch": 4.895737031330252,
      "grad_norm": 5.148819446563721,
      "learning_rate": 1.1431184270690443e-06,
      "loss": 0.7326,
      "step": 2380
    },
    {
      "epoch": 4.91628145865434,
      "grad_norm": 6.452014923095703,
      "learning_rate": 9.144947416552356e-07,
      "loss": 0.8117,
      "step": 2390
    },
    {
      "epoch": 4.936825885978428,
      "grad_norm": 5.830743789672852,
      "learning_rate": 6.858710562414266e-07,
      "loss": 0.7118,
      "step": 2400
    },
    {
      "epoch": 4.957370313302516,
      "grad_norm": 6.933480739593506,
      "learning_rate": 4.572473708276178e-07,
      "loss": 0.7462,
      "step": 2410
    },
    {
      "epoch": 4.977914740626605,
      "grad_norm": 6.512324810028076,
      "learning_rate": 2.286236854138089e-07,
      "loss": 0.7691,
      "step": 2420
    },
    {
      "epoch": 4.998459167950694,
      "grad_norm": 6.746326446533203,
      "learning_rate": 0.0,
      "loss": 0.8199,
      "step": 2430
    },
    {
      "epoch": 4.998459167950694,
      "eval_accuracy": 0.9276220745449292,
      "eval_loss": 0.17630332708358765,
      "eval_runtime": 1096.6155,
      "eval_samples_per_second": 6.312,
      "eval_steps_per_second": 0.198,
      "step": 2430
    },
    {
      "epoch": 4.998459167950694,
      "step": 2430,
      "total_flos": 8.175904340280607e+17,
      "train_loss": 0.8756545659445931,
      "train_runtime": 61216.2023,
      "train_samples_per_second": 5.088,
      "train_steps_per_second": 0.04
    }
  ],
  "logging_steps": 10,
  "max_steps": 2430,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 8.175904340280607e+17,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}