Qwen2.5-VL-3B-Instruct-Agentic / trainer_state.json
A-Mahla's picture
A-Mahla HF Staff
Model save
50bb8e0 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 640,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.007816316560820713,
"grad_norm": 69.75188475573104,
"learning_rate": 2.0000000000000003e-06,
"loss": 2.5559,
"mean_token_accuracy": 0.6205124616622925,
"num_tokens": 5924597.0,
"step": 5
},
{
"epoch": 0.015632633121641426,
"grad_norm": 37.890360906113195,
"learning_rate": 4.5e-06,
"loss": 2.096,
"mean_token_accuracy": 0.6493684396147728,
"num_tokens": 11853358.0,
"step": 10
},
{
"epoch": 0.02344894968246214,
"grad_norm": 37.89921398442958,
"learning_rate": 7e-06,
"loss": 1.2923,
"mean_token_accuracy": 0.699188905954361,
"num_tokens": 17774953.0,
"step": 15
},
{
"epoch": 0.03126526624328285,
"grad_norm": 3.9611680059042,
"learning_rate": 9.5e-06,
"loss": 1.0185,
"mean_token_accuracy": 0.736442020535469,
"num_tokens": 23743902.0,
"step": 20
},
{
"epoch": 0.039081582804103565,
"grad_norm": 6.339716255577251,
"learning_rate": 9.999075719055307e-06,
"loss": 0.8959,
"mean_token_accuracy": 0.7516884453594684,
"num_tokens": 29670699.0,
"step": 25
},
{
"epoch": 0.04689789936492428,
"grad_norm": 3.2460756840648544,
"learning_rate": 9.995321478440751e-06,
"loss": 0.8664,
"mean_token_accuracy": 0.7534175351262092,
"num_tokens": 35627815.0,
"step": 30
},
{
"epoch": 0.05471421592574499,
"grad_norm": 3.4636834905707827,
"learning_rate": 9.988681918400355e-06,
"loss": 0.85,
"mean_token_accuracy": 0.7533099494874478,
"num_tokens": 41558055.0,
"step": 35
},
{
"epoch": 0.0625305324865657,
"grad_norm": 2.995605141331059,
"learning_rate": 9.9791613005318e-06,
"loss": 0.8151,
"mean_token_accuracy": 0.7614992260932922,
"num_tokens": 47493638.0,
"step": 40
},
{
"epoch": 0.07034684904738642,
"grad_norm": 3.1347703455639584,
"learning_rate": 9.966765735638018e-06,
"loss": 0.7855,
"mean_token_accuracy": 0.7702661901712418,
"num_tokens": 53415769.0,
"step": 45
},
{
"epoch": 0.07816316560820713,
"grad_norm": 4.194109438368802,
"learning_rate": 9.951503179804989e-06,
"loss": 0.763,
"mean_token_accuracy": 0.7751710690557957,
"num_tokens": 59313984.0,
"step": 50
},
{
"epoch": 0.08597948216902784,
"grad_norm": 2.5694956316167405,
"learning_rate": 9.933383429295124e-06,
"loss": 0.7461,
"mean_token_accuracy": 0.7756987683475017,
"num_tokens": 65218898.0,
"step": 55
},
{
"epoch": 0.09379579872984856,
"grad_norm": 2.4022434258446457,
"learning_rate": 9.912418114259548e-06,
"loss": 0.7305,
"mean_token_accuracy": 0.7788987644016743,
"num_tokens": 71140560.0,
"step": 60
},
{
"epoch": 0.10161211529066927,
"grad_norm": 2.0354431025023314,
"learning_rate": 9.888620691273284e-06,
"loss": 0.7078,
"mean_token_accuracy": 0.7819231644272804,
"num_tokens": 77078478.0,
"step": 65
},
{
"epoch": 0.10942843185148998,
"grad_norm": 1.88497692389339,
"learning_rate": 9.862006434698169e-06,
"loss": 0.6963,
"mean_token_accuracy": 0.7847778849303723,
"num_tokens": 82996564.0,
"step": 70
},
{
"epoch": 0.1172447484123107,
"grad_norm": 1.9200062767141564,
"learning_rate": 9.832592426879006e-06,
"loss": 0.688,
"mean_token_accuracy": 0.7845040634274483,
"num_tokens": 88951976.0,
"step": 75
},
{
"epoch": 0.1250610649731314,
"grad_norm": 2.210483530160085,
"learning_rate": 9.800397547179276e-06,
"loss": 0.6829,
"mean_token_accuracy": 0.7839573793113231,
"num_tokens": 94895652.0,
"step": 80
},
{
"epoch": 0.13287738153395212,
"grad_norm": 2.0791394687934845,
"learning_rate": 9.765442459863428e-06,
"loss": 0.682,
"mean_token_accuracy": 0.7845308348536492,
"num_tokens": 100832079.0,
"step": 85
},
{
"epoch": 0.14069369809477283,
"grad_norm": 2.148171577637037,
"learning_rate": 9.72774960083353e-06,
"loss": 0.6906,
"mean_token_accuracy": 0.7805382929742336,
"num_tokens": 106787613.0,
"step": 90
},
{
"epoch": 0.14851001465559355,
"grad_norm": 2.0898108754881326,
"learning_rate": 9.687343163228806e-06,
"loss": 0.6684,
"mean_token_accuracy": 0.7865298599004745,
"num_tokens": 112717599.0,
"step": 95
},
{
"epoch": 0.15632633121641426,
"grad_norm": 4.0822591130362955,
"learning_rate": 9.644249081897277e-06,
"loss": 0.6648,
"mean_token_accuracy": 0.7872977338731288,
"num_tokens": 118650442.0,
"step": 100
},
{
"epoch": 0.16414264777723497,
"grad_norm": 2.441738444276975,
"learning_rate": 9.598495016749493e-06,
"loss": 0.6689,
"mean_token_accuracy": 0.7859964817762375,
"num_tokens": 124574766.0,
"step": 105
},
{
"epoch": 0.1719589643380557,
"grad_norm": 2.075995258307634,
"learning_rate": 9.55011033500505e-06,
"loss": 0.6605,
"mean_token_accuracy": 0.787811417132616,
"num_tokens": 130516200.0,
"step": 110
},
{
"epoch": 0.1797752808988764,
"grad_norm": 2.609489028108713,
"learning_rate": 9.499126092343237e-06,
"loss": 0.661,
"mean_token_accuracy": 0.7875787198543549,
"num_tokens": 136457829.0,
"step": 115
},
{
"epoch": 0.1875915974596971,
"grad_norm": 3.7440429066647734,
"learning_rate": 9.445575012969977e-06,
"loss": 0.6709,
"mean_token_accuracy": 0.7853954270482063,
"num_tokens": 142385131.0,
"step": 120
},
{
"epoch": 0.19540791402051783,
"grad_norm": 2.8751982634480044,
"learning_rate": 9.38949146861382e-06,
"loss": 0.663,
"mean_token_accuracy": 0.7872662946581841,
"num_tokens": 148289898.0,
"step": 125
},
{
"epoch": 0.20322423058133854,
"grad_norm": 2.925083855527294,
"learning_rate": 9.33091145646446e-06,
"loss": 0.6486,
"mean_token_accuracy": 0.7910164006054401,
"num_tokens": 154232386.0,
"step": 130
},
{
"epoch": 0.21104054714215925,
"grad_norm": 3.73974121507644,
"learning_rate": 9.26987257606797e-06,
"loss": 0.653,
"mean_token_accuracy": 0.7898711428046227,
"num_tokens": 160150772.0,
"step": 135
},
{
"epoch": 0.21885686370297996,
"grad_norm": 2.074910644810112,
"learning_rate": 9.206414005193539e-06,
"loss": 0.6564,
"mean_token_accuracy": 0.7886676676571369,
"num_tokens": 166088160.0,
"step": 140
},
{
"epoch": 0.22667318026380068,
"grad_norm": 3.765010702267194,
"learning_rate": 9.140576474687263e-06,
"loss": 0.665,
"mean_token_accuracy": 0.7880136586725712,
"num_tokens": 172032929.0,
"step": 145
},
{
"epoch": 0.2344894968246214,
"grad_norm": 2.3920249014704944,
"learning_rate": 9.072402242329067e-06,
"loss": 0.6503,
"mean_token_accuracy": 0.7899810753762722,
"num_tokens": 178005406.0,
"step": 150
},
{
"epoch": 0.2423058133854421,
"grad_norm": 4.535876880687944,
"learning_rate": 9.001935065709569e-06,
"loss": 0.6427,
"mean_token_accuracy": 0.7929855234920978,
"num_tokens": 183927144.0,
"step": 155
},
{
"epoch": 0.2501221299462628,
"grad_norm": 3.44122188439969,
"learning_rate": 8.929220174144304e-06,
"loss": 0.6489,
"mean_token_accuracy": 0.7914013616740704,
"num_tokens": 189848723.0,
"step": 160
},
{
"epoch": 0.25793844650708353,
"grad_norm": 2.2378659722902188,
"learning_rate": 8.85430423964332e-06,
"loss": 0.6386,
"mean_token_accuracy": 0.792429718375206,
"num_tokens": 195777877.0,
"step": 165
},
{
"epoch": 0.26575476306790424,
"grad_norm": 2.982598937419653,
"learning_rate": 8.777235346954753e-06,
"loss": 0.649,
"mean_token_accuracy": 0.790771734714508,
"num_tokens": 201713378.0,
"step": 170
},
{
"epoch": 0.27357107962872496,
"grad_norm": 1.9062511611198192,
"learning_rate": 8.698062962701691e-06,
"loss": 0.652,
"mean_token_accuracy": 0.790651909261942,
"num_tokens": 207661581.0,
"step": 175
},
{
"epoch": 0.28138739618954567,
"grad_norm": 2.7700238166088877,
"learning_rate": 8.616837903632026e-06,
"loss": 0.6438,
"mean_token_accuracy": 0.7914554052054882,
"num_tokens": 213597723.0,
"step": 180
},
{
"epoch": 0.2892037127503664,
"grad_norm": 4.298205320427554,
"learning_rate": 8.533612304001763e-06,
"loss": 0.6569,
"mean_token_accuracy": 0.7872735880315304,
"num_tokens": 219543570.0,
"step": 185
},
{
"epoch": 0.2970200293111871,
"grad_norm": 2.62809039414661,
"learning_rate": 8.44843958211269e-06,
"loss": 0.646,
"mean_token_accuracy": 0.7901849329471589,
"num_tokens": 225490076.0,
"step": 190
},
{
"epoch": 0.3048363458720078,
"grad_norm": 2.540505165257454,
"learning_rate": 8.361374406025853e-06,
"loss": 0.6452,
"mean_token_accuracy": 0.7908910043537617,
"num_tokens": 231429929.0,
"step": 195
},
{
"epoch": 0.3126526624328285,
"grad_norm": 1.763996580236139,
"learning_rate": 8.272472658472906e-06,
"loss": 0.6529,
"mean_token_accuracy": 0.7878620237112045,
"num_tokens": 237384555.0,
"step": 200
},
{
"epoch": 0.32046897899364923,
"grad_norm": 1.6233120878672673,
"learning_rate": 8.181791400987807e-06,
"loss": 0.6343,
"mean_token_accuracy": 0.7936271652579308,
"num_tokens": 243313192.0,
"step": 205
},
{
"epoch": 0.32828529555446995,
"grad_norm": 1.9405753464802942,
"learning_rate": 8.089388837281915e-06,
"loss": 0.6439,
"mean_token_accuracy": 0.7914594881236553,
"num_tokens": 249220870.0,
"step": 210
},
{
"epoch": 0.33610161211529066,
"grad_norm": 3.488125244229796,
"learning_rate": 7.995324275885961e-06,
"loss": 0.6351,
"mean_token_accuracy": 0.7946518436074257,
"num_tokens": 255166697.0,
"step": 215
},
{
"epoch": 0.3439179286761114,
"grad_norm": 8.607586639921637,
"learning_rate": 7.89965809208291e-06,
"loss": 0.6426,
"mean_token_accuracy": 0.7916349656879902,
"num_tokens": 261089161.0,
"step": 220
},
{
"epoch": 0.3517342452369321,
"grad_norm": 1.6660014877602962,
"learning_rate": 7.802451689156122e-06,
"loss": 0.6481,
"mean_token_accuracy": 0.79035182595253,
"num_tokens": 267021428.0,
"step": 225
},
{
"epoch": 0.3595505617977528,
"grad_norm": 1.6153459083711095,
"learning_rate": 7.70376745897768e-06,
"loss": 0.6414,
"mean_token_accuracy": 0.7924111239612103,
"num_tokens": 272961221.0,
"step": 230
},
{
"epoch": 0.3673668783585735,
"grad_norm": 1.7376528456449458,
"learning_rate": 7.6036687419622215e-06,
"loss": 0.6359,
"mean_token_accuracy": 0.7931422784924507,
"num_tokens": 278866458.0,
"step": 235
},
{
"epoch": 0.3751831949193942,
"grad_norm": 1.7509363427645832,
"learning_rate": 7.5022197864119175e-06,
"loss": 0.6455,
"mean_token_accuracy": 0.7915025249123573,
"num_tokens": 284785499.0,
"step": 240
},
{
"epoch": 0.38299951148021494,
"grad_norm": 2.863849300726178,
"learning_rate": 7.399485707278744e-06,
"loss": 0.6478,
"mean_token_accuracy": 0.7907331958413124,
"num_tokens": 290732280.0,
"step": 245
},
{
"epoch": 0.39081582804103565,
"grad_norm": 1.4846518170955887,
"learning_rate": 7.295532444370485e-06,
"loss": 0.6451,
"mean_token_accuracy": 0.7914663501083851,
"num_tokens": 296654941.0,
"step": 250
},
{
"epoch": 0.39863214460185636,
"grad_norm": 1.7383678766587618,
"learning_rate": 7.190426720027306e-06,
"loss": 0.644,
"mean_token_accuracy": 0.7916645854711533,
"num_tokens": 302605292.0,
"step": 255
},
{
"epoch": 0.4064484611626771,
"grad_norm": 2.2690859031654087,
"learning_rate": 7.084235996296068e-06,
"loss": 0.6409,
"mean_token_accuracy": 0.792822826653719,
"num_tokens": 308552365.0,
"step": 260
},
{
"epoch": 0.4142647777234978,
"grad_norm": 1.6536907812645472,
"learning_rate": 6.977028431629839e-06,
"loss": 0.6418,
"mean_token_accuracy": 0.7921351306140423,
"num_tokens": 314489877.0,
"step": 265
},
{
"epoch": 0.4220810942843185,
"grad_norm": 1.6417211396491032,
"learning_rate": 6.86887283714044e-06,
"loss": 0.6376,
"mean_token_accuracy": 0.7937514387071133,
"num_tokens": 320414279.0,
"step": 270
},
{
"epoch": 0.4298974108451392,
"grad_norm": 2.1351664741224963,
"learning_rate": 6.7598386324320745e-06,
"loss": 0.6298,
"mean_token_accuracy": 0.7947640925645828,
"num_tokens": 326349818.0,
"step": 275
},
{
"epoch": 0.43771372740595993,
"grad_norm": 1.4007468087271036,
"learning_rate": 6.649995801044391e-06,
"loss": 0.6414,
"mean_token_accuracy": 0.7926677256822586,
"num_tokens": 332276019.0,
"step": 280
},
{
"epoch": 0.44553004396678064,
"grad_norm": 1.6121431265331962,
"learning_rate": 6.539414845533596e-06,
"loss": 0.6393,
"mean_token_accuracy": 0.7921099595725536,
"num_tokens": 338209339.0,
"step": 285
},
{
"epoch": 0.45334636052760136,
"grad_norm": 2.031155969828718,
"learning_rate": 6.428166742220423e-06,
"loss": 0.625,
"mean_token_accuracy": 0.7955845050513745,
"num_tokens": 344137484.0,
"step": 290
},
{
"epoch": 0.46116267708842207,
"grad_norm": 1.4450498950991713,
"learning_rate": 6.316322895634029e-06,
"loss": 0.6374,
"mean_token_accuracy": 0.7931911982595921,
"num_tokens": 350078203.0,
"step": 295
},
{
"epoch": 0.4689789936492428,
"grad_norm": 1.9913252644164074,
"learning_rate": 6.20395509268104e-06,
"loss": 0.6214,
"mean_token_accuracy": 0.7970752798020839,
"num_tokens": 356025763.0,
"step": 300
},
{
"epoch": 0.4767953102100635,
"grad_norm": 2.8315822034325837,
"learning_rate": 6.0911354565691594e-06,
"loss": 0.6304,
"mean_token_accuracy": 0.796255373954773,
"num_tokens": 361992798.0,
"step": 305
},
{
"epoch": 0.4846116267708842,
"grad_norm": 1.8374439736436459,
"learning_rate": 5.977936400514943e-06,
"loss": 0.6307,
"mean_token_accuracy": 0.7953431971371174,
"num_tokens": 367913461.0,
"step": 310
},
{
"epoch": 0.4924279433317049,
"grad_norm": 2.3033335141428,
"learning_rate": 5.864430581265406e-06,
"loss": 0.6356,
"mean_token_accuracy": 0.7944584995508194,
"num_tokens": 373852019.0,
"step": 315
},
{
"epoch": 0.5002442598925256,
"grad_norm": 1.6907176529892718,
"learning_rate": 5.750690852463339e-06,
"loss": 0.6347,
"mean_token_accuracy": 0.7937369205057621,
"num_tokens": 379764522.0,
"step": 320
},
{
"epoch": 0.5080605764533463,
"grad_norm": 1.818966581747677,
"learning_rate": 5.636790217886243e-06,
"loss": 0.6253,
"mean_token_accuracy": 0.7948664158582688,
"num_tokens": 385692482.0,
"step": 325
},
{
"epoch": 0.5158768930141671,
"grad_norm": 1.6206125873755466,
"learning_rate": 5.522801784588895e-06,
"loss": 0.631,
"mean_token_accuracy": 0.7939370617270469,
"num_tokens": 391635856.0,
"step": 330
},
{
"epoch": 0.5236932095749878,
"grad_norm": 3.3420400728114945,
"learning_rate": 5.408798715979626e-06,
"loss": 0.6341,
"mean_token_accuracy": 0.7946567349135876,
"num_tokens": 397545573.0,
"step": 335
},
{
"epoch": 0.5315095261358085,
"grad_norm": 4.102411753363632,
"learning_rate": 5.294854184860437e-06,
"loss": 0.6268,
"mean_token_accuracy": 0.7956276901066304,
"num_tokens": 403475982.0,
"step": 340
},
{
"epoch": 0.5393258426966292,
"grad_norm": 3.1767409652573853,
"learning_rate": 5.1810413264610724e-06,
"loss": 0.6276,
"mean_token_accuracy": 0.7952337145805359,
"num_tokens": 409418002.0,
"step": 345
},
{
"epoch": 0.5471421592574499,
"grad_norm": 3.166395119010415,
"learning_rate": 5.067433191497221e-06,
"loss": 0.6322,
"mean_token_accuracy": 0.7954030476510525,
"num_tokens": 415344226.0,
"step": 350
},
{
"epoch": 0.5549584758182706,
"grad_norm": 1.526943049343455,
"learning_rate": 4.954102699282953e-06,
"loss": 0.6359,
"mean_token_accuracy": 0.7941608227789402,
"num_tokens": 421277466.0,
"step": 355
},
{
"epoch": 0.5627747923790913,
"grad_norm": 1.9099044626436186,
"learning_rate": 4.841122590927511e-06,
"loss": 0.618,
"mean_token_accuracy": 0.7975563704967499,
"num_tokens": 427241607.0,
"step": 360
},
{
"epoch": 0.570591108939912,
"grad_norm": 2.5623011943198137,
"learning_rate": 4.7285653826464605e-06,
"loss": 0.6272,
"mean_token_accuracy": 0.7963444076478481,
"num_tokens": 433157588.0,
"step": 365
},
{
"epoch": 0.5784074255007328,
"grad_norm": 1.6826723465337794,
"learning_rate": 4.616503319217202e-06,
"loss": 0.6205,
"mean_token_accuracy": 0.7979116909205913,
"num_tokens": 439093218.0,
"step": 370
},
{
"epoch": 0.5862237420615535,
"grad_norm": 2.4233422176625905,
"learning_rate": 4.5050083276087155e-06,
"loss": 0.6371,
"mean_token_accuracy": 0.7943588711321354,
"num_tokens": 445010423.0,
"step": 375
},
{
"epoch": 0.5940400586223742,
"grad_norm": 1.8195780340101564,
"learning_rate": 4.394151970815259e-06,
"loss": 0.613,
"mean_token_accuracy": 0.799777788668871,
"num_tokens": 450918292.0,
"step": 380
},
{
"epoch": 0.6018563751831949,
"grad_norm": 1.8194352528770072,
"learning_rate": 4.284005401923723e-06,
"loss": 0.6225,
"mean_token_accuracy": 0.7965258292853832,
"num_tokens": 456832151.0,
"step": 385
},
{
"epoch": 0.6096726917440156,
"grad_norm": 1.7307705633009496,
"learning_rate": 4.174639318444044e-06,
"loss": 0.6191,
"mean_token_accuracy": 0.7983451545238495,
"num_tokens": 462764585.0,
"step": 390
},
{
"epoch": 0.6174890083048363,
"grad_norm": 1.5492850018471778,
"learning_rate": 4.066123916932069e-06,
"loss": 0.6232,
"mean_token_accuracy": 0.7965681925415993,
"num_tokens": 468701502.0,
"step": 395
},
{
"epoch": 0.625305324865657,
"grad_norm": 1.5785740475386758,
"learning_rate": 3.95852884793392e-06,
"loss": 0.639,
"mean_token_accuracy": 0.7930570214986801,
"num_tokens": 474663601.0,
"step": 400
},
{
"epoch": 0.6331216414264778,
"grad_norm": 1.7178497128631158,
"learning_rate": 3.851923171280848e-06,
"loss": 0.631,
"mean_token_accuracy": 0.7956325292587281,
"num_tokens": 480597092.0,
"step": 405
},
{
"epoch": 0.6409379579872985,
"grad_norm": 2.2284125586269634,
"learning_rate": 3.7463753117632086e-06,
"loss": 0.6194,
"mean_token_accuracy": 0.7979160696268082,
"num_tokens": 486517715.0,
"step": 410
},
{
"epoch": 0.6487542745481192,
"grad_norm": 1.6485020103488872,
"learning_rate": 3.6419530152120585e-06,
"loss": 0.6155,
"mean_token_accuracy": 0.7989446625113488,
"num_tokens": 492472305.0,
"step": 415
},
{
"epoch": 0.6565705911089399,
"grad_norm": 1.6583017170770122,
"learning_rate": 3.5387233050165305e-06,
"loss": 0.6154,
"mean_token_accuracy": 0.7981764920055866,
"num_tokens": 498385685.0,
"step": 420
},
{
"epoch": 0.6643869076697606,
"grad_norm": 1.7340707542937976,
"learning_rate": 3.436752439104914e-06,
"loss": 0.6232,
"mean_token_accuracy": 0.7975495472550392,
"num_tokens": 504307250.0,
"step": 425
},
{
"epoch": 0.6722032242305813,
"grad_norm": 1.7732002119177575,
"learning_rate": 3.336105867417036e-06,
"loss": 0.6136,
"mean_token_accuracy": 0.7990594677627086,
"num_tokens": 510245141.0,
"step": 430
},
{
"epoch": 0.680019540791402,
"grad_norm": 1.7788709835674736,
"learning_rate": 3.236848189895271e-06,
"loss": 0.6221,
"mean_token_accuracy": 0.7987750940024853,
"num_tokens": 516171739.0,
"step": 435
},
{
"epoch": 0.6878358573522227,
"grad_norm": 1.8951328728941093,
"learning_rate": 3.1390431150210858e-06,
"loss": 0.6216,
"mean_token_accuracy": 0.7972325548529625,
"num_tokens": 522101344.0,
"step": 440
},
{
"epoch": 0.6956521739130435,
"grad_norm": 2.265125941545771,
"learning_rate": 3.0427534189238056e-06,
"loss": 0.6272,
"mean_token_accuracy": 0.797095137834549,
"num_tokens": 528042612.0,
"step": 445
},
{
"epoch": 0.7034684904738642,
"grad_norm": 1.895267259607391,
"learning_rate": 2.9480409050877836e-06,
"loss": 0.6146,
"mean_token_accuracy": 0.7996291488409042,
"num_tokens": 533972183.0,
"step": 450
},
{
"epoch": 0.7112848070346849,
"grad_norm": 2.1073222786880508,
"learning_rate": 2.854966364683872e-06,
"loss": 0.6066,
"mean_token_accuracy": 0.8017579860985279,
"num_tokens": 539882836.0,
"step": 455
},
{
"epoch": 0.7191011235955056,
"grad_norm": 1.586483401475392,
"learning_rate": 2.7635895375506516e-06,
"loss": 0.6218,
"mean_token_accuracy": 0.79697345495224,
"num_tokens": 545834579.0,
"step": 460
},
{
"epoch": 0.7269174401563263,
"grad_norm": 1.678175813746661,
"learning_rate": 2.6739690738504428e-06,
"loss": 0.6218,
"mean_token_accuracy": 0.7964953184127808,
"num_tokens": 551762612.0,
"step": 465
},
{
"epoch": 0.734733756717147,
"grad_norm": 4.728635128027955,
"learning_rate": 2.5861624964247402e-06,
"loss": 0.6129,
"mean_token_accuracy": 0.7991872586309909,
"num_tokens": 557733732.0,
"step": 470
},
{
"epoch": 0.7425500732779677,
"grad_norm": 3.1433754367730327,
"learning_rate": 2.5002261638732066e-06,
"loss": 0.6259,
"mean_token_accuracy": 0.796341958642006,
"num_tokens": 563678120.0,
"step": 475
},
{
"epoch": 0.7503663898387885,
"grad_norm": 1.5793999144249435,
"learning_rate": 2.416215234379941e-06,
"loss": 0.6171,
"mean_token_accuracy": 0.7986149400472641,
"num_tokens": 569641651.0,
"step": 480
},
{
"epoch": 0.7581827063996092,
"grad_norm": 1.5637633654884855,
"learning_rate": 2.3341836303102336e-06,
"loss": 0.6168,
"mean_token_accuracy": 0.7989203184843063,
"num_tokens": 575596629.0,
"step": 485
},
{
"epoch": 0.7659990229604299,
"grad_norm": 1.7578654658273658,
"learning_rate": 2.2541840036005227e-06,
"loss": 0.6152,
"mean_token_accuracy": 0.8000254578888416,
"num_tokens": 581506425.0,
"step": 490
},
{
"epoch": 0.7738153395212506,
"grad_norm": 1.6111848358937764,
"learning_rate": 2.1762677019637836e-06,
"loss": 0.615,
"mean_token_accuracy": 0.7983125224709511,
"num_tokens": 587441928.0,
"step": 495
},
{
"epoch": 0.7816316560820713,
"grad_norm": 2.729056351863656,
"learning_rate": 2.100484735932027e-06,
"loss": 0.6219,
"mean_token_accuracy": 0.7974658064544201,
"num_tokens": 593387525.0,
"step": 500
},
{
"epoch": 0.789447972642892,
"grad_norm": 2.0026132736871256,
"learning_rate": 2.0268837467570714e-06,
"loss": 0.6113,
"mean_token_accuracy": 0.7999734558165074,
"num_tokens": 599357871.0,
"step": 505
},
{
"epoch": 0.7972642892037127,
"grad_norm": 3.202086545883362,
"learning_rate": 1.955511975190185e-06,
"loss": 0.6167,
"mean_token_accuracy": 0.7985524848103523,
"num_tokens": 605266801.0,
"step": 510
},
{
"epoch": 0.8050806057645334,
"grad_norm": 2.5845078387849876,
"learning_rate": 1.8864152311606342e-06,
"loss": 0.6184,
"mean_token_accuracy": 0.7977212890982628,
"num_tokens": 611200207.0,
"step": 515
},
{
"epoch": 0.8128969223253542,
"grad_norm": 1.3459195379746025,
"learning_rate": 1.8196378643726092e-06,
"loss": 0.6141,
"mean_token_accuracy": 0.7991946995258331,
"num_tokens": 617152907.0,
"step": 520
},
{
"epoch": 0.8207132388861749,
"grad_norm": 1.6903121443946976,
"learning_rate": 1.7552227358393933e-06,
"loss": 0.6166,
"mean_token_accuracy": 0.7996014229953289,
"num_tokens": 623085161.0,
"step": 525
},
{
"epoch": 0.8285295554469956,
"grad_norm": 5.000507187078701,
"learning_rate": 1.6932111903730453e-06,
"loss": 0.6106,
"mean_token_accuracy": 0.7996949210762978,
"num_tokens": 629027147.0,
"step": 530
},
{
"epoch": 0.8363458720078163,
"grad_norm": 2.026922156270765,
"learning_rate": 1.6336430300472606e-06,
"loss": 0.6045,
"mean_token_accuracy": 0.800903269648552,
"num_tokens": 634987448.0,
"step": 535
},
{
"epoch": 0.844162188568637,
"grad_norm": 4.687498745163573,
"learning_rate": 1.576556488650428e-06,
"loss": 0.6128,
"mean_token_accuracy": 0.7995632983744144,
"num_tokens": 640920908.0,
"step": 540
},
{
"epoch": 0.8519785051294577,
"grad_norm": 1.7305252046419992,
"learning_rate": 1.5219882071452967e-06,
"loss": 0.6059,
"mean_token_accuracy": 0.8014576397836208,
"num_tokens": 646854354.0,
"step": 545
},
{
"epoch": 0.8597948216902784,
"grad_norm": 1.5220809585740145,
"learning_rate": 1.4699732101510026e-06,
"loss": 0.6098,
"mean_token_accuracy": 0.7980151705443859,
"num_tokens": 652785154.0,
"step": 550
},
{
"epoch": 0.8676111382510991,
"grad_norm": 2.3863927864336896,
"learning_rate": 1.4205448834625275e-06,
"loss": 0.6174,
"mean_token_accuracy": 0.7987048149108886,
"num_tokens": 658699371.0,
"step": 555
},
{
"epoch": 0.8754274548119199,
"grad_norm": 4.490834509368819,
"learning_rate": 1.37373495262205e-06,
"loss": 0.6176,
"mean_token_accuracy": 0.7986162424087524,
"num_tokens": 664648725.0,
"step": 560
},
{
"epoch": 0.8832437713727406,
"grad_norm": 1.8330616045846946,
"learning_rate": 1.3295734625559315e-06,
"loss": 0.6157,
"mean_token_accuracy": 0.798362709581852,
"num_tokens": 670559073.0,
"step": 565
},
{
"epoch": 0.8910600879335613,
"grad_norm": 1.8866171760203552,
"learning_rate": 1.2880887582903884e-06,
"loss": 0.6163,
"mean_token_accuracy": 0.7984154649078846,
"num_tokens": 676506420.0,
"step": 570
},
{
"epoch": 0.898876404494382,
"grad_norm": 1.7452455628625234,
"learning_rate": 1.2493074667582584e-06,
"loss": 0.621,
"mean_token_accuracy": 0.7975537806749344,
"num_tokens": 682448693.0,
"step": 575
},
{
"epoch": 0.9066927210552027,
"grad_norm": 2.030867984009538,
"learning_rate": 1.213254479708519e-06,
"loss": 0.6163,
"mean_token_accuracy": 0.7988204933702946,
"num_tokens": 688390022.0,
"step": 580
},
{
"epoch": 0.9145090376160234,
"grad_norm": 1.550865659075602,
"learning_rate": 1.179952937729534e-06,
"loss": 0.6193,
"mean_token_accuracy": 0.7983230344951153,
"num_tokens": 694331611.0,
"step": 585
},
{
"epoch": 0.9223253541768441,
"grad_norm": 1.571410028230591,
"learning_rate": 1.149424215396281e-06,
"loss": 0.6136,
"mean_token_accuracy": 0.7990704528987408,
"num_tokens": 700261331.0,
"step": 590
},
{
"epoch": 0.9301416707376648,
"grad_norm": 2.163421002394794,
"learning_rate": 1.1216879075510877e-06,
"loss": 0.6141,
"mean_token_accuracy": 0.7987876988947391,
"num_tokens": 706193693.0,
"step": 595
},
{
"epoch": 0.9379579872984856,
"grad_norm": 3.7294851958982975,
"learning_rate": 1.0967618167267032e-06,
"loss": 0.5997,
"mean_token_accuracy": 0.8031502008438111,
"num_tokens": 712120509.0,
"step": 600
},
{
"epoch": 0.9457743038593063,
"grad_norm": 1.937019840071183,
"learning_rate": 1.0746619417197436e-06,
"loss": 0.6079,
"mean_token_accuracy": 0.8023772671818733,
"num_tokens": 718044202.0,
"step": 605
},
{
"epoch": 0.953590620420127,
"grad_norm": 1.822048015248325,
"learning_rate": 1.0554024673218808e-06,
"loss": 0.6102,
"mean_token_accuracy": 0.8010022938251495,
"num_tokens": 723993614.0,
"step": 610
},
{
"epoch": 0.9614069369809477,
"grad_norm": 1.6712441013840658,
"learning_rate": 1.0389957552153385e-06,
"loss": 0.6053,
"mean_token_accuracy": 0.8014584824442863,
"num_tokens": 729959466.0,
"step": 615
},
{
"epoch": 0.9692232535417684,
"grad_norm": 3.2548443190376073,
"learning_rate": 1.0254523360385555e-06,
"loss": 0.6146,
"mean_token_accuracy": 0.7999479919672012,
"num_tokens": 735885753.0,
"step": 620
},
{
"epoch": 0.9770395701025891,
"grad_norm": 7.139737923902445,
"learning_rate": 1.0147809026271017e-06,
"loss": 0.6145,
"mean_token_accuracy": 0.7991539172828197,
"num_tokens": 741813482.0,
"step": 625
},
{
"epoch": 0.9848558866634098,
"grad_norm": 1.7874591260763464,
"learning_rate": 1.0069883044341846e-06,
"loss": 0.6142,
"mean_token_accuracy": 0.7999210134148598,
"num_tokens": 747741960.0,
"step": 630
},
{
"epoch": 0.9926722032242306,
"grad_norm": 3.2850538471833945,
"learning_rate": 1.0020795431343349e-06,
"loss": 0.6074,
"mean_token_accuracy": 0.8015001997351646,
"num_tokens": 753638158.0,
"step": 635
},
{
"epoch": 1.0,
"grad_norm": 2.3599969643790777,
"learning_rate": 1.0000577694130827e-06,
"loss": 0.5746,
"mean_token_accuracy": 0.8004830511411031,
"num_tokens": 759191155.0,
"step": 640
},
{
"epoch": 1.0,
"step": 640,
"total_flos": 1.1033250327691264e+16,
"train_loss": 0.6782059136778116,
"train_runtime": 78870.76,
"train_samples_per_second": 4.152,
"train_steps_per_second": 0.008
}
],
"logging_steps": 5,
"max_steps": 640,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 1,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.1033250327691264e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}