{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 200,
"global_step": 134,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.014925373134328358,
"grad_norm": 2.4583136454633454,
"learning_rate": 9.99862592554908e-06,
"loss": 0.1709,
"step": 1
},
{
"epoch": 0.029850746268656716,
"grad_norm": 1.2601474874951664,
"learning_rate": 9.994504457428557e-06,
"loss": 0.1095,
"step": 2
},
{
"epoch": 0.04477611940298507,
"grad_norm": 1.8921245514823541,
"learning_rate": 9.987637860920053e-06,
"loss": 0.1123,
"step": 3
},
{
"epoch": 0.05970149253731343,
"grad_norm": 1.0381477254848812,
"learning_rate": 9.978029910109491e-06,
"loss": 0.0897,
"step": 4
},
{
"epoch": 0.07462686567164178,
"grad_norm": 0.8231161139163885,
"learning_rate": 9.965685885812773e-06,
"loss": 0.0804,
"step": 5
},
{
"epoch": 0.08955223880597014,
"grad_norm": 3.3306861693595704,
"learning_rate": 9.950612572673255e-06,
"loss": 0.1197,
"step": 6
},
{
"epoch": 0.1044776119402985,
"grad_norm": 0.8873942793156309,
"learning_rate": 9.932818255432733e-06,
"loss": 0.1052,
"step": 7
},
{
"epoch": 0.11940298507462686,
"grad_norm": 0.7726110244054883,
"learning_rate": 9.91231271437788e-06,
"loss": 0.09,
"step": 8
},
{
"epoch": 0.13432835820895522,
"grad_norm": 0.7713735038013977,
"learning_rate": 9.889107219964726e-06,
"loss": 0.0847,
"step": 9
},
{
"epoch": 0.14925373134328357,
"grad_norm": 0.8179878874298719,
"learning_rate": 9.863214526624065e-06,
"loss": 0.0899,
"step": 10
},
{
"epoch": 0.16417910447761194,
"grad_norm": 0.793854117532711,
"learning_rate": 9.834648865751254e-06,
"loss": 0.0885,
"step": 11
},
{
"epoch": 0.1791044776119403,
"grad_norm": 0.6649283964833135,
"learning_rate": 9.803425937884202e-06,
"loss": 0.078,
"step": 12
},
{
"epoch": 0.19402985074626866,
"grad_norm": 0.8490726920421898,
"learning_rate": 9.769562904073896e-06,
"loss": 0.0878,
"step": 13
},
{
"epoch": 0.208955223880597,
"grad_norm": 0.7411456396869534,
"learning_rate": 9.733078376452172e-06,
"loss": 0.0881,
"step": 14
},
{
"epoch": 0.22388059701492538,
"grad_norm": 0.7448122603540034,
"learning_rate": 9.693992408001934e-06,
"loss": 0.091,
"step": 15
},
{
"epoch": 0.23880597014925373,
"grad_norm": 0.672661340651816,
"learning_rate": 9.652326481535434e-06,
"loss": 0.0847,
"step": 16
},
{
"epoch": 0.2537313432835821,
"grad_norm": 0.6295351920280576,
"learning_rate": 9.608103497886687e-06,
"loss": 0.0751,
"step": 17
},
{
"epoch": 0.26865671641791045,
"grad_norm": 0.5987419993650692,
"learning_rate": 9.561347763324484e-06,
"loss": 0.0757,
"step": 18
},
{
"epoch": 0.2835820895522388,
"grad_norm": 0.6862099670787631,
"learning_rate": 9.512084976192944e-06,
"loss": 0.0832,
"step": 19
},
{
"epoch": 0.29850746268656714,
"grad_norm": 0.6474317718234073,
"learning_rate": 9.460342212786933e-06,
"loss": 0.0785,
"step": 20
},
{
"epoch": 0.31343283582089554,
"grad_norm": 0.6520670338068375,
"learning_rate": 9.406147912470142e-06,
"loss": 0.0832,
"step": 21
},
{
"epoch": 0.3283582089552239,
"grad_norm": 0.6875547799836488,
"learning_rate": 9.349531862043952e-06,
"loss": 0.0909,
"step": 22
},
{
"epoch": 0.34328358208955223,
"grad_norm": 0.6406155904616685,
"learning_rate": 9.290525179375722e-06,
"loss": 0.0817,
"step": 23
},
{
"epoch": 0.3582089552238806,
"grad_norm": 0.6570196434589318,
"learning_rate": 9.229160296295488e-06,
"loss": 0.0847,
"step": 24
},
{
"epoch": 0.373134328358209,
"grad_norm": 0.5570171509580777,
"learning_rate": 9.165470940770458e-06,
"loss": 0.0739,
"step": 25
},
{
"epoch": 0.3880597014925373,
"grad_norm": 0.620261743808898,
"learning_rate": 9.099492118367123e-06,
"loss": 0.0861,
"step": 26
},
{
"epoch": 0.40298507462686567,
"grad_norm": 0.6886602468358877,
"learning_rate": 9.03126009301115e-06,
"loss": 0.0943,
"step": 27
},
{
"epoch": 0.417910447761194,
"grad_norm": 0.7131708436753863,
"learning_rate": 8.960812367055646e-06,
"loss": 0.0892,
"step": 28
},
{
"epoch": 0.43283582089552236,
"grad_norm": 0.6391387675910597,
"learning_rate": 8.888187660668762e-06,
"loss": 0.0832,
"step": 29
},
{
"epoch": 0.44776119402985076,
"grad_norm": 0.6940047679170313,
"learning_rate": 8.81342589055191e-06,
"loss": 0.0906,
"step": 30
},
{
"epoch": 0.4626865671641791,
"grad_norm": 0.6503008059328946,
"learning_rate": 8.736568148000386e-06,
"loss": 0.0968,
"step": 31
},
{
"epoch": 0.47761194029850745,
"grad_norm": 0.6090207680984978,
"learning_rate": 8.657656676318346e-06,
"loss": 0.0847,
"step": 32
},
{
"epoch": 0.4925373134328358,
"grad_norm": 0.6434605637551735,
"learning_rate": 8.576734847600639e-06,
"loss": 0.0853,
"step": 33
},
{
"epoch": 0.5074626865671642,
"grad_norm": 0.6377279956113436,
"learning_rate": 8.49384713889421e-06,
"loss": 0.0899,
"step": 34
},
{
"epoch": 0.5223880597014925,
"grad_norm": 0.644362530558179,
"learning_rate": 8.40903910775219e-06,
"loss": 0.0827,
"step": 35
},
{
"epoch": 0.5373134328358209,
"grad_norm": 0.642985561685258,
"learning_rate": 8.32235736719411e-06,
"loss": 0.0888,
"step": 36
},
{
"epoch": 0.5522388059701493,
"grad_norm": 0.6790092652999941,
"learning_rate": 8.233849560085994e-06,
"loss": 0.0846,
"step": 37
},
{
"epoch": 0.5671641791044776,
"grad_norm": 0.7854755458868027,
"learning_rate": 8.143564332954426e-06,
"loss": 0.1031,
"step": 38
},
{
"epoch": 0.582089552238806,
"grad_norm": 0.6030440972462469,
"learning_rate": 8.051551309248961e-06,
"loss": 0.0849,
"step": 39
},
{
"epoch": 0.5970149253731343,
"grad_norm": 0.6418639082548031,
"learning_rate": 7.957861062067614e-06,
"loss": 0.0852,
"step": 40
},
{
"epoch": 0.6119402985074627,
"grad_norm": 0.6566500879981874,
"learning_rate": 7.86254508636036e-06,
"loss": 0.0887,
"step": 41
},
{
"epoch": 0.6268656716417911,
"grad_norm": 0.6290631945385817,
"learning_rate": 7.765655770625997e-06,
"loss": 0.0855,
"step": 42
},
{
"epoch": 0.6417910447761194,
"grad_norm": 0.6140886580472066,
"learning_rate": 7.667246368117852e-06,
"loss": 0.0818,
"step": 43
},
{
"epoch": 0.6567164179104478,
"grad_norm": 0.6174619223952549,
"learning_rate": 7.56737096757421e-06,
"loss": 0.0856,
"step": 44
},
{
"epoch": 0.6716417910447762,
"grad_norm": 0.6195862366153396,
"learning_rate": 7.466084463489537e-06,
"loss": 0.0801,
"step": 45
},
{
"epoch": 0.6865671641791045,
"grad_norm": 0.6331901290551701,
"learning_rate": 7.363442525942827e-06,
"loss": 0.0844,
"step": 46
},
{
"epoch": 0.7014925373134329,
"grad_norm": 0.7083908752048309,
"learning_rate": 7.25950156999967e-06,
"loss": 0.0894,
"step": 47
},
{
"epoch": 0.7164179104477612,
"grad_norm": 0.6388671244490378,
"learning_rate": 7.1543187247048525e-06,
"loss": 0.0906,
"step": 48
},
{
"epoch": 0.7313432835820896,
"grad_norm": 0.5777376817133963,
"learning_rate": 7.047951801682533e-06,
"loss": 0.0775,
"step": 49
},
{
"epoch": 0.746268656716418,
"grad_norm": 0.5907825361104937,
"learning_rate": 6.9404592633612486e-06,
"loss": 0.08,
"step": 50
},
{
"epoch": 0.7611940298507462,
"grad_norm": 0.6197333346964937,
"learning_rate": 6.831900190841232e-06,
"loss": 0.0852,
"step": 51
},
{
"epoch": 0.7761194029850746,
"grad_norm": 0.580913389747538,
"learning_rate": 6.722334251421665e-06,
"loss": 0.0786,
"step": 52
},
{
"epoch": 0.7910447761194029,
"grad_norm": 0.6769846397314266,
"learning_rate": 6.611821665805769e-06,
"loss": 0.0859,
"step": 53
},
{
"epoch": 0.8059701492537313,
"grad_norm": 0.6493382927830379,
"learning_rate": 6.500423175001705e-06,
"loss": 0.0918,
"step": 54
},
{
"epoch": 0.8208955223880597,
"grad_norm": 0.6002157339319569,
"learning_rate": 6.388200006937503e-06,
"loss": 0.089,
"step": 55
},
{
"epoch": 0.835820895522388,
"grad_norm": 0.5447504002720616,
"learning_rate": 6.275213842808383e-06,
"loss": 0.0731,
"step": 56
},
{
"epoch": 0.8507462686567164,
"grad_norm": 0.5621404387468115,
"learning_rate": 6.161526783174917e-06,
"loss": 0.0762,
"step": 57
},
{
"epoch": 0.8656716417910447,
"grad_norm": 0.6279143619008688,
"learning_rate": 6.047201313830724e-06,
"loss": 0.0921,
"step": 58
},
{
"epoch": 0.8805970149253731,
"grad_norm": 0.5567853485138207,
"learning_rate": 5.932300271458406e-06,
"loss": 0.0692,
"step": 59
},
{
"epoch": 0.8955223880597015,
"grad_norm": 0.5831390070498211,
"learning_rate": 5.816886809092651e-06,
"loss": 0.0777,
"step": 60
},
{
"epoch": 0.9104477611940298,
"grad_norm": 0.575437949186205,
"learning_rate": 5.701024361409431e-06,
"loss": 0.0803,
"step": 61
},
{
"epoch": 0.9253731343283582,
"grad_norm": 0.6339642524977537,
"learning_rate": 5.584776609860414e-06,
"loss": 0.0893,
"step": 62
},
{
"epoch": 0.9402985074626866,
"grad_norm": 0.5924535700885266,
"learning_rate": 5.468207447671755e-06,
"loss": 0.0844,
"step": 63
},
{
"epoch": 0.9552238805970149,
"grad_norm": 0.6155460855755083,
"learning_rate": 5.351380944726465e-06,
"loss": 0.0836,
"step": 64
},
{
"epoch": 0.9701492537313433,
"grad_norm": 0.640329747223743,
"learning_rate": 5.234361312349701e-06,
"loss": 0.0951,
"step": 65
},
{
"epoch": 0.9850746268656716,
"grad_norm": 0.5300911342515855,
"learning_rate": 5.117212868016303e-06,
"loss": 0.0655,
"step": 66
},
{
"epoch": 1.0,
"grad_norm": 0.4687094870612933,
"learning_rate": 5e-06,
"loss": 0.0424,
"step": 67
},
{
"epoch": 1.0149253731343284,
"grad_norm": 0.4076938808158179,
"learning_rate": 4.882787131983698e-06,
"loss": 0.0319,
"step": 68
},
{
"epoch": 1.0298507462686568,
"grad_norm": 0.44699344595961127,
"learning_rate": 4.765638687650299e-06,
"loss": 0.0397,
"step": 69
},
{
"epoch": 1.044776119402985,
"grad_norm": 0.44767644296408526,
"learning_rate": 4.6486190552735375e-06,
"loss": 0.0335,
"step": 70
},
{
"epoch": 1.0597014925373134,
"grad_norm": 0.37505346262796413,
"learning_rate": 4.531792552328247e-06,
"loss": 0.0285,
"step": 71
},
{
"epoch": 1.0746268656716418,
"grad_norm": 0.4657077829483448,
"learning_rate": 4.415223390139588e-06,
"loss": 0.0326,
"step": 72
},
{
"epoch": 1.0895522388059702,
"grad_norm": 0.34306186183329557,
"learning_rate": 4.2989756385905715e-06,
"loss": 0.0244,
"step": 73
},
{
"epoch": 1.1044776119402986,
"grad_norm": 0.39032497882907513,
"learning_rate": 4.183113190907349e-06,
"loss": 0.027,
"step": 74
},
{
"epoch": 1.1194029850746268,
"grad_norm": 0.4702211175747881,
"learning_rate": 4.067699728541595e-06,
"loss": 0.0316,
"step": 75
},
{
"epoch": 1.1343283582089552,
"grad_norm": 0.4455655473637308,
"learning_rate": 3.952798686169279e-06,
"loss": 0.0303,
"step": 76
},
{
"epoch": 1.1492537313432836,
"grad_norm": 0.4371493860049261,
"learning_rate": 3.838473216825085e-06,
"loss": 0.0282,
"step": 77
},
{
"epoch": 1.164179104477612,
"grad_norm": 0.4289807711491057,
"learning_rate": 3.7247861571916183e-06,
"loss": 0.0272,
"step": 78
},
{
"epoch": 1.1791044776119404,
"grad_norm": 0.5368576489602601,
"learning_rate": 3.611799993062497e-06,
"loss": 0.0351,
"step": 79
},
{
"epoch": 1.1940298507462686,
"grad_norm": 0.6142429871519881,
"learning_rate": 3.4995768249982975e-06,
"loss": 0.0377,
"step": 80
},
{
"epoch": 1.208955223880597,
"grad_norm": 0.4437616080442814,
"learning_rate": 3.388178334194232e-06,
"loss": 0.0254,
"step": 81
},
{
"epoch": 1.2238805970149254,
"grad_norm": 0.425005368183552,
"learning_rate": 3.2776657485783357e-06,
"loss": 0.0217,
"step": 82
},
{
"epoch": 1.2388059701492538,
"grad_norm": 0.45656133078022915,
"learning_rate": 3.168099809158769e-06,
"loss": 0.0257,
"step": 83
},
{
"epoch": 1.2537313432835822,
"grad_norm": 0.543019359956107,
"learning_rate": 3.059540736638751e-06,
"loss": 0.0291,
"step": 84
},
{
"epoch": 1.2686567164179103,
"grad_norm": 0.48489281581823257,
"learning_rate": 2.9520481983174675e-06,
"loss": 0.0249,
"step": 85
},
{
"epoch": 1.2835820895522387,
"grad_norm": 0.5065165276548232,
"learning_rate": 2.8456812752951483e-06,
"loss": 0.0226,
"step": 86
},
{
"epoch": 1.2985074626865671,
"grad_norm": 0.5835532502940851,
"learning_rate": 2.740498430000332e-06,
"loss": 0.0262,
"step": 87
},
{
"epoch": 1.3134328358208955,
"grad_norm": 0.41781499689334245,
"learning_rate": 2.636557474057173e-06,
"loss": 0.0171,
"step": 88
},
{
"epoch": 1.328358208955224,
"grad_norm": 0.60361339503175,
"learning_rate": 2.533915536510464e-06,
"loss": 0.0249,
"step": 89
},
{
"epoch": 1.3432835820895521,
"grad_norm": 0.47456828088394937,
"learning_rate": 2.4326290324257896e-06,
"loss": 0.0246,
"step": 90
},
{
"epoch": 1.3582089552238805,
"grad_norm": 0.4465597229891302,
"learning_rate": 2.3327536318821496e-06,
"loss": 0.0183,
"step": 91
},
{
"epoch": 1.373134328358209,
"grad_norm": 0.49082928585159896,
"learning_rate": 2.234344229374003e-06,
"loss": 0.0272,
"step": 92
},
{
"epoch": 1.3880597014925373,
"grad_norm": 0.4703289245897022,
"learning_rate": 2.1374549136396417e-06,
"loss": 0.0259,
"step": 93
},
{
"epoch": 1.4029850746268657,
"grad_norm": 0.4919381501347153,
"learning_rate": 2.042138937932388e-06,
"loss": 0.0244,
"step": 94
},
{
"epoch": 1.417910447761194,
"grad_norm": 0.48234747726237315,
"learning_rate": 1.9484486907510405e-06,
"loss": 0.0259,
"step": 95
},
{
"epoch": 1.4328358208955223,
"grad_norm": 0.46679191916730023,
"learning_rate": 1.856435667045577e-06,
"loss": 0.0248,
"step": 96
},
{
"epoch": 1.4477611940298507,
"grad_norm": 0.4915526917266499,
"learning_rate": 1.7661504399140066e-06,
"loss": 0.0253,
"step": 97
},
{
"epoch": 1.462686567164179,
"grad_norm": 0.4959939539468189,
"learning_rate": 1.677642632805892e-06,
"loss": 0.029,
"step": 98
},
{
"epoch": 1.4776119402985075,
"grad_norm": 0.47533058613210927,
"learning_rate": 1.5909608922478108e-06,
"loss": 0.0222,
"step": 99
},
{
"epoch": 1.4925373134328357,
"grad_norm": 0.41184618371805026,
"learning_rate": 1.5061528611057917e-06,
"loss": 0.0186,
"step": 100
},
{
"epoch": 1.5074626865671643,
"grad_norm": 0.5029260437214431,
"learning_rate": 1.4232651523993635e-06,
"loss": 0.0246,
"step": 101
},
{
"epoch": 1.5223880597014925,
"grad_norm": 0.4856272674409962,
"learning_rate": 1.3423433236816563e-06,
"loss": 0.0262,
"step": 102
},
{
"epoch": 1.537313432835821,
"grad_norm": 0.5515452132397586,
"learning_rate": 1.2634318519996148e-06,
"loss": 0.0265,
"step": 103
},
{
"epoch": 1.5522388059701493,
"grad_norm": 0.48385801987338495,
"learning_rate": 1.186574109448091e-06,
"loss": 0.0253,
"step": 104
},
{
"epoch": 1.5671641791044775,
"grad_norm": 0.4977052401413162,
"learning_rate": 1.1118123393312397e-06,
"loss": 0.0317,
"step": 105
},
{
"epoch": 1.582089552238806,
"grad_norm": 0.5013927727792972,
"learning_rate": 1.0391876329443534e-06,
"loss": 0.0248,
"step": 106
},
{
"epoch": 1.5970149253731343,
"grad_norm": 0.5012798267287072,
"learning_rate": 9.687399069888515e-07,
"loss": 0.0253,
"step": 107
},
{
"epoch": 1.6119402985074627,
"grad_norm": 0.4798753241925998,
"learning_rate": 9.005078816328772e-07,
"loss": 0.0208,
"step": 108
},
{
"epoch": 1.626865671641791,
"grad_norm": 0.4892720130785747,
"learning_rate": 8.345290592295429e-07,
"loss": 0.0256,
"step": 109
},
{
"epoch": 1.6417910447761193,
"grad_norm": 0.44880610381720976,
"learning_rate": 7.708397037045129e-07,
"loss": 0.024,
"step": 110
},
{
"epoch": 1.6567164179104479,
"grad_norm": 0.5198878749228643,
"learning_rate": 7.094748206242797e-07,
"loss": 0.0282,
"step": 111
},
{
"epoch": 1.671641791044776,
"grad_norm": 0.4259309199030039,
"learning_rate": 6.50468137956049e-07,
"loss": 0.0203,
"step": 112
},
{
"epoch": 1.6865671641791045,
"grad_norm": 0.44723898312223315,
"learning_rate": 5.938520875298587e-07,
"loss": 0.0262,
"step": 113
},
{
"epoch": 1.7014925373134329,
"grad_norm": 0.4649229524932995,
"learning_rate": 5.396577872130676e-07,
"loss": 0.0234,
"step": 114
},
{
"epoch": 1.716417910447761,
"grad_norm": 0.46689935485099987,
"learning_rate": 4.879150238070585e-07,
"loss": 0.0235,
"step": 115
},
{
"epoch": 1.7313432835820897,
"grad_norm": 0.4250501253858005,
"learning_rate": 4.386522366755169e-07,
"loss": 0.021,
"step": 116
},
{
"epoch": 1.7462686567164178,
"grad_norm": 0.4965625077414352,
"learning_rate": 3.918965021133131e-07,
"loss": 0.0285,
"step": 117
},
{
"epoch": 1.7611940298507462,
"grad_norm": 0.5192751153508381,
"learning_rate": 3.4767351846456744e-07,
"loss": 0.0294,
"step": 118
},
{
"epoch": 1.7761194029850746,
"grad_norm": 0.4555879578505303,
"learning_rate": 3.0600759199806815e-07,
"loss": 0.026,
"step": 119
},
{
"epoch": 1.7910447761194028,
"grad_norm": 0.4702463723473376,
"learning_rate": 2.669216235478295e-07,
"loss": 0.0268,
"step": 120
},
{
"epoch": 1.8059701492537314,
"grad_norm": 0.5014638902986028,
"learning_rate": 2.3043709592610486e-07,
"loss": 0.0282,
"step": 121
},
{
"epoch": 1.8208955223880596,
"grad_norm": 0.5015871929717038,
"learning_rate": 1.9657406211579966e-07,
"loss": 0.0276,
"step": 122
},
{
"epoch": 1.835820895522388,
"grad_norm": 0.46199043552912816,
"learning_rate": 1.6535113424874683e-07,
"loss": 0.0253,
"step": 123
},
{
"epoch": 1.8507462686567164,
"grad_norm": 0.472386855814729,
"learning_rate": 1.3678547337593494e-07,
"loss": 0.0231,
"step": 124
},
{
"epoch": 1.8656716417910446,
"grad_norm": 0.44628541420234796,
"learning_rate": 1.1089278003527438e-07,
"loss": 0.0229,
"step": 125
},
{
"epoch": 1.8805970149253732,
"grad_norm": 0.4945658271403232,
"learning_rate": 8.768728562211948e-08,
"loss": 0.0259,
"step": 126
},
{
"epoch": 1.8955223880597014,
"grad_norm": 0.4631666908930044,
"learning_rate": 6.718174456726789e-08,
"loss": 0.0254,
"step": 127
},
{
"epoch": 1.9104477611940298,
"grad_norm": 0.47821378359248506,
"learning_rate": 4.9387427326745287e-08,
"loss": 0.0257,
"step": 128
},
{
"epoch": 1.9253731343283582,
"grad_norm": 0.4463060792083917,
"learning_rate": 3.431411418722941e-08,
"loss": 0.023,
"step": 129
},
{
"epoch": 1.9402985074626866,
"grad_norm": 0.4458879106581698,
"learning_rate": 2.1970089890509527e-08,
"loss": 0.0237,
"step": 130
},
{
"epoch": 1.955223880597015,
"grad_norm": 0.39654579447619215,
"learning_rate": 1.2362139079949431e-08,
"loss": 0.0193,
"step": 131
},
{
"epoch": 1.9701492537313432,
"grad_norm": 0.4384754664189478,
"learning_rate": 5.495542571443135e-09,
"loss": 0.0227,
"step": 132
},
{
"epoch": 1.9850746268656716,
"grad_norm": 0.40331068244695245,
"learning_rate": 1.3740744509205263e-09,
"loss": 0.0209,
"step": 133
},
{
"epoch": 2.0,
"grad_norm": 0.3037784601924941,
"learning_rate": 0.0,
"loss": 0.013,
"step": 134
},
{
"epoch": 2.0,
"step": 134,
"total_flos": 8781261963264.0,
"train_loss": 0.05639281026574213,
"train_runtime": 528.9145,
"train_samples_per_second": 2.0,
"train_steps_per_second": 0.253
}
],
"logging_steps": 1,
"max_steps": 134,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 2000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8781261963264.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}