{
  "best_metric": 1.3203791379928589,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 0.15088645794039984,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0007544322897019992,
      "grad_norm": 28.610990524291992,
      "learning_rate": 5e-06,
      "loss": 8.4345,
      "step": 1
    },
    {
      "epoch": 0.0007544322897019992,
      "eval_loss": 2.208278179168701,
      "eval_runtime": 220.3339,
      "eval_samples_per_second": 10.135,
      "eval_steps_per_second": 5.07,
      "step": 1
    },
    {
      "epoch": 0.0015088645794039985,
      "grad_norm": 29.75434112548828,
      "learning_rate": 1e-05,
      "loss": 7.8978,
      "step": 2
    },
    {
      "epoch": 0.002263296869105998,
      "grad_norm": 25.781906127929688,
      "learning_rate": 1.5e-05,
      "loss": 8.1028,
      "step": 3
    },
    {
      "epoch": 0.003017729158807997,
      "grad_norm": 21.908246994018555,
      "learning_rate": 2e-05,
      "loss": 7.9355,
      "step": 4
    },
    {
      "epoch": 0.003772161448509996,
      "grad_norm": 21.276395797729492,
      "learning_rate": 2.5e-05,
      "loss": 7.2463,
      "step": 5
    },
    {
      "epoch": 0.004526593738211996,
      "grad_norm": 18.05831527709961,
      "learning_rate": 3e-05,
      "loss": 7.0793,
      "step": 6
    },
    {
      "epoch": 0.005281026027913994,
      "grad_norm": 18.404077529907227,
      "learning_rate": 3.5e-05,
      "loss": 6.6955,
      "step": 7
    },
    {
      "epoch": 0.006035458317615994,
      "grad_norm": 14.49231243133545,
      "learning_rate": 4e-05,
      "loss": 6.241,
      "step": 8
    },
    {
      "epoch": 0.0067898906073179935,
      "grad_norm": 13.323002815246582,
      "learning_rate": 4.5e-05,
      "loss": 6.7582,
      "step": 9
    },
    {
      "epoch": 0.007544322897019992,
      "grad_norm": 13.41299819946289,
      "learning_rate": 5e-05,
      "loss": 6.5987,
      "step": 10
    },
    {
      "epoch": 0.008298755186721992,
      "grad_norm": 12.793521881103516,
      "learning_rate": 5.500000000000001e-05,
      "loss": 6.5348,
      "step": 11
    },
    {
      "epoch": 0.009053187476423991,
      "grad_norm": 12.212225914001465,
      "learning_rate": 6e-05,
      "loss": 5.9619,
      "step": 12
    },
    {
      "epoch": 0.00980761976612599,
      "grad_norm": 15.029327392578125,
      "learning_rate": 6.500000000000001e-05,
      "loss": 6.5328,
      "step": 13
    },
    {
      "epoch": 0.010562052055827989,
      "grad_norm": 41.65517807006836,
      "learning_rate": 7e-05,
      "loss": 6.5128,
      "step": 14
    },
    {
      "epoch": 0.011316484345529988,
      "grad_norm": 26.0521240234375,
      "learning_rate": 7.500000000000001e-05,
      "loss": 6.2811,
      "step": 15
    },
    {
      "epoch": 0.012070916635231988,
      "grad_norm": 16.089492797851562,
      "learning_rate": 8e-05,
      "loss": 6.7146,
      "step": 16
    },
    {
      "epoch": 0.012825348924933987,
      "grad_norm": 11.127829551696777,
      "learning_rate": 8.5e-05,
      "loss": 6.6048,
      "step": 17
    },
    {
      "epoch": 0.013579781214635987,
      "grad_norm": 12.313581466674805,
      "learning_rate": 9e-05,
      "loss": 6.6018,
      "step": 18
    },
    {
      "epoch": 0.014334213504337986,
      "grad_norm": 9.649194717407227,
      "learning_rate": 9.5e-05,
      "loss": 6.4406,
      "step": 19
    },
    {
      "epoch": 0.015088645794039984,
      "grad_norm": 9.973852157592773,
      "learning_rate": 0.0001,
      "loss": 5.958,
      "step": 20
    },
    {
      "epoch": 0.015843078083741986,
      "grad_norm": 10.235247611999512,
      "learning_rate": 9.999238475781957e-05,
      "loss": 6.0049,
      "step": 21
    },
    {
      "epoch": 0.016597510373443983,
      "grad_norm": 12.643736839294434,
      "learning_rate": 9.99695413509548e-05,
      "loss": 5.9654,
      "step": 22
    },
    {
      "epoch": 0.01735194266314598,
      "grad_norm": 11.938157081604004,
      "learning_rate": 9.99314767377287e-05,
      "loss": 5.579,
      "step": 23
    },
    {
      "epoch": 0.018106374952847983,
      "grad_norm": 11.99202823638916,
      "learning_rate": 9.987820251299122e-05,
      "loss": 6.014,
      "step": 24
    },
    {
      "epoch": 0.01886080724254998,
      "grad_norm": 12.833913803100586,
      "learning_rate": 9.980973490458728e-05,
      "loss": 6.1423,
      "step": 25
    },
    {
      "epoch": 0.01961523953225198,
      "grad_norm": 9.916985511779785,
      "learning_rate": 9.972609476841367e-05,
      "loss": 5.8949,
      "step": 26
    },
    {
      "epoch": 0.02036967182195398,
      "grad_norm": 11.102204322814941,
      "learning_rate": 9.962730758206611e-05,
      "loss": 6.1882,
      "step": 27
    },
    {
      "epoch": 0.021124104111655977,
      "grad_norm": 10.992071151733398,
      "learning_rate": 9.951340343707852e-05,
      "loss": 6.3971,
      "step": 28
    },
    {
      "epoch": 0.02187853640135798,
      "grad_norm": 10.18704605102539,
      "learning_rate": 9.938441702975689e-05,
      "loss": 6.0592,
      "step": 29
    },
    {
      "epoch": 0.022632968691059976,
      "grad_norm": 11.332548141479492,
      "learning_rate": 9.924038765061042e-05,
      "loss": 6.082,
      "step": 30
    },
    {
      "epoch": 0.023387400980761978,
      "grad_norm": 9.355793952941895,
      "learning_rate": 9.908135917238321e-05,
      "loss": 5.669,
      "step": 31
    },
    {
      "epoch": 0.024141833270463976,
      "grad_norm": 9.461305618286133,
      "learning_rate": 9.890738003669029e-05,
      "loss": 5.5116,
      "step": 32
    },
    {
      "epoch": 0.024896265560165973,
      "grad_norm": 10.504962921142578,
      "learning_rate": 9.871850323926177e-05,
      "loss": 6.328,
      "step": 33
    },
    {
      "epoch": 0.025650697849867975,
      "grad_norm": 9.149413108825684,
      "learning_rate": 9.851478631379982e-05,
      "loss": 5.3682,
      "step": 34
    },
    {
      "epoch": 0.026405130139569973,
      "grad_norm": 9.902061462402344,
      "learning_rate": 9.829629131445342e-05,
      "loss": 5.6849,
      "step": 35
    },
    {
      "epoch": 0.027159562429271974,
      "grad_norm": 9.989099502563477,
      "learning_rate": 9.806308479691595e-05,
      "loss": 5.3639,
      "step": 36
    },
    {
      "epoch": 0.02791399471897397,
      "grad_norm": 9.415989875793457,
      "learning_rate": 9.781523779815179e-05,
      "loss": 5.2341,
      "step": 37
    },
    {
      "epoch": 0.028668427008675973,
      "grad_norm": 11.008522987365723,
      "learning_rate": 9.755282581475769e-05,
      "loss": 5.0794,
      "step": 38
    },
    {
      "epoch": 0.02942285929837797,
      "grad_norm": 10.653331756591797,
      "learning_rate": 9.727592877996585e-05,
      "loss": 6.2048,
      "step": 39
    },
    {
      "epoch": 0.03017729158807997,
      "grad_norm": 14.365373611450195,
      "learning_rate": 9.698463103929542e-05,
      "loss": 5.6984,
      "step": 40
    },
    {
      "epoch": 0.03093172387778197,
      "grad_norm": 11.19192886352539,
      "learning_rate": 9.667902132486009e-05,
      "loss": 5.2766,
      "step": 41
    },
    {
      "epoch": 0.03168615616748397,
      "grad_norm": 10.388138771057129,
      "learning_rate": 9.635919272833938e-05,
      "loss": 5.033,
      "step": 42
    },
    {
      "epoch": 0.03244058845718597,
      "grad_norm": 9.952187538146973,
      "learning_rate": 9.602524267262203e-05,
      "loss": 4.9101,
      "step": 43
    },
    {
      "epoch": 0.03319502074688797,
      "grad_norm": 10.397631645202637,
      "learning_rate": 9.567727288213005e-05,
      "loss": 5.0858,
      "step": 44
    },
    {
      "epoch": 0.033949453036589965,
      "grad_norm": 10.087145805358887,
      "learning_rate": 9.53153893518325e-05,
      "loss": 4.4193,
      "step": 45
    },
    {
      "epoch": 0.03470388532629196,
      "grad_norm": 11.928916931152344,
      "learning_rate": 9.493970231495835e-05,
      "loss": 4.2049,
      "step": 46
    },
    {
      "epoch": 0.03545831761599397,
      "grad_norm": 13.15577220916748,
      "learning_rate": 9.45503262094184e-05,
      "loss": 3.8644,
      "step": 47
    },
    {
      "epoch": 0.036212749905695965,
      "grad_norm": 13.862105369567871,
      "learning_rate": 9.414737964294636e-05,
      "loss": 3.52,
      "step": 48
    },
    {
      "epoch": 0.03696718219539796,
      "grad_norm": 9.312602996826172,
      "learning_rate": 9.373098535696979e-05,
      "loss": 1.3459,
      "step": 49
    },
    {
      "epoch": 0.03772161448509996,
      "grad_norm": 15.924839973449707,
      "learning_rate": 9.330127018922194e-05,
      "loss": 2.7023,
      "step": 50
    },
    {
      "epoch": 0.03772161448509996,
      "eval_loss": 1.6293420791625977,
      "eval_runtime": 222.4612,
      "eval_samples_per_second": 10.038,
      "eval_steps_per_second": 5.021,
      "step": 50
    },
    {
      "epoch": 0.03847604677480196,
      "grad_norm": 14.912610054016113,
      "learning_rate": 9.285836503510562e-05,
      "loss": 6.8462,
      "step": 51
    },
    {
      "epoch": 0.03923047906450396,
      "grad_norm": 12.471588134765625,
      "learning_rate": 9.24024048078213e-05,
      "loss": 7.0599,
      "step": 52
    },
    {
      "epoch": 0.03998491135420596,
      "grad_norm": 8.87014389038086,
      "learning_rate": 9.193352839727121e-05,
      "loss": 6.3873,
      "step": 53
    },
    {
      "epoch": 0.04073934364390796,
      "grad_norm": 8.259026527404785,
      "learning_rate": 9.145187862775209e-05,
      "loss": 5.9529,
      "step": 54
    },
    {
      "epoch": 0.04149377593360996,
      "grad_norm": 10.203092575073242,
      "learning_rate": 9.09576022144496e-05,
      "loss": 6.6684,
      "step": 55
    },
    {
      "epoch": 0.042248208223311955,
      "grad_norm": 8.597597122192383,
      "learning_rate": 9.045084971874738e-05,
      "loss": 6.3543,
      "step": 56
    },
    {
      "epoch": 0.04300264051301396,
      "grad_norm": 8.163016319274902,
      "learning_rate": 8.993177550236464e-05,
      "loss": 5.8639,
      "step": 57
    },
    {
      "epoch": 0.04375707280271596,
      "grad_norm": 8.327492713928223,
      "learning_rate": 8.940053768033609e-05,
      "loss": 6.0806,
      "step": 58
    },
    {
      "epoch": 0.044511505092417955,
      "grad_norm": 7.924893856048584,
      "learning_rate": 8.885729807284856e-05,
      "loss": 6.2455,
      "step": 59
    },
    {
      "epoch": 0.04526593738211995,
      "grad_norm": 8.130339622497559,
      "learning_rate": 8.83022221559489e-05,
      "loss": 5.4982,
      "step": 60
    },
    {
      "epoch": 0.04602036967182195,
      "grad_norm": 8.018017768859863,
      "learning_rate": 8.773547901113862e-05,
      "loss": 5.7014,
      "step": 61
    },
    {
      "epoch": 0.046774801961523955,
      "grad_norm": 8.45346736907959,
      "learning_rate": 8.715724127386972e-05,
      "loss": 6.2539,
      "step": 62
    },
    {
      "epoch": 0.04752923425122595,
      "grad_norm": 10.911635398864746,
      "learning_rate": 8.656768508095853e-05,
      "loss": 6.3726,
      "step": 63
    },
    {
      "epoch": 0.04828366654092795,
      "grad_norm": 9.591697692871094,
      "learning_rate": 8.596699001693255e-05,
      "loss": 6.1596,
      "step": 64
    },
    {
      "epoch": 0.04903809883062995,
      "grad_norm": 7.932915210723877,
      "learning_rate": 8.535533905932738e-05,
      "loss": 6.1618,
      "step": 65
    },
    {
      "epoch": 0.04979253112033195,
      "grad_norm": 8.063417434692383,
      "learning_rate": 8.473291852294987e-05,
      "loss": 5.4566,
      "step": 66
    },
    {
      "epoch": 0.05054696341003395,
      "grad_norm": 8.73833179473877,
      "learning_rate": 8.409991800312493e-05,
      "loss": 6.3322,
      "step": 67
    },
    {
      "epoch": 0.05130139569973595,
      "grad_norm": 10.085183143615723,
      "learning_rate": 8.345653031794292e-05,
      "loss": 5.8554,
      "step": 68
    },
    {
      "epoch": 0.05205582798943795,
      "grad_norm": 10.401449203491211,
      "learning_rate": 8.280295144952536e-05,
      "loss": 6.3202,
      "step": 69
    },
    {
      "epoch": 0.052810260279139945,
      "grad_norm": 8.073527336120605,
      "learning_rate": 8.213938048432697e-05,
      "loss": 5.6743,
      "step": 70
    },
    {
      "epoch": 0.05356469256884195,
      "grad_norm": 8.164782524108887,
      "learning_rate": 8.146601955249188e-05,
      "loss": 5.9845,
      "step": 71
    },
    {
      "epoch": 0.05431912485854395,
      "grad_norm": 7.460975170135498,
      "learning_rate": 8.07830737662829e-05,
      "loss": 6.0119,
      "step": 72
    },
    {
      "epoch": 0.055073557148245945,
      "grad_norm": 8.160001754760742,
      "learning_rate": 8.009075115760243e-05,
      "loss": 5.8083,
      "step": 73
    },
    {
      "epoch": 0.05582798943794794,
      "grad_norm": 7.511910438537598,
      "learning_rate": 7.938926261462366e-05,
      "loss": 5.9554,
      "step": 74
    },
    {
      "epoch": 0.05658242172764994,
      "grad_norm": 8.06807804107666,
      "learning_rate": 7.86788218175523e-05,
      "loss": 5.4149,
      "step": 75
    },
    {
      "epoch": 0.057336854017351946,
      "grad_norm": 8.23879623413086,
      "learning_rate": 7.795964517353735e-05,
      "loss": 6.0332,
      "step": 76
    },
    {
      "epoch": 0.058091286307053944,
      "grad_norm": 8.210224151611328,
      "learning_rate": 7.723195175075136e-05,
      "loss": 5.5186,
      "step": 77
    },
    {
      "epoch": 0.05884571859675594,
      "grad_norm": 8.879250526428223,
      "learning_rate": 7.649596321166024e-05,
      "loss": 5.9477,
      "step": 78
    },
    {
      "epoch": 0.05960015088645794,
      "grad_norm": 8.089262962341309,
      "learning_rate": 7.575190374550272e-05,
      "loss": 5.8029,
      "step": 79
    },
    {
      "epoch": 0.06035458317615994,
      "grad_norm": 8.588730812072754,
      "learning_rate": 7.500000000000001e-05,
      "loss": 5.6917,
      "step": 80
    },
    {
      "epoch": 0.06110901546586194,
      "grad_norm": 7.952780723571777,
      "learning_rate": 7.424048101231686e-05,
      "loss": 5.4262,
      "step": 81
    },
    {
      "epoch": 0.06186344775556394,
      "grad_norm": 7.111525058746338,
      "learning_rate": 7.347357813929454e-05,
      "loss": 5.1552,
      "step": 82
    },
    {
      "epoch": 0.06261788004526593,
      "grad_norm": 8.799844741821289,
      "learning_rate": 7.269952498697734e-05,
      "loss": 6.2327,
      "step": 83
    },
    {
      "epoch": 0.06337231233496794,
      "grad_norm": 7.895553112030029,
      "learning_rate": 7.191855733945387e-05,
      "loss": 4.8941,
      "step": 84
    },
    {
      "epoch": 0.06412674462466994,
      "grad_norm": 8.473020553588867,
      "learning_rate": 7.113091308703498e-05,
      "loss": 6.1927,
      "step": 85
    },
    {
      "epoch": 0.06488117691437194,
      "grad_norm": 8.47846508026123,
      "learning_rate": 7.033683215379002e-05,
      "loss": 5.1994,
      "step": 86
    },
    {
      "epoch": 0.06563560920407394,
      "grad_norm": 8.409833908081055,
      "learning_rate": 6.953655642446368e-05,
      "loss": 6.0418,
      "step": 87
    },
    {
      "epoch": 0.06639004149377593,
      "grad_norm": 8.531917572021484,
      "learning_rate": 6.873032967079561e-05,
      "loss": 5.0468,
      "step": 88
    },
    {
      "epoch": 0.06714447378347793,
      "grad_norm": 8.19450569152832,
      "learning_rate": 6.7918397477265e-05,
      "loss": 5.3869,
      "step": 89
    },
    {
      "epoch": 0.06789890607317993,
      "grad_norm": 8.973502159118652,
      "learning_rate": 6.710100716628344e-05,
      "loss": 5.2092,
      "step": 90
    },
    {
      "epoch": 0.06865333836288193,
      "grad_norm": 8.892091751098633,
      "learning_rate": 6.627840772285784e-05,
      "loss": 5.4139,
      "step": 91
    },
    {
      "epoch": 0.06940777065258392,
      "grad_norm": 8.022160530090332,
      "learning_rate": 6.545084971874738e-05,
      "loss": 4.7068,
      "step": 92
    },
    {
      "epoch": 0.07016220294228594,
      "grad_norm": 10.1045560836792,
      "learning_rate": 6.461858523613684e-05,
      "loss": 5.8565,
      "step": 93
    },
    {
      "epoch": 0.07091663523198793,
      "grad_norm": 9.677287101745605,
      "learning_rate": 6.378186779084995e-05,
      "loss": 5.1281,
      "step": 94
    },
    {
      "epoch": 0.07167106752168993,
      "grad_norm": 10.185721397399902,
      "learning_rate": 6.294095225512603e-05,
      "loss": 5.3468,
      "step": 95
    },
    {
      "epoch": 0.07242549981139193,
      "grad_norm": 10.971282958984375,
      "learning_rate": 6.209609477998338e-05,
      "loss": 4.6916,
      "step": 96
    },
    {
      "epoch": 0.07317993210109393,
      "grad_norm": 11.636845588684082,
      "learning_rate": 6.124755271719325e-05,
      "loss": 4.4786,
      "step": 97
    },
    {
      "epoch": 0.07393436439079593,
      "grad_norm": 10.711810111999512,
      "learning_rate": 6.0395584540887963e-05,
      "loss": 3.0341,
      "step": 98
    },
    {
      "epoch": 0.07468879668049792,
      "grad_norm": 8.94869613647461,
      "learning_rate": 5.9540449768827246e-05,
      "loss": 2.3939,
      "step": 99
    },
    {
      "epoch": 0.07544322897019992,
      "grad_norm": 8.477395057678223,
      "learning_rate": 5.868240888334653e-05,
      "loss": 1.6288,
      "step": 100
    },
    {
      "epoch": 0.07544322897019992,
      "eval_loss": 1.5527470111846924,
      "eval_runtime": 222.1555,
      "eval_samples_per_second": 10.052,
      "eval_steps_per_second": 5.028,
      "step": 100
    },
    {
      "epoch": 0.07619766125990192,
      "grad_norm": 12.187577247619629,
      "learning_rate": 5.782172325201155e-05,
      "loss": 6.6371,
      "step": 101
    },
    {
      "epoch": 0.07695209354960392,
      "grad_norm": 11.621190071105957,
      "learning_rate": 5.695865504800327e-05,
      "loss": 7.1557,
      "step": 102
    },
    {
      "epoch": 0.07770652583930593,
      "grad_norm": 9.95458698272705,
      "learning_rate": 5.6093467170257374e-05,
      "loss": 6.7466,
      "step": 103
    },
    {
      "epoch": 0.07846095812900793,
      "grad_norm": 7.798550605773926,
      "learning_rate": 5.522642316338268e-05,
      "loss": 6.3035,
      "step": 104
    },
    {
      "epoch": 0.07921539041870992,
      "grad_norm": 7.551530838012695,
      "learning_rate": 5.435778713738292e-05,
      "loss": 6.4349,
      "step": 105
    },
    {
      "epoch": 0.07996982270841192,
      "grad_norm": 7.4671630859375,
      "learning_rate": 5.348782368720626e-05,
      "loss": 6.3822,
      "step": 106
    },
    {
      "epoch": 0.08072425499811392,
      "grad_norm": 7.680654525756836,
      "learning_rate": 5.26167978121472e-05,
      "loss": 5.8368,
      "step": 107
    },
    {
      "epoch": 0.08147868728781592,
      "grad_norm": 7.623833656311035,
      "learning_rate": 5.174497483512506e-05,
      "loss": 6.2462,
      "step": 108
    },
    {
      "epoch": 0.08223311957751792,
      "grad_norm": 8.119511604309082,
      "learning_rate": 5.0872620321864185e-05,
      "loss": 5.8799,
      "step": 109
    },
    {
      "epoch": 0.08298755186721991,
      "grad_norm": 8.163238525390625,
      "learning_rate": 5e-05,
      "loss": 6.4398,
      "step": 110
    },
    {
      "epoch": 0.08374198415692191,
      "grad_norm": 7.675838947296143,
      "learning_rate": 4.912737967813583e-05,
      "loss": 6.1381,
      "step": 111
    },
    {
      "epoch": 0.08449641644662391,
      "grad_norm": 8.161046981811523,
      "learning_rate": 4.825502516487497e-05,
      "loss": 6.1432,
      "step": 112
    },
    {
      "epoch": 0.08525084873632592,
      "grad_norm": 8.056014060974121,
      "learning_rate": 4.738320218785281e-05,
      "loss": 5.9772,
      "step": 113
    },
    {
      "epoch": 0.08600528102602792,
      "grad_norm": 7.2145185470581055,
      "learning_rate": 4.6512176312793736e-05,
      "loss": 5.7573,
      "step": 114
    },
    {
      "epoch": 0.08675971331572992,
      "grad_norm": 7.514006614685059,
      "learning_rate": 4.564221286261709e-05,
      "loss": 5.9614,
      "step": 115
    },
    {
      "epoch": 0.08751414560543191,
      "grad_norm": 7.510403633117676,
      "learning_rate": 4.477357683661734e-05,
      "loss": 5.3342,
      "step": 116
    },
    {
      "epoch": 0.08826857789513391,
      "grad_norm": 8.066449165344238,
      "learning_rate": 4.390653282974264e-05,
      "loss": 5.5604,
      "step": 117
    },
    {
      "epoch": 0.08902301018483591,
      "grad_norm": 6.986632347106934,
      "learning_rate": 4.3041344951996746e-05,
      "loss": 5.5458,
      "step": 118
    },
    {
      "epoch": 0.08977744247453791,
      "grad_norm": 7.432390213012695,
      "learning_rate": 4.2178276747988446e-05,
      "loss": 5.9329,
      "step": 119
    },
    {
      "epoch": 0.0905318747642399,
      "grad_norm": 7.102813243865967,
      "learning_rate": 4.131759111665349e-05,
      "loss": 5.7666,
      "step": 120
    },
    {
      "epoch": 0.0912863070539419,
      "grad_norm": 7.3202714920043945,
      "learning_rate": 4.045955023117276e-05,
      "loss": 5.5461,
      "step": 121
    },
    {
      "epoch": 0.0920407393436439,
      "grad_norm": 7.094704627990723,
      "learning_rate": 3.960441545911204e-05,
      "loss": 5.6711,
      "step": 122
    },
    {
      "epoch": 0.09279517163334591,
      "grad_norm": 6.997950077056885,
      "learning_rate": 3.875244728280676e-05,
      "loss": 6.0052,
      "step": 123
    },
    {
      "epoch": 0.09354960392304791,
      "grad_norm": 7.559538841247559,
      "learning_rate": 3.790390522001662e-05,
      "loss": 4.6827,
      "step": 124
    },
    {
      "epoch": 0.09430403621274991,
      "grad_norm": 7.945469379425049,
      "learning_rate": 3.705904774487396e-05,
      "loss": 5.421,
      "step": 125
    },
    {
      "epoch": 0.0950584685024519,
      "grad_norm": 7.207925796508789,
      "learning_rate": 3.6218132209150045e-05,
      "loss": 5.95,
      "step": 126
    },
    {
      "epoch": 0.0958129007921539,
      "grad_norm": 7.7100725173950195,
      "learning_rate": 3.5381414763863166e-05,
      "loss": 6.2309,
      "step": 127
    },
    {
      "epoch": 0.0965673330818559,
      "grad_norm": 8.336740493774414,
      "learning_rate": 3.4549150281252636e-05,
      "loss": 5.8438,
      "step": 128
    },
    {
      "epoch": 0.0973217653715579,
      "grad_norm": 7.9306511878967285,
      "learning_rate": 3.372159227714218e-05,
      "loss": 6.0361,
      "step": 129
    },
    {
      "epoch": 0.0980761976612599,
      "grad_norm": 8.01220417022705,
      "learning_rate": 3.289899283371657e-05,
      "loss": 6.1294,
      "step": 130
    },
    {
      "epoch": 0.0988306299509619,
      "grad_norm": 7.395640850067139,
      "learning_rate": 3.2081602522734986e-05,
      "loss": 5.6617,
      "step": 131
    },
    {
      "epoch": 0.0995850622406639,
      "grad_norm": 7.897842884063721,
      "learning_rate": 3.12696703292044e-05,
      "loss": 5.7985,
      "step": 132
    },
    {
      "epoch": 0.1003394945303659,
      "grad_norm": 8.189974784851074,
      "learning_rate": 3.046344357553632e-05,
      "loss": 4.9965,
      "step": 133
    },
    {
      "epoch": 0.1010939268200679,
      "grad_norm": 8.065415382385254,
      "learning_rate": 2.9663167846209998e-05,
      "loss": 4.9086,
      "step": 134
    },
    {
      "epoch": 0.1018483591097699,
      "grad_norm": 8.694807052612305,
      "learning_rate": 2.886908691296504e-05,
      "loss": 4.776,
      "step": 135
    },
    {
      "epoch": 0.1026027913994719,
      "grad_norm": 7.551018714904785,
      "learning_rate": 2.8081442660546125e-05,
      "loss": 5.4173,
      "step": 136
    },
    {
      "epoch": 0.1033572236891739,
      "grad_norm": 7.498192310333252,
      "learning_rate": 2.7300475013022663e-05,
      "loss": 5.1536,
      "step": 137
    },
    {
      "epoch": 0.1041116559788759,
      "grad_norm": 7.734801769256592,
      "learning_rate": 2.6526421860705473e-05,
      "loss": 4.447,
      "step": 138
    },
    {
      "epoch": 0.10486608826857789,
      "grad_norm": 8.053444862365723,
      "learning_rate": 2.575951898768315e-05,
      "loss": 5.1973,
      "step": 139
    },
    {
      "epoch": 0.10562052055827989,
      "grad_norm": 8.864779472351074,
      "learning_rate": 2.500000000000001e-05,
      "loss": 5.1369,
      "step": 140
    },
    {
      "epoch": 0.10637495284798189,
      "grad_norm": 8.096254348754883,
      "learning_rate": 2.4248096254497288e-05,
      "loss": 4.3606,
      "step": 141
    },
    {
      "epoch": 0.1071293851376839,
      "grad_norm": 8.422588348388672,
      "learning_rate": 2.350403678833976e-05,
      "loss": 5.2434,
      "step": 142
    },
    {
      "epoch": 0.1078838174273859,
      "grad_norm": 9.554398536682129,
      "learning_rate": 2.2768048249248648e-05,
      "loss": 4.9714,
      "step": 143
    },
    {
      "epoch": 0.1086382497170879,
      "grad_norm": 9.492046356201172,
      "learning_rate": 2.2040354826462668e-05,
      "loss": 5.4207,
      "step": 144
    },
    {
      "epoch": 0.1093926820067899,
      "grad_norm": 9.344889640808105,
      "learning_rate": 2.132117818244771e-05,
      "loss": 4.7644,
      "step": 145
    },
    {
      "epoch": 0.11014711429649189,
      "grad_norm": 9.320569038391113,
      "learning_rate": 2.061073738537635e-05,
      "loss": 4.5768,
      "step": 146
    },
    {
      "epoch": 0.11090154658619389,
      "grad_norm": 9.480978965759277,
      "learning_rate": 1.9909248842397584e-05,
      "loss": 3.6032,
      "step": 147
    },
    {
      "epoch": 0.11165597887589589,
      "grad_norm": 9.053571701049805,
      "learning_rate": 1.9216926233717085e-05,
      "loss": 3.3363,
      "step": 148
    },
    {
      "epoch": 0.11241041116559788,
      "grad_norm": 7.955657958984375,
      "learning_rate": 1.8533980447508137e-05,
      "loss": 2.2038,
      "step": 149
    },
    {
      "epoch": 0.11316484345529988,
      "grad_norm": 10.95832347869873,
      "learning_rate": 1.7860619515673033e-05,
      "loss": 2.481,
      "step": 150
    },
    {
      "epoch": 0.11316484345529988,
      "eval_loss": 1.3595361709594727,
      "eval_runtime": 222.2461,
      "eval_samples_per_second": 10.047,
      "eval_steps_per_second": 5.026,
      "step": 150
    },
    {
      "epoch": 0.11391927574500188,
      "grad_norm": 7.0187482833862305,
      "learning_rate": 1.7197048550474643e-05,
      "loss": 5.7281,
      "step": 151
    },
    {
      "epoch": 0.11467370803470389,
      "grad_norm": 8.375664710998535,
      "learning_rate": 1.6543469682057106e-05,
      "loss": 6.2015,
      "step": 152
    },
    {
      "epoch": 0.11542814032440589,
      "grad_norm": 8.071150779724121,
      "learning_rate": 1.5900081996875083e-05,
      "loss": 6.2623,
      "step": 153
    },
    {
      "epoch": 0.11618257261410789,
      "grad_norm": 7.718661308288574,
      "learning_rate": 1.526708147705013e-05,
      "loss": 6.0401,
      "step": 154
    },
    {
      "epoch": 0.11693700490380989,
      "grad_norm": 8.218561172485352,
      "learning_rate": 1.4644660940672627e-05,
      "loss": 6.4259,
      "step": 155
    },
    {
      "epoch": 0.11769143719351188,
      "grad_norm": 7.594738960266113,
      "learning_rate": 1.4033009983067452e-05,
      "loss": 6.5689,
      "step": 156
    },
    {
      "epoch": 0.11844586948321388,
      "grad_norm": 6.898587703704834,
      "learning_rate": 1.3432314919041478e-05,
      "loss": 5.5474,
      "step": 157
    },
    {
      "epoch": 0.11920030177291588,
      "grad_norm": 7.060200214385986,
      "learning_rate": 1.2842758726130283e-05,
      "loss": 5.9418,
      "step": 158
    },
    {
      "epoch": 0.11995473406261788,
      "grad_norm": 7.08625602722168,
      "learning_rate": 1.22645209888614e-05,
      "loss": 5.9037,
      "step": 159
    },
    {
      "epoch": 0.12070916635231987,
      "grad_norm": 6.942751407623291,
      "learning_rate": 1.1697777844051105e-05,
      "loss": 6.0381,
      "step": 160
    },
    {
      "epoch": 0.12146359864202187,
      "grad_norm": 7.1135406494140625,
      "learning_rate": 1.1142701927151456e-05,
      "loss": 6.1544,
      "step": 161
    },
    {
      "epoch": 0.12221803093172388,
      "grad_norm": 6.448284149169922,
      "learning_rate": 1.0599462319663905e-05,
      "loss": 5.8762,
      "step": 162
    },
    {
      "epoch": 0.12297246322142588,
      "grad_norm": 7.199798107147217,
      "learning_rate": 1.006822449763537e-05,
      "loss": 6.246,
      "step": 163
    },
    {
      "epoch": 0.12372689551112788,
      "grad_norm": 7.024713039398193,
      "learning_rate": 9.549150281252633e-06,
      "loss": 6.028,
      "step": 164
    },
    {
      "epoch": 0.12448132780082988,
      "grad_norm": 6.762689113616943,
      "learning_rate": 9.042397785550405e-06,
      "loss": 5.8399,
      "step": 165
    },
    {
      "epoch": 0.12523576009053186,
      "grad_norm": 6.662613391876221,
      "learning_rate": 8.548121372247918e-06,
      "loss": 6.0572,
      "step": 166
    },
    {
      "epoch": 0.12599019238023387,
      "grad_norm": 7.291405200958252,
      "learning_rate": 8.066471602728803e-06,
      "loss": 5.7887,
      "step": 167
    },
    {
      "epoch": 0.12674462466993588,
      "grad_norm": 8.169452667236328,
      "learning_rate": 7.597595192178702e-06,
      "loss": 6.4472,
      "step": 168
    },
    {
      "epoch": 0.12749905695963787,
      "grad_norm": 7.192326068878174,
      "learning_rate": 7.1416349648943894e-06,
      "loss": 5.6836,
      "step": 169
    },
    {
      "epoch": 0.12825348924933988,
      "grad_norm": 7.195889949798584,
      "learning_rate": 6.698729810778065e-06,
      "loss": 5.4774,
      "step": 170
    },
    {
      "epoch": 0.12900792153904186,
      "grad_norm": 7.8230204582214355,
      "learning_rate": 6.269014643030213e-06,
      "loss": 5.5761,
      "step": 171
    },
    {
      "epoch": 0.12976235382874388,
      "grad_norm": 7.158181667327881,
      "learning_rate": 5.852620357053651e-06,
      "loss": 6.0725,
      "step": 172
    },
    {
      "epoch": 0.13051678611844586,
      "grad_norm": 7.191005706787109,
      "learning_rate": 5.449673790581611e-06,
      "loss": 6.3648,
      "step": 173
    },
    {
      "epoch": 0.13127121840814787,
      "grad_norm": 7.722646236419678,
      "learning_rate": 5.060297685041659e-06,
      "loss": 5.8579,
      "step": 174
    },
    {
      "epoch": 0.13202565069784986,
      "grad_norm": 7.786721229553223,
      "learning_rate": 4.684610648167503e-06,
      "loss": 6.0884,
      "step": 175
    },
    {
      "epoch": 0.13278008298755187,
      "grad_norm": 7.266101360321045,
      "learning_rate": 4.322727117869951e-06,
      "loss": 5.5713,
      "step": 176
    },
    {
      "epoch": 0.13353451527725388,
      "grad_norm": 8.33176326751709,
      "learning_rate": 3.974757327377981e-06,
      "loss": 6.0155,
      "step": 177
    },
    {
      "epoch": 0.13428894756695586,
      "grad_norm": 7.038465976715088,
      "learning_rate": 3.6408072716606346e-06,
      "loss": 5.3413,
      "step": 178
    },
    {
      "epoch": 0.13504337985665787,
      "grad_norm": 7.81002140045166,
      "learning_rate": 3.3209786751399187e-06,
      "loss": 5.9929,
      "step": 179
    },
    {
      "epoch": 0.13579781214635986,
      "grad_norm": 7.73304557800293,
      "learning_rate": 3.0153689607045845e-06,
      "loss": 6.365,
      "step": 180
    },
    {
      "epoch": 0.13655224443606187,
      "grad_norm": 7.7701544761657715,
      "learning_rate": 2.724071220034158e-06,
      "loss": 5.4176,
      "step": 181
    },
    {
      "epoch": 0.13730667672576385,
      "grad_norm": 6.981159687042236,
      "learning_rate": 2.4471741852423237e-06,
      "loss": 4.9839,
      "step": 182
    },
    {
      "epoch": 0.13806110901546587,
      "grad_norm": 7.628045558929443,
      "learning_rate": 2.1847622018482283e-06,
      "loss": 5.467,
      "step": 183
    },
    {
      "epoch": 0.13881554130516785,
      "grad_norm": 7.262263298034668,
      "learning_rate": 1.9369152030840556e-06,
      "loss": 5.3167,
      "step": 184
    },
    {
      "epoch": 0.13956997359486986,
      "grad_norm": 8.176560401916504,
      "learning_rate": 1.70370868554659e-06,
      "loss": 5.5774,
      "step": 185
    },
    {
      "epoch": 0.14032440588457187,
      "grad_norm": 8.238152503967285,
      "learning_rate": 1.4852136862001764e-06,
      "loss": 5.6847,
      "step": 186
    },
    {
      "epoch": 0.14107883817427386,
      "grad_norm": 7.836312770843506,
      "learning_rate": 1.2814967607382432e-06,
      "loss": 5.6294,
      "step": 187
    },
    {
      "epoch": 0.14183327046397587,
      "grad_norm": 8.08753490447998,
      "learning_rate": 1.0926199633097157e-06,
      "loss": 5.3877,
      "step": 188
    },
    {
      "epoch": 0.14258770275367785,
      "grad_norm": 7.177961349487305,
      "learning_rate": 9.186408276168013e-07,
      "loss": 4.799,
      "step": 189
    },
    {
      "epoch": 0.14334213504337986,
      "grad_norm": 7.347300052642822,
      "learning_rate": 7.596123493895991e-07,
      "loss": 4.6541,
      "step": 190
    },
    {
      "epoch": 0.14409656733308185,
      "grad_norm": 8.560412406921387,
      "learning_rate": 6.15582970243117e-07,
      "loss": 5.5527,
      "step": 191
    },
    {
      "epoch": 0.14485099962278386,
      "grad_norm": 8.744789123535156,
      "learning_rate": 4.865965629214819e-07,
      "loss": 5.1754,
      "step": 192
    },
    {
      "epoch": 0.14560543191248584,
      "grad_norm": 8.127669334411621,
      "learning_rate": 3.7269241793390085e-07,
      "loss": 4.7211,
      "step": 193
    },
    {
      "epoch": 0.14635986420218786,
      "grad_norm": 8.923094749450684,
      "learning_rate": 2.7390523158633554e-07,
      "loss": 5.0364,
      "step": 194
    },
    {
      "epoch": 0.14711429649188984,
      "grad_norm": 9.52410888671875,
      "learning_rate": 1.9026509541272275e-07,
      "loss": 4.7301,
      "step": 195
    },
    {
      "epoch": 0.14786872878159185,
      "grad_norm": 9.446085929870605,
      "learning_rate": 1.2179748700879012e-07,
      "loss": 4.8496,
      "step": 196
    },
    {
      "epoch": 0.14862316107129386,
      "grad_norm": 9.303170204162598,
      "learning_rate": 6.852326227130834e-08,
      "loss": 4.8698,
      "step": 197
    },
    {
      "epoch": 0.14937759336099585,
      "grad_norm": 10.187568664550781,
      "learning_rate": 3.04586490452119e-08,
      "loss": 3.2888,
      "step": 198
    },
    {
      "epoch": 0.15013202565069786,
      "grad_norm": 7.18109655380249,
      "learning_rate": 7.615242180436522e-09,
      "loss": 2.0294,
      "step": 199
    },
    {
      "epoch": 0.15088645794039984,
      "grad_norm": 7.182096004486084,
      "learning_rate": 0.0,
      "loss": 1.7007,
      "step": 200
    },
    {
      "epoch": 0.15088645794039984,
      "eval_loss": 1.3203791379928589,
      "eval_runtime": 222.0788,
      "eval_samples_per_second": 10.055,
      "eval_steps_per_second": 5.03,
      "step": 200
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.865573512997765e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}