{ "best_metric": 0.8851153254508972, "best_model_checkpoint": "scanning-model-bert/checkpoint-7473", "epoch": 3.0, "eval_steps": 500, "global_step": 7473, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.010036130068245684, "grad_norm": 1.1764743328094482, "learning_rate": 1.6711229946524065e-06, "loss": 2.4853, "step": 25 }, { "epoch": 0.02007226013649137, "grad_norm": 1.1972541809082031, "learning_rate": 3.342245989304813e-06, "loss": 2.4787, "step": 50 }, { "epoch": 0.030108390204737052, "grad_norm": 1.1974806785583496, "learning_rate": 5.013368983957219e-06, "loss": 2.4831, "step": 75 }, { "epoch": 0.04014452027298274, "grad_norm": 1.139513373374939, "learning_rate": 6.684491978609626e-06, "loss": 2.474, "step": 100 }, { "epoch": 0.05018065034122842, "grad_norm": 1.4917333126068115, "learning_rate": 8.355614973262032e-06, "loss": 2.4656, "step": 125 }, { "epoch": 0.060216780409474105, "grad_norm": 2.7864933013916016, "learning_rate": 1.0026737967914438e-05, "loss": 2.4514, "step": 150 }, { "epoch": 0.0702529104777198, "grad_norm": 1.9038527011871338, "learning_rate": 1.1697860962566845e-05, "loss": 2.4267, "step": 175 }, { "epoch": 0.08028904054596547, "grad_norm": 1.842796802520752, "learning_rate": 1.3368983957219252e-05, "loss": 2.3909, "step": 200 }, { "epoch": 0.09032517061421116, "grad_norm": 1.5834379196166992, "learning_rate": 1.5040106951871657e-05, "loss": 2.4023, "step": 225 }, { "epoch": 0.10036130068245684, "grad_norm": 1.8323915004730225, "learning_rate": 1.6711229946524065e-05, "loss": 2.3552, "step": 250 }, { "epoch": 0.11039743075070253, "grad_norm": 6.8411173820495605, "learning_rate": 1.8382352941176472e-05, "loss": 2.3116, "step": 275 }, { "epoch": 0.12043356081894821, "grad_norm": 3.661825656890869, "learning_rate": 2.0053475935828877e-05, "loss": 2.2989, "step": 300 }, { "epoch": 0.1304696908871939, "grad_norm": 2.0745482444763184, "learning_rate": 2.1724598930481285e-05, "loss": 2.3182, "step": 325 }, { "epoch": 0.1405058209554396, "grad_norm": 14.493195533752441, "learning_rate": 2.339572192513369e-05, "loss": 2.2371, "step": 350 }, { "epoch": 0.15054195102368526, "grad_norm": 4.515138149261475, "learning_rate": 2.5066844919786097e-05, "loss": 2.1893, "step": 375 }, { "epoch": 0.16057808109193095, "grad_norm": 4.537893295288086, "learning_rate": 2.6737967914438505e-05, "loss": 2.1654, "step": 400 }, { "epoch": 0.17061421116017664, "grad_norm": 6.065027236938477, "learning_rate": 2.8409090909090912e-05, "loss": 2.0774, "step": 425 }, { "epoch": 0.18065034122842233, "grad_norm": 3.3011465072631836, "learning_rate": 3.0080213903743313e-05, "loss": 2.1148, "step": 450 }, { "epoch": 0.190686471296668, "grad_norm": 3.5985777378082275, "learning_rate": 3.1751336898395725e-05, "loss": 2.1128, "step": 475 }, { "epoch": 0.20072260136491368, "grad_norm": 10.858529090881348, "learning_rate": 3.342245989304813e-05, "loss": 2.0861, "step": 500 }, { "epoch": 0.21075873143315937, "grad_norm": 5.10444450378418, "learning_rate": 3.509358288770054e-05, "loss": 2.0483, "step": 525 }, { "epoch": 0.22079486150140507, "grad_norm": 2.763580560684204, "learning_rate": 3.6764705882352945e-05, "loss": 1.9238, "step": 550 }, { "epoch": 0.23083099156965076, "grad_norm": 3.7232158184051514, "learning_rate": 3.843582887700535e-05, "loss": 1.8426, "step": 575 }, { "epoch": 0.24086712163789642, "grad_norm": 2.688575267791748, "learning_rate": 4.0106951871657754e-05, "loss": 1.87, "step": 600 }, { "epoch": 0.25090325170614214, "grad_norm": 3.5620744228363037, "learning_rate": 4.1778074866310165e-05, "loss": 1.8417, "step": 625 }, { "epoch": 0.2609393817743878, "grad_norm": 3.834071397781372, "learning_rate": 4.344919786096257e-05, "loss": 1.9035, "step": 650 }, { "epoch": 0.27097551184263347, "grad_norm": 6.310031414031982, "learning_rate": 4.512032085561498e-05, "loss": 1.83, "step": 675 }, { "epoch": 0.2810116419108792, "grad_norm": 10.144730567932129, "learning_rate": 4.679144385026738e-05, "loss": 1.8859, "step": 700 }, { "epoch": 0.29104777197912485, "grad_norm": 6.981380939483643, "learning_rate": 4.846256684491979e-05, "loss": 1.9067, "step": 725 }, { "epoch": 0.3010839020473705, "grad_norm": 6.095215797424316, "learning_rate": 4.9985130111524165e-05, "loss": 1.6395, "step": 750 }, { "epoch": 0.31112003211561623, "grad_norm": 9.925978660583496, "learning_rate": 4.979925650557621e-05, "loss": 1.7119, "step": 775 }, { "epoch": 0.3211561621838619, "grad_norm": 4.9185309410095215, "learning_rate": 4.961338289962826e-05, "loss": 1.6727, "step": 800 }, { "epoch": 0.3311922922521076, "grad_norm": 5.485770225524902, "learning_rate": 4.94275092936803e-05, "loss": 1.7179, "step": 825 }, { "epoch": 0.3412284223203533, "grad_norm": 4.848090648651123, "learning_rate": 4.9241635687732344e-05, "loss": 1.7757, "step": 850 }, { "epoch": 0.35126455238859894, "grad_norm": 4.81159782409668, "learning_rate": 4.905576208178439e-05, "loss": 1.7111, "step": 875 }, { "epoch": 0.36130068245684466, "grad_norm": 9.039434432983398, "learning_rate": 4.886988847583644e-05, "loss": 1.658, "step": 900 }, { "epoch": 0.3713368125250903, "grad_norm": 4.779675483703613, "learning_rate": 4.868401486988848e-05, "loss": 1.7113, "step": 925 }, { "epoch": 0.381372942593336, "grad_norm": 3.4945783615112305, "learning_rate": 4.849814126394052e-05, "loss": 1.5576, "step": 950 }, { "epoch": 0.3914090726615817, "grad_norm": 7.573122978210449, "learning_rate": 4.8312267657992566e-05, "loss": 1.5566, "step": 975 }, { "epoch": 0.40144520272982737, "grad_norm": 13.348621368408203, "learning_rate": 4.812639405204461e-05, "loss": 1.6002, "step": 1000 }, { "epoch": 0.4114813327980731, "grad_norm": 3.8586764335632324, "learning_rate": 4.794052044609665e-05, "loss": 1.4931, "step": 1025 }, { "epoch": 0.42151746286631875, "grad_norm": 8.584020614624023, "learning_rate": 4.77546468401487e-05, "loss": 1.58, "step": 1050 }, { "epoch": 0.4315535929345644, "grad_norm": 9.650528907775879, "learning_rate": 4.7568773234200745e-05, "loss": 1.5622, "step": 1075 }, { "epoch": 0.44158972300281013, "grad_norm": 5.387539863586426, "learning_rate": 4.7382899628252794e-05, "loss": 1.5855, "step": 1100 }, { "epoch": 0.4516258530710558, "grad_norm": 5.179405689239502, "learning_rate": 4.719702602230484e-05, "loss": 1.5094, "step": 1125 }, { "epoch": 0.4616619831393015, "grad_norm": 12.544023513793945, "learning_rate": 4.701115241635688e-05, "loss": 1.5518, "step": 1150 }, { "epoch": 0.4716981132075472, "grad_norm": 5.561625003814697, "learning_rate": 4.682527881040892e-05, "loss": 1.5438, "step": 1175 }, { "epoch": 0.48173424327579284, "grad_norm": 5.334999084472656, "learning_rate": 4.6639405204460966e-05, "loss": 1.4815, "step": 1200 }, { "epoch": 0.49177037334403856, "grad_norm": 5.292882919311523, "learning_rate": 4.645353159851301e-05, "loss": 1.5336, "step": 1225 }, { "epoch": 0.5018065034122843, "grad_norm": 4.656703948974609, "learning_rate": 4.626765799256505e-05, "loss": 1.4317, "step": 1250 }, { "epoch": 0.5118426334805299, "grad_norm": 6.756280422210693, "learning_rate": 4.60817843866171e-05, "loss": 1.4975, "step": 1275 }, { "epoch": 0.5218787635487756, "grad_norm": 6.566856861114502, "learning_rate": 4.589591078066915e-05, "loss": 1.4193, "step": 1300 }, { "epoch": 0.5319148936170213, "grad_norm": 14.7003812789917, "learning_rate": 4.5710037174721195e-05, "loss": 1.3279, "step": 1325 }, { "epoch": 0.5419510236852669, "grad_norm": 6.0828633308410645, "learning_rate": 4.552416356877324e-05, "loss": 1.508, "step": 1350 }, { "epoch": 0.5519871537535126, "grad_norm": 4.892170429229736, "learning_rate": 4.533828996282528e-05, "loss": 1.3173, "step": 1375 }, { "epoch": 0.5620232838217584, "grad_norm": 6.631337642669678, "learning_rate": 4.5152416356877324e-05, "loss": 1.3692, "step": 1400 }, { "epoch": 0.572059413890004, "grad_norm": 10.353917121887207, "learning_rate": 4.496654275092937e-05, "loss": 1.3371, "step": 1425 }, { "epoch": 0.5820955439582497, "grad_norm": 6.489056587219238, "learning_rate": 4.478066914498141e-05, "loss": 1.5097, "step": 1450 }, { "epoch": 0.5921316740264954, "grad_norm": 3.911562442779541, "learning_rate": 4.459479553903346e-05, "loss": 1.3946, "step": 1475 }, { "epoch": 0.602167804094741, "grad_norm": 7.033605098724365, "learning_rate": 4.440892193308551e-05, "loss": 1.3581, "step": 1500 }, { "epoch": 0.6122039341629868, "grad_norm": 24.549911499023438, "learning_rate": 4.422304832713755e-05, "loss": 1.4775, "step": 1525 }, { "epoch": 0.6222400642312325, "grad_norm": 5.103108882904053, "learning_rate": 4.4037174721189596e-05, "loss": 1.4047, "step": 1550 }, { "epoch": 0.6322761942994781, "grad_norm": 6.033442974090576, "learning_rate": 4.385130111524164e-05, "loss": 1.3348, "step": 1575 }, { "epoch": 0.6423123243677238, "grad_norm": 2.9465322494506836, "learning_rate": 4.366542750929368e-05, "loss": 1.3134, "step": 1600 }, { "epoch": 0.6523484544359694, "grad_norm": 6.447948455810547, "learning_rate": 4.3479553903345725e-05, "loss": 1.3922, "step": 1625 }, { "epoch": 0.6623845845042152, "grad_norm": 8.688138961791992, "learning_rate": 4.329368029739777e-05, "loss": 1.3806, "step": 1650 }, { "epoch": 0.6724207145724609, "grad_norm": 8.916844367980957, "learning_rate": 4.310780669144982e-05, "loss": 1.4429, "step": 1675 }, { "epoch": 0.6824568446407066, "grad_norm": 5.320075035095215, "learning_rate": 4.292193308550186e-05, "loss": 1.4155, "step": 1700 }, { "epoch": 0.6924929747089522, "grad_norm": 7.1480302810668945, "learning_rate": 4.2736059479553904e-05, "loss": 1.3846, "step": 1725 }, { "epoch": 0.7025291047771979, "grad_norm": 6.049670696258545, "learning_rate": 4.255018587360595e-05, "loss": 1.3116, "step": 1750 }, { "epoch": 0.7125652348454437, "grad_norm": 9.660329818725586, "learning_rate": 4.2364312267657996e-05, "loss": 1.1697, "step": 1775 }, { "epoch": 0.7226013649136893, "grad_norm": 3.5257277488708496, "learning_rate": 4.217843866171004e-05, "loss": 1.368, "step": 1800 }, { "epoch": 0.732637494981935, "grad_norm": 5.34630823135376, "learning_rate": 4.199256505576208e-05, "loss": 1.3725, "step": 1825 }, { "epoch": 0.7426736250501806, "grad_norm": 6.894128799438477, "learning_rate": 4.1806691449814125e-05, "loss": 1.1919, "step": 1850 }, { "epoch": 0.7527097551184263, "grad_norm": 5.004602432250977, "learning_rate": 4.1620817843866175e-05, "loss": 1.2233, "step": 1875 }, { "epoch": 0.762745885186672, "grad_norm": 7.174907684326172, "learning_rate": 4.143494423791822e-05, "loss": 1.3805, "step": 1900 }, { "epoch": 0.7727820152549177, "grad_norm": 5.870824337005615, "learning_rate": 4.124907063197026e-05, "loss": 1.2306, "step": 1925 }, { "epoch": 0.7828181453231634, "grad_norm": 13.816654205322266, "learning_rate": 4.1063197026022304e-05, "loss": 1.33, "step": 1950 }, { "epoch": 0.7928542753914091, "grad_norm": 6.733251571655273, "learning_rate": 4.0877323420074354e-05, "loss": 1.2333, "step": 1975 }, { "epoch": 0.8028904054596547, "grad_norm": 6.878822326660156, "learning_rate": 4.06914498141264e-05, "loss": 1.1058, "step": 2000 }, { "epoch": 0.8129265355279004, "grad_norm": 5.60182523727417, "learning_rate": 4.050557620817844e-05, "loss": 1.3247, "step": 2025 }, { "epoch": 0.8229626655961462, "grad_norm": 5.8014655113220215, "learning_rate": 4.031970260223048e-05, "loss": 1.2512, "step": 2050 }, { "epoch": 0.8329987956643918, "grad_norm": 8.3912992477417, "learning_rate": 4.013382899628253e-05, "loss": 1.2348, "step": 2075 }, { "epoch": 0.8430349257326375, "grad_norm": 4.4156270027160645, "learning_rate": 3.9947955390334576e-05, "loss": 1.2844, "step": 2100 }, { "epoch": 0.8530710558008832, "grad_norm": 12.413702011108398, "learning_rate": 3.976208178438662e-05, "loss": 1.192, "step": 2125 }, { "epoch": 0.8631071858691288, "grad_norm": 5.390079498291016, "learning_rate": 3.957620817843866e-05, "loss": 1.2788, "step": 2150 }, { "epoch": 0.8731433159373746, "grad_norm": 12.286083221435547, "learning_rate": 3.9390334572490705e-05, "loss": 1.272, "step": 2175 }, { "epoch": 0.8831794460056203, "grad_norm": 3.3497118949890137, "learning_rate": 3.9204460966542755e-05, "loss": 1.2199, "step": 2200 }, { "epoch": 0.8932155760738659, "grad_norm": 16.91133689880371, "learning_rate": 3.90185873605948e-05, "loss": 1.1223, "step": 2225 }, { "epoch": 0.9032517061421116, "grad_norm": 4.592910289764404, "learning_rate": 3.883271375464684e-05, "loss": 1.2841, "step": 2250 }, { "epoch": 0.9132878362103573, "grad_norm": 7.878051280975342, "learning_rate": 3.864684014869889e-05, "loss": 1.1558, "step": 2275 }, { "epoch": 0.923323966278603, "grad_norm": 6.925229072570801, "learning_rate": 3.8460966542750934e-05, "loss": 1.1348, "step": 2300 }, { "epoch": 0.9333600963468487, "grad_norm": 6.3386359214782715, "learning_rate": 3.8275092936802977e-05, "loss": 1.2382, "step": 2325 }, { "epoch": 0.9433962264150944, "grad_norm": 6.875855922698975, "learning_rate": 3.808921933085502e-05, "loss": 1.0859, "step": 2350 }, { "epoch": 0.95343235648334, "grad_norm": 5.8908305168151855, "learning_rate": 3.790334572490706e-05, "loss": 1.2598, "step": 2375 }, { "epoch": 0.9634684865515857, "grad_norm": 10.769807815551758, "learning_rate": 3.7717472118959106e-05, "loss": 1.1719, "step": 2400 }, { "epoch": 0.9735046166198313, "grad_norm": 4.296354293823242, "learning_rate": 3.7531598513011155e-05, "loss": 1.099, "step": 2425 }, { "epoch": 0.9835407466880771, "grad_norm": 6.258970260620117, "learning_rate": 3.73457249070632e-05, "loss": 1.2804, "step": 2450 }, { "epoch": 0.9935768767563228, "grad_norm": 5.117431163787842, "learning_rate": 3.715985130111525e-05, "loss": 1.1818, "step": 2475 }, { "epoch": 1.0, "eval_accuracy": 0.6057808109193095, "eval_f1_macro": 0.5983023090636659, "eval_f1_micro": 0.6057808109193095, "eval_f1_weighted": 0.5969621030224332, "eval_loss": 1.129994511604309, "eval_precision_macro": 0.6126689914548499, "eval_precision_micro": 0.6057808109193095, "eval_precision_weighted": 0.6110410923243241, "eval_recall_macro": 0.6066459481100611, "eval_recall_micro": 0.6057808109193095, "eval_recall_weighted": 0.6057808109193095, "eval_runtime": 797.1394, "eval_samples_per_second": 6.25, "eval_steps_per_second": 0.391, "step": 2491 }, { "epoch": 1.0036130068245686, "grad_norm": 6.499170303344727, "learning_rate": 3.697397769516729e-05, "loss": 1.1431, "step": 2500 }, { "epoch": 1.0136491368928142, "grad_norm": 9.59432601928711, "learning_rate": 3.6788104089219334e-05, "loss": 1.1769, "step": 2525 }, { "epoch": 1.0236852669610599, "grad_norm": 19.949962615966797, "learning_rate": 3.660223048327138e-05, "loss": 1.0948, "step": 2550 }, { "epoch": 1.0337213970293055, "grad_norm": 14.18024730682373, "learning_rate": 3.641635687732342e-05, "loss": 1.2609, "step": 2575 }, { "epoch": 1.0437575270975512, "grad_norm": 11.269380569458008, "learning_rate": 3.623048327137546e-05, "loss": 1.0704, "step": 2600 }, { "epoch": 1.0537936571657969, "grad_norm": 5.695765018463135, "learning_rate": 3.6044609665427506e-05, "loss": 1.0063, "step": 2625 }, { "epoch": 1.0638297872340425, "grad_norm": 14.094305038452148, "learning_rate": 3.5858736059479556e-05, "loss": 1.2925, "step": 2650 }, { "epoch": 1.0738659173022882, "grad_norm": 12.211389541625977, "learning_rate": 3.5672862453531606e-05, "loss": 1.0322, "step": 2675 }, { "epoch": 1.0839020473705339, "grad_norm": 13.703721046447754, "learning_rate": 3.548698884758365e-05, "loss": 1.1343, "step": 2700 }, { "epoch": 1.0939381774387795, "grad_norm": 9.662201881408691, "learning_rate": 3.530111524163569e-05, "loss": 1.0711, "step": 2725 }, { "epoch": 1.1039743075070252, "grad_norm": 5.344089508056641, "learning_rate": 3.5115241635687735e-05, "loss": 1.2139, "step": 2750 }, { "epoch": 1.114010437575271, "grad_norm": 8.1133394241333, "learning_rate": 3.492936802973978e-05, "loss": 1.1404, "step": 2775 }, { "epoch": 1.1240465676435167, "grad_norm": 8.59904670715332, "learning_rate": 3.474349442379182e-05, "loss": 1.2904, "step": 2800 }, { "epoch": 1.1340826977117624, "grad_norm": 7.583441257476807, "learning_rate": 3.4557620817843864e-05, "loss": 1.1021, "step": 2825 }, { "epoch": 1.144118827780008, "grad_norm": 4.938253879547119, "learning_rate": 3.4371747211895914e-05, "loss": 1.0878, "step": 2850 }, { "epoch": 1.1541549578482537, "grad_norm": 9.23643684387207, "learning_rate": 3.418587360594796e-05, "loss": 1.1033, "step": 2875 }, { "epoch": 1.1641910879164994, "grad_norm": 6.702592372894287, "learning_rate": 3.4000000000000007e-05, "loss": 1.0389, "step": 2900 }, { "epoch": 1.174227217984745, "grad_norm": 7.392385959625244, "learning_rate": 3.381412639405205e-05, "loss": 1.0155, "step": 2925 }, { "epoch": 1.1842633480529907, "grad_norm": 27.505746841430664, "learning_rate": 3.362825278810409e-05, "loss": 1.2215, "step": 2950 }, { "epoch": 1.1942994781212364, "grad_norm": 14.125563621520996, "learning_rate": 3.3442379182156136e-05, "loss": 1.0795, "step": 2975 }, { "epoch": 1.204335608189482, "grad_norm": 9.185543060302734, "learning_rate": 3.325650557620818e-05, "loss": 1.1799, "step": 3000 }, { "epoch": 1.2143717382577277, "grad_norm": 9.01848316192627, "learning_rate": 3.307063197026022e-05, "loss": 1.212, "step": 3025 }, { "epoch": 1.2244078683259736, "grad_norm": 9.692822456359863, "learning_rate": 3.288475836431227e-05, "loss": 1.0784, "step": 3050 }, { "epoch": 1.2344439983942193, "grad_norm": 20.630401611328125, "learning_rate": 3.2698884758364314e-05, "loss": 1.2005, "step": 3075 }, { "epoch": 1.244480128462465, "grad_norm": 8.891132354736328, "learning_rate": 3.251301115241636e-05, "loss": 1.1043, "step": 3100 }, { "epoch": 1.2545162585307106, "grad_norm": 7.9135308265686035, "learning_rate": 3.232713754646841e-05, "loss": 1.006, "step": 3125 }, { "epoch": 1.2645523885989562, "grad_norm": 7.054498195648193, "learning_rate": 3.214126394052045e-05, "loss": 1.0986, "step": 3150 }, { "epoch": 1.274588518667202, "grad_norm": 9.929298400878906, "learning_rate": 3.195539033457249e-05, "loss": 1.0762, "step": 3175 }, { "epoch": 1.2846246487354476, "grad_norm": 7.586366653442383, "learning_rate": 3.1769516728624536e-05, "loss": 0.9468, "step": 3200 }, { "epoch": 1.2946607788036932, "grad_norm": 14.547595024108887, "learning_rate": 3.158364312267658e-05, "loss": 1.0565, "step": 3225 }, { "epoch": 1.304696908871939, "grad_norm": 6.774365425109863, "learning_rate": 3.139776951672863e-05, "loss": 1.0749, "step": 3250 }, { "epoch": 1.3147330389401848, "grad_norm": 6.633537292480469, "learning_rate": 3.121189591078067e-05, "loss": 0.9786, "step": 3275 }, { "epoch": 1.3247691690084302, "grad_norm": 9.134368896484375, "learning_rate": 3.1026022304832715e-05, "loss": 1.1634, "step": 3300 }, { "epoch": 1.334805299076676, "grad_norm": 5.2640228271484375, "learning_rate": 3.084014869888476e-05, "loss": 1.1088, "step": 3325 }, { "epoch": 1.3448414291449218, "grad_norm": 12.46059513092041, "learning_rate": 3.065427509293681e-05, "loss": 0.9776, "step": 3350 }, { "epoch": 1.3548775592131674, "grad_norm": 10.393540382385254, "learning_rate": 3.046840148698885e-05, "loss": 1.083, "step": 3375 }, { "epoch": 1.364913689281413, "grad_norm": 9.62662124633789, "learning_rate": 3.0282527881040894e-05, "loss": 1.0039, "step": 3400 }, { "epoch": 1.3749498193496588, "grad_norm": 6.393442630767822, "learning_rate": 3.009665427509294e-05, "loss": 1.0858, "step": 3425 }, { "epoch": 1.3849859494179044, "grad_norm": 6.460680961608887, "learning_rate": 2.9910780669144983e-05, "loss": 0.8974, "step": 3450 }, { "epoch": 1.39502207948615, "grad_norm": 19.161996841430664, "learning_rate": 2.9724907063197026e-05, "loss": 0.9708, "step": 3475 }, { "epoch": 1.4050582095543958, "grad_norm": 5.401683330535889, "learning_rate": 2.9539033457249073e-05, "loss": 0.8682, "step": 3500 }, { "epoch": 1.4150943396226414, "grad_norm": 10.808605194091797, "learning_rate": 2.9353159851301116e-05, "loss": 0.9252, "step": 3525 }, { "epoch": 1.4251304696908873, "grad_norm": 15.710016250610352, "learning_rate": 2.916728624535316e-05, "loss": 1.1788, "step": 3550 }, { "epoch": 1.4351665997591327, "grad_norm": 8.340784072875977, "learning_rate": 2.8981412639405202e-05, "loss": 1.0412, "step": 3575 }, { "epoch": 1.4452027298273786, "grad_norm": 6.99077033996582, "learning_rate": 2.879553903345725e-05, "loss": 1.0227, "step": 3600 }, { "epoch": 1.4552388598956243, "grad_norm": 6.2852959632873535, "learning_rate": 2.8609665427509298e-05, "loss": 0.8811, "step": 3625 }, { "epoch": 1.46527498996387, "grad_norm": 6.518800258636475, "learning_rate": 2.842379182156134e-05, "loss": 1.0829, "step": 3650 }, { "epoch": 1.4753111200321156, "grad_norm": 12.849925994873047, "learning_rate": 2.8237918215613384e-05, "loss": 1.1139, "step": 3675 }, { "epoch": 1.4853472501003613, "grad_norm": 8.003408432006836, "learning_rate": 2.805204460966543e-05, "loss": 1.0784, "step": 3700 }, { "epoch": 1.495383380168607, "grad_norm": 4.201417922973633, "learning_rate": 2.7866171003717473e-05, "loss": 1.1086, "step": 3725 }, { "epoch": 1.5054195102368526, "grad_norm": 10.88537311553955, "learning_rate": 2.7680297397769516e-05, "loss": 1.0667, "step": 3750 }, { "epoch": 1.5154556403050985, "grad_norm": 5.831775665283203, "learning_rate": 2.749442379182156e-05, "loss": 1.1309, "step": 3775 }, { "epoch": 1.525491770373344, "grad_norm": 15.371512413024902, "learning_rate": 2.7308550185873606e-05, "loss": 0.991, "step": 3800 }, { "epoch": 1.5355279004415898, "grad_norm": 12.852242469787598, "learning_rate": 2.7122676579925656e-05, "loss": 0.9607, "step": 3825 }, { "epoch": 1.5455640305098353, "grad_norm": 6.148252964019775, "learning_rate": 2.69368029739777e-05, "loss": 1.13, "step": 3850 }, { "epoch": 1.5556001605780811, "grad_norm": 9.267879486083984, "learning_rate": 2.675092936802974e-05, "loss": 1.0981, "step": 3875 }, { "epoch": 1.5656362906463268, "grad_norm": 3.6907107830047607, "learning_rate": 2.6565055762081788e-05, "loss": 1.123, "step": 3900 }, { "epoch": 1.5756724207145725, "grad_norm": 13.81143569946289, "learning_rate": 2.637918215613383e-05, "loss": 0.9826, "step": 3925 }, { "epoch": 1.5857085507828181, "grad_norm": 10.353007316589355, "learning_rate": 2.6193308550185874e-05, "loss": 1.0302, "step": 3950 }, { "epoch": 1.5957446808510638, "grad_norm": 6.6601457595825195, "learning_rate": 2.6007434944237917e-05, "loss": 0.9897, "step": 3975 }, { "epoch": 1.6057808109193095, "grad_norm": 10.227742195129395, "learning_rate": 2.5821561338289963e-05, "loss": 0.939, "step": 4000 }, { "epoch": 1.6158169409875551, "grad_norm": 12.598037719726562, "learning_rate": 2.5635687732342007e-05, "loss": 1.0387, "step": 4025 }, { "epoch": 1.625853071055801, "grad_norm": 10.454833030700684, "learning_rate": 2.5449814126394056e-05, "loss": 0.9812, "step": 4050 }, { "epoch": 1.6358892011240465, "grad_norm": 7.645059585571289, "learning_rate": 2.52639405204461e-05, "loss": 1.009, "step": 4075 }, { "epoch": 1.6459253311922923, "grad_norm": 6.201138973236084, "learning_rate": 2.5078066914498142e-05, "loss": 1.0306, "step": 4100 }, { "epoch": 1.6559614612605378, "grad_norm": 12.0375394821167, "learning_rate": 2.489219330855019e-05, "loss": 1.013, "step": 4125 }, { "epoch": 1.6659975913287837, "grad_norm": 8.109641075134277, "learning_rate": 2.4706319702602232e-05, "loss": 0.8755, "step": 4150 }, { "epoch": 1.6760337213970293, "grad_norm": 13.750313758850098, "learning_rate": 2.4520446096654275e-05, "loss": 0.9282, "step": 4175 }, { "epoch": 1.686069851465275, "grad_norm": 3.3968727588653564, "learning_rate": 2.433457249070632e-05, "loss": 1.0483, "step": 4200 }, { "epoch": 1.6961059815335207, "grad_norm": 8.283858299255371, "learning_rate": 2.4148698884758368e-05, "loss": 1.1133, "step": 4225 }, { "epoch": 1.7061421116017663, "grad_norm": 5.217039108276367, "learning_rate": 2.396282527881041e-05, "loss": 0.8489, "step": 4250 }, { "epoch": 1.7161782416700122, "grad_norm": 4.49629545211792, "learning_rate": 2.3776951672862454e-05, "loss": 0.743, "step": 4275 }, { "epoch": 1.7262143717382576, "grad_norm": 7.107280254364014, "learning_rate": 2.35910780669145e-05, "loss": 0.9398, "step": 4300 }, { "epoch": 1.7362505018065035, "grad_norm": 3.9583284854888916, "learning_rate": 2.3405204460966543e-05, "loss": 0.961, "step": 4325 }, { "epoch": 1.746286631874749, "grad_norm": 10.223085403442383, "learning_rate": 2.321933085501859e-05, "loss": 1.1148, "step": 4350 }, { "epoch": 1.7563227619429949, "grad_norm": 7.453252792358398, "learning_rate": 2.3033457249070632e-05, "loss": 1.0246, "step": 4375 }, { "epoch": 1.7663588920112403, "grad_norm": 4.716647148132324, "learning_rate": 2.284758364312268e-05, "loss": 0.929, "step": 4400 }, { "epoch": 1.7763950220794862, "grad_norm": 6.915979862213135, "learning_rate": 2.2661710037174722e-05, "loss": 0.9241, "step": 4425 }, { "epoch": 1.7864311521477318, "grad_norm": 15.455207824707031, "learning_rate": 2.2475836431226765e-05, "loss": 0.9634, "step": 4450 }, { "epoch": 1.7964672822159775, "grad_norm": 14.850927352905273, "learning_rate": 2.228996282527881e-05, "loss": 0.9322, "step": 4475 }, { "epoch": 1.8065034122842232, "grad_norm": 7.982198715209961, "learning_rate": 2.2104089219330858e-05, "loss": 0.9881, "step": 4500 }, { "epoch": 1.8165395423524688, "grad_norm": 5.206161022186279, "learning_rate": 2.19182156133829e-05, "loss": 1.0773, "step": 4525 }, { "epoch": 1.8265756724207147, "grad_norm": 5.201132297515869, "learning_rate": 2.1732342007434944e-05, "loss": 0.9938, "step": 4550 }, { "epoch": 1.8366118024889602, "grad_norm": 7.117099761962891, "learning_rate": 2.154646840148699e-05, "loss": 1.0281, "step": 4575 }, { "epoch": 1.846647932557206, "grad_norm": 8.35094165802002, "learning_rate": 2.1360594795539036e-05, "loss": 0.922, "step": 4600 }, { "epoch": 1.8566840626254515, "grad_norm": 8.167677879333496, "learning_rate": 2.117472118959108e-05, "loss": 0.9462, "step": 4625 }, { "epoch": 1.8667201926936974, "grad_norm": 10.520957946777344, "learning_rate": 2.0988847583643123e-05, "loss": 1.0836, "step": 4650 }, { "epoch": 1.876756322761943, "grad_norm": 34.1854248046875, "learning_rate": 2.0802973977695166e-05, "loss": 0.9133, "step": 4675 }, { "epoch": 1.8867924528301887, "grad_norm": 6.081179618835449, "learning_rate": 2.0617100371747215e-05, "loss": 0.983, "step": 4700 }, { "epoch": 1.8968285828984344, "grad_norm": 10.599027633666992, "learning_rate": 2.043122676579926e-05, "loss": 0.9998, "step": 4725 }, { "epoch": 1.90686471296668, "grad_norm": 9.57131290435791, "learning_rate": 2.02453531598513e-05, "loss": 0.9191, "step": 4750 }, { "epoch": 1.9169008430349257, "grad_norm": 6.110264778137207, "learning_rate": 2.0059479553903344e-05, "loss": 0.8917, "step": 4775 }, { "epoch": 1.9269369731031714, "grad_norm": 10.64118766784668, "learning_rate": 1.9873605947955394e-05, "loss": 0.9154, "step": 4800 }, { "epoch": 1.9369731031714172, "grad_norm": 18.218952178955078, "learning_rate": 1.9687732342007437e-05, "loss": 0.9216, "step": 4825 }, { "epoch": 1.9470092332396627, "grad_norm": 5.614959716796875, "learning_rate": 1.950185873605948e-05, "loss": 1.0401, "step": 4850 }, { "epoch": 1.9570453633079086, "grad_norm": 9.277515411376953, "learning_rate": 1.9315985130111523e-05, "loss": 1.1864, "step": 4875 }, { "epoch": 1.967081493376154, "grad_norm": 14.814881324768066, "learning_rate": 1.913011152416357e-05, "loss": 0.9891, "step": 4900 }, { "epoch": 1.9771176234444, "grad_norm": 10.074165344238281, "learning_rate": 1.8944237918215616e-05, "loss": 0.9461, "step": 4925 }, { "epoch": 1.9871537535126456, "grad_norm": 8.03749942779541, "learning_rate": 1.875836431226766e-05, "loss": 0.8771, "step": 4950 }, { "epoch": 1.9971898835808912, "grad_norm": 3.215528726577759, "learning_rate": 1.8572490706319702e-05, "loss": 0.8763, "step": 4975 }, { "epoch": 2.0, "eval_accuracy": 0.6547571256523484, "eval_f1_macro": 0.6500225040595868, "eval_f1_micro": 0.6547571256523484, "eval_f1_weighted": 0.6488410398009357, "eval_loss": 0.9818471670150757, "eval_precision_macro": 0.6640770883783581, "eval_precision_micro": 0.6547571256523484, "eval_precision_weighted": 0.662792396194005, "eval_recall_macro": 0.6557225665163219, "eval_recall_micro": 0.6547571256523484, "eval_recall_weighted": 0.6547571256523484, "eval_runtime": 851.1654, "eval_samples_per_second": 5.853, "eval_steps_per_second": 0.367, "step": 4982 }, { "epoch": 2.007226013649137, "grad_norm": 8.038765907287598, "learning_rate": 1.838661710037175e-05, "loss": 0.9551, "step": 5000 }, { "epoch": 2.0172621437173825, "grad_norm": 7.78813362121582, "learning_rate": 1.820074349442379e-05, "loss": 0.8925, "step": 5025 }, { "epoch": 2.0272982737856284, "grad_norm": 7.722963333129883, "learning_rate": 1.8014869888475838e-05, "loss": 0.8987, "step": 5050 }, { "epoch": 2.037334403853874, "grad_norm": 10.702392578125, "learning_rate": 1.782899628252788e-05, "loss": 0.8756, "step": 5075 }, { "epoch": 2.0473705339221198, "grad_norm": 5.403848171234131, "learning_rate": 1.7643122676579927e-05, "loss": 0.9061, "step": 5100 }, { "epoch": 2.057406663990365, "grad_norm": 10.365225791931152, "learning_rate": 1.745724907063197e-05, "loss": 0.8466, "step": 5125 }, { "epoch": 2.067442794058611, "grad_norm": 3.015634775161743, "learning_rate": 1.7271375464684017e-05, "loss": 0.7796, "step": 5150 }, { "epoch": 2.0774789241268565, "grad_norm": 11.44224739074707, "learning_rate": 1.708550185873606e-05, "loss": 0.7642, "step": 5175 }, { "epoch": 2.0875150541951024, "grad_norm": 8.612204551696777, "learning_rate": 1.6899628252788106e-05, "loss": 0.8178, "step": 5200 }, { "epoch": 2.097551184263348, "grad_norm": 13.947504997253418, "learning_rate": 1.671375464684015e-05, "loss": 0.7342, "step": 5225 }, { "epoch": 2.1075873143315937, "grad_norm": 5.7223663330078125, "learning_rate": 1.6527881040892192e-05, "loss": 1.0052, "step": 5250 }, { "epoch": 2.1176234443998396, "grad_norm": 6.79681921005249, "learning_rate": 1.634200743494424e-05, "loss": 0.858, "step": 5275 }, { "epoch": 2.127659574468085, "grad_norm": 10.765701293945312, "learning_rate": 1.6156133828996285e-05, "loss": 0.7969, "step": 5300 }, { "epoch": 2.137695704536331, "grad_norm": 5.018877029418945, "learning_rate": 1.5970260223048328e-05, "loss": 0.7923, "step": 5325 }, { "epoch": 2.1477318346045764, "grad_norm": 10.32058334350586, "learning_rate": 1.578438661710037e-05, "loss": 0.9131, "step": 5350 }, { "epoch": 2.1577679646728223, "grad_norm": 4.6106061935424805, "learning_rate": 1.5598513011152417e-05, "loss": 0.8826, "step": 5375 }, { "epoch": 2.1678040947410677, "grad_norm": 10.13034439086914, "learning_rate": 1.5412639405204464e-05, "loss": 0.8108, "step": 5400 }, { "epoch": 2.1778402248093136, "grad_norm": 10.367287635803223, "learning_rate": 1.5226765799256507e-05, "loss": 0.8552, "step": 5425 }, { "epoch": 2.187876354877559, "grad_norm": 13.695893287658691, "learning_rate": 1.504089219330855e-05, "loss": 0.8305, "step": 5450 }, { "epoch": 2.197912484945805, "grad_norm": 7.799850940704346, "learning_rate": 1.4855018587360594e-05, "loss": 0.9426, "step": 5475 }, { "epoch": 2.2079486150140504, "grad_norm": 7.746633529663086, "learning_rate": 1.4669144981412641e-05, "loss": 0.8498, "step": 5500 }, { "epoch": 2.2179847450822963, "grad_norm": 3.6280875205993652, "learning_rate": 1.4483271375464686e-05, "loss": 0.7575, "step": 5525 }, { "epoch": 2.228020875150542, "grad_norm": 12.937957763671875, "learning_rate": 1.4297397769516729e-05, "loss": 0.7432, "step": 5550 }, { "epoch": 2.2380570052187876, "grad_norm": 12.451675415039062, "learning_rate": 1.4111524163568773e-05, "loss": 0.8524, "step": 5575 }, { "epoch": 2.2480931352870335, "grad_norm": 6.163950443267822, "learning_rate": 1.3925650557620818e-05, "loss": 0.7289, "step": 5600 }, { "epoch": 2.258129265355279, "grad_norm": 13.072674751281738, "learning_rate": 1.3739776951672864e-05, "loss": 0.863, "step": 5625 }, { "epoch": 2.268165395423525, "grad_norm": 8.630057334899902, "learning_rate": 1.3553903345724907e-05, "loss": 0.8568, "step": 5650 }, { "epoch": 2.2782015254917702, "grad_norm": 4.900511741638184, "learning_rate": 1.3368029739776952e-05, "loss": 0.8374, "step": 5675 }, { "epoch": 2.288237655560016, "grad_norm": 9.796160697937012, "learning_rate": 1.3182156133828997e-05, "loss": 0.7818, "step": 5700 }, { "epoch": 2.2982737856282616, "grad_norm": 7.636236667633057, "learning_rate": 1.299628252788104e-05, "loss": 0.9105, "step": 5725 }, { "epoch": 2.3083099156965075, "grad_norm": 9.415719032287598, "learning_rate": 1.2810408921933086e-05, "loss": 0.8107, "step": 5750 }, { "epoch": 2.318346045764753, "grad_norm": 8.357912063598633, "learning_rate": 1.2624535315985131e-05, "loss": 0.7744, "step": 5775 }, { "epoch": 2.328382175832999, "grad_norm": 8.900328636169434, "learning_rate": 1.2438661710037176e-05, "loss": 0.8166, "step": 5800 }, { "epoch": 2.3384183059012447, "grad_norm": 9.10429859161377, "learning_rate": 1.225278810408922e-05, "loss": 0.8207, "step": 5825 }, { "epoch": 2.34845443596949, "grad_norm": 9.185796737670898, "learning_rate": 1.2066914498141265e-05, "loss": 0.8174, "step": 5850 }, { "epoch": 2.358490566037736, "grad_norm": 8.934030532836914, "learning_rate": 1.188104089219331e-05, "loss": 0.8926, "step": 5875 }, { "epoch": 2.3685266961059814, "grad_norm": 7.495822906494141, "learning_rate": 1.1695167286245355e-05, "loss": 0.7944, "step": 5900 }, { "epoch": 2.3785628261742273, "grad_norm": 4.582374095916748, "learning_rate": 1.1509293680297398e-05, "loss": 0.8671, "step": 5925 }, { "epoch": 2.3885989562424728, "grad_norm": 12.170183181762695, "learning_rate": 1.1323420074349444e-05, "loss": 0.9016, "step": 5950 }, { "epoch": 2.3986350863107186, "grad_norm": 3.8928985595703125, "learning_rate": 1.1137546468401487e-05, "loss": 0.8334, "step": 5975 }, { "epoch": 2.408671216378964, "grad_norm": 11.172091484069824, "learning_rate": 1.0951672862453533e-05, "loss": 0.7836, "step": 6000 }, { "epoch": 2.41870734644721, "grad_norm": 6.816530227661133, "learning_rate": 1.0765799256505576e-05, "loss": 0.8845, "step": 6025 }, { "epoch": 2.4287434765154554, "grad_norm": 7.811573505401611, "learning_rate": 1.0579925650557623e-05, "loss": 0.8348, "step": 6050 }, { "epoch": 2.4387796065837013, "grad_norm": 11.93806266784668, "learning_rate": 1.0394052044609666e-05, "loss": 0.8122, "step": 6075 }, { "epoch": 2.448815736651947, "grad_norm": 2.4777565002441406, "learning_rate": 1.020817843866171e-05, "loss": 0.8031, "step": 6100 }, { "epoch": 2.4588518667201926, "grad_norm": 15.819439888000488, "learning_rate": 1.0022304832713755e-05, "loss": 0.7895, "step": 6125 }, { "epoch": 2.4688879967884385, "grad_norm": 6.035953998565674, "learning_rate": 9.8364312267658e-06, "loss": 0.8489, "step": 6150 }, { "epoch": 2.478924126856684, "grad_norm": 4.4093427658081055, "learning_rate": 9.650557620817845e-06, "loss": 0.7383, "step": 6175 }, { "epoch": 2.48896025692493, "grad_norm": 18.50689125061035, "learning_rate": 9.46468401486989e-06, "loss": 0.8581, "step": 6200 }, { "epoch": 2.4989963869931753, "grad_norm": 13.305225372314453, "learning_rate": 9.278810408921934e-06, "loss": 0.6993, "step": 6225 }, { "epoch": 2.509032517061421, "grad_norm": 11.539031028747559, "learning_rate": 9.092936802973979e-06, "loss": 0.8224, "step": 6250 }, { "epoch": 2.519068647129667, "grad_norm": 12.486981391906738, "learning_rate": 8.907063197026022e-06, "loss": 0.7293, "step": 6275 }, { "epoch": 2.5291047771979125, "grad_norm": 9.520222663879395, "learning_rate": 8.721189591078068e-06, "loss": 0.7651, "step": 6300 }, { "epoch": 2.539140907266158, "grad_norm": 8.691309928894043, "learning_rate": 8.535315985130111e-06, "loss": 0.7133, "step": 6325 }, { "epoch": 2.549177037334404, "grad_norm": 10.589598655700684, "learning_rate": 8.349442379182158e-06, "loss": 0.7406, "step": 6350 }, { "epoch": 2.5592131674026497, "grad_norm": 9.358379364013672, "learning_rate": 8.1635687732342e-06, "loss": 0.7483, "step": 6375 }, { "epoch": 2.569249297470895, "grad_norm": 4.712927341461182, "learning_rate": 7.977695167286247e-06, "loss": 0.7921, "step": 6400 }, { "epoch": 2.579285427539141, "grad_norm": 10.725476264953613, "learning_rate": 7.79182156133829e-06, "loss": 0.7842, "step": 6425 }, { "epoch": 2.5893215576073865, "grad_norm": 5.546850204467773, "learning_rate": 7.605947955390335e-06, "loss": 0.797, "step": 6450 }, { "epoch": 2.5993576876756324, "grad_norm": 9.5972261428833, "learning_rate": 7.420074349442379e-06, "loss": 0.8795, "step": 6475 }, { "epoch": 2.609393817743878, "grad_norm": 18.99161720275879, "learning_rate": 7.234200743494423e-06, "loss": 0.8776, "step": 6500 }, { "epoch": 2.6194299478121237, "grad_norm": 12.693746566772461, "learning_rate": 7.048327137546469e-06, "loss": 0.921, "step": 6525 }, { "epoch": 2.6294660778803696, "grad_norm": 7.012732028961182, "learning_rate": 6.862453531598513e-06, "loss": 0.9028, "step": 6550 }, { "epoch": 2.639502207948615, "grad_norm": 3.836461305618286, "learning_rate": 6.676579925650558e-06, "loss": 0.7473, "step": 6575 }, { "epoch": 2.6495383380168605, "grad_norm": 5.454137802124023, "learning_rate": 6.490706319702602e-06, "loss": 0.8219, "step": 6600 }, { "epoch": 2.6595744680851063, "grad_norm": 9.578851699829102, "learning_rate": 6.304832713754647e-06, "loss": 0.7777, "step": 6625 }, { "epoch": 2.669610598153352, "grad_norm": 8.398209571838379, "learning_rate": 6.1189591078066915e-06, "loss": 0.8382, "step": 6650 }, { "epoch": 2.6796467282215977, "grad_norm": 7.7773566246032715, "learning_rate": 5.933085501858736e-06, "loss": 0.8239, "step": 6675 }, { "epoch": 2.6896828582898435, "grad_norm": 17.32167625427246, "learning_rate": 5.747211895910781e-06, "loss": 0.8283, "step": 6700 }, { "epoch": 2.699718988358089, "grad_norm": 9.905217170715332, "learning_rate": 5.561338289962826e-06, "loss": 0.7463, "step": 6725 }, { "epoch": 2.709755118426335, "grad_norm": 8.677200317382812, "learning_rate": 5.37546468401487e-06, "loss": 0.658, "step": 6750 }, { "epoch": 2.7197912484945803, "grad_norm": 12.012971878051758, "learning_rate": 5.189591078066915e-06, "loss": 0.7865, "step": 6775 }, { "epoch": 2.729827378562826, "grad_norm": 13.909863471984863, "learning_rate": 5.00371747211896e-06, "loss": 0.863, "step": 6800 }, { "epoch": 2.739863508631072, "grad_norm": 2.4488868713378906, "learning_rate": 4.817843866171004e-06, "loss": 0.7535, "step": 6825 }, { "epoch": 2.7498996386993175, "grad_norm": 12.209616661071777, "learning_rate": 4.631970260223048e-06, "loss": 0.8308, "step": 6850 }, { "epoch": 2.759935768767563, "grad_norm": 10.368229866027832, "learning_rate": 4.446096654275093e-06, "loss": 0.8772, "step": 6875 }, { "epoch": 2.769971898835809, "grad_norm": 10.47776985168457, "learning_rate": 4.260223048327138e-06, "loss": 0.7404, "step": 6900 }, { "epoch": 2.7800080289040547, "grad_norm": 7.025723934173584, "learning_rate": 4.0743494423791824e-06, "loss": 0.7694, "step": 6925 }, { "epoch": 2.7900441589723, "grad_norm": 16.20415687561035, "learning_rate": 3.888475836431227e-06, "loss": 0.8484, "step": 6950 }, { "epoch": 2.800080289040546, "grad_norm": 7.123861312866211, "learning_rate": 3.7026022304832714e-06, "loss": 0.778, "step": 6975 }, { "epoch": 2.8101164191087915, "grad_norm": 14.885129928588867, "learning_rate": 3.516728624535316e-06, "loss": 0.8011, "step": 7000 }, { "epoch": 2.8201525491770374, "grad_norm": 5.704489231109619, "learning_rate": 3.330855018587361e-06, "loss": 0.8135, "step": 7025 }, { "epoch": 2.830188679245283, "grad_norm": 15.538228988647461, "learning_rate": 3.1449814126394056e-06, "loss": 0.7954, "step": 7050 }, { "epoch": 2.8402248093135287, "grad_norm": 13.824156761169434, "learning_rate": 2.95910780669145e-06, "loss": 0.8175, "step": 7075 }, { "epoch": 2.8502609393817746, "grad_norm": 12.763619422912598, "learning_rate": 2.7732342007434946e-06, "loss": 0.9212, "step": 7100 }, { "epoch": 2.86029706945002, "grad_norm": 8.71843433380127, "learning_rate": 2.587360594795539e-06, "loss": 0.8323, "step": 7125 }, { "epoch": 2.8703331995182655, "grad_norm": 8.395557403564453, "learning_rate": 2.4014869888475835e-06, "loss": 0.7226, "step": 7150 }, { "epoch": 2.8803693295865114, "grad_norm": 2.9431092739105225, "learning_rate": 2.2156133828996283e-06, "loss": 0.7787, "step": 7175 }, { "epoch": 2.8904054596547573, "grad_norm": 6.241610527038574, "learning_rate": 2.029739776951673e-06, "loss": 0.7049, "step": 7200 }, { "epoch": 2.9004415897230027, "grad_norm": 8.485464096069336, "learning_rate": 1.8438661710037175e-06, "loss": 0.7828, "step": 7225 }, { "epoch": 2.9104777197912486, "grad_norm": 12.730948448181152, "learning_rate": 1.6579925650557622e-06, "loss": 0.7128, "step": 7250 }, { "epoch": 2.920513849859494, "grad_norm": 9.1871337890625, "learning_rate": 1.4721189591078069e-06, "loss": 0.8205, "step": 7275 }, { "epoch": 2.93054997992774, "grad_norm": 8.444822311401367, "learning_rate": 1.2862453531598514e-06, "loss": 0.7645, "step": 7300 }, { "epoch": 2.9405861099959854, "grad_norm": 8.305177688598633, "learning_rate": 1.100371747211896e-06, "loss": 0.6978, "step": 7325 }, { "epoch": 2.9506222400642312, "grad_norm": 8.048552513122559, "learning_rate": 9.144981412639406e-07, "loss": 0.7673, "step": 7350 }, { "epoch": 2.960658370132477, "grad_norm": 7.963881492614746, "learning_rate": 7.286245353159852e-07, "loss": 0.6996, "step": 7375 }, { "epoch": 2.9706945002007226, "grad_norm": 3.0167317390441895, "learning_rate": 5.427509293680298e-07, "loss": 0.7399, "step": 7400 }, { "epoch": 2.9807306302689685, "grad_norm": 10.247928619384766, "learning_rate": 3.568773234200744e-07, "loss": 0.7349, "step": 7425 }, { "epoch": 2.990766760337214, "grad_norm": 6.888391494750977, "learning_rate": 1.7100371747211895e-07, "loss": 0.8048, "step": 7450 }, { "epoch": 3.0, "eval_accuracy": 0.6850662384584504, "eval_f1_macro": 0.6845343178671808, "eval_f1_micro": 0.6850662384584504, "eval_f1_weighted": 0.6834454222699836, "eval_loss": 0.8851153254508972, "eval_precision_macro": 0.6899698563739772, "eval_precision_micro": 0.6850662384584504, "eval_precision_weighted": 0.6888476966796808, "eval_recall_macro": 0.6860535953088197, "eval_recall_micro": 0.6850662384584504, "eval_recall_weighted": 0.6850662384584504, "eval_runtime": 814.4881, "eval_samples_per_second": 6.117, "eval_steps_per_second": 0.383, "step": 7473 } ], "logging_steps": 25, "max_steps": 7473, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3932218859904000.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }