{ "best_metric": 0.011031342670321465, "best_model_checkpoint": "autotrain-mb2mv-qdf75/checkpoint-3839", "epoch": 11.0, "eval_steps": 500, "global_step": 3839, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04871060171919771, "grad_norm": 12.027495384216309, "learning_rate": 2.028639618138425e-06, "loss": 4.4947, "step": 17 }, { "epoch": 0.09742120343839542, "grad_norm": 11.511441230773926, "learning_rate": 4.05727923627685e-06, "loss": 4.4912, "step": 34 }, { "epoch": 0.14613180515759314, "grad_norm": 12.500868797302246, "learning_rate": 5.966587112171838e-06, "loss": 4.4842, "step": 51 }, { "epoch": 0.19484240687679083, "grad_norm": 11.219860076904297, "learning_rate": 7.995226730310263e-06, "loss": 4.4186, "step": 68 }, { "epoch": 0.24355300859598855, "grad_norm": 11.510499000549316, "learning_rate": 1.0023866348448688e-05, "loss": 4.5182, "step": 85 }, { "epoch": 0.2922636103151863, "grad_norm": 12.528426170349121, "learning_rate": 1.2052505966587113e-05, "loss": 4.4535, "step": 102 }, { "epoch": 0.34097421203438394, "grad_norm": 11.191871643066406, "learning_rate": 1.4081145584725539e-05, "loss": 4.4233, "step": 119 }, { "epoch": 0.38968481375358166, "grad_norm": 13.019774436950684, "learning_rate": 1.6109785202863962e-05, "loss": 4.3845, "step": 136 }, { "epoch": 0.4383954154727794, "grad_norm": 11.506011962890625, "learning_rate": 1.8138424821002386e-05, "loss": 4.3334, "step": 153 }, { "epoch": 0.4871060171919771, "grad_norm": 11.238750457763672, "learning_rate": 2.0167064439140813e-05, "loss": 4.2554, "step": 170 }, { "epoch": 0.5358166189111748, "grad_norm": 11.771519660949707, "learning_rate": 2.2195704057279237e-05, "loss": 4.2028, "step": 187 }, { "epoch": 0.5845272206303725, "grad_norm": 12.988870620727539, "learning_rate": 2.4224343675417664e-05, "loss": 4.0815, "step": 204 }, { "epoch": 0.6332378223495702, "grad_norm": 12.157154083251953, "learning_rate": 2.6252983293556088e-05, "loss": 4.1055, "step": 221 }, { "epoch": 0.6819484240687679, "grad_norm": 12.214086532592773, "learning_rate": 2.828162291169451e-05, "loss": 3.9383, "step": 238 }, { "epoch": 0.7306590257879656, "grad_norm": 11.511253356933594, "learning_rate": 3.031026252983294e-05, "loss": 3.9894, "step": 255 }, { "epoch": 0.7793696275071633, "grad_norm": 13.291036605834961, "learning_rate": 3.233890214797136e-05, "loss": 3.7944, "step": 272 }, { "epoch": 0.828080229226361, "grad_norm": 13.204787254333496, "learning_rate": 3.424821002386635e-05, "loss": 3.7806, "step": 289 }, { "epoch": 0.8767908309455588, "grad_norm": 13.473506927490234, "learning_rate": 3.627684964200477e-05, "loss": 3.6716, "step": 306 }, { "epoch": 0.9255014326647565, "grad_norm": 12.780734062194824, "learning_rate": 3.83054892601432e-05, "loss": 3.4641, "step": 323 }, { "epoch": 0.9742120343839542, "grad_norm": 20.36208724975586, "learning_rate": 4.0334128878281626e-05, "loss": 3.3018, "step": 340 }, { "epoch": 1.0, "eval_accuracy": 0.43902439024390244, "eval_f1_macro": 0.3762682151157721, "eval_f1_micro": 0.43902439024390244, "eval_f1_weighted": 0.38500671624754507, "eval_loss": 2.9648730754852295, "eval_precision_macro": 0.5231114557498497, "eval_precision_micro": 0.43902439024390244, "eval_precision_weighted": 0.5343145132587477, "eval_recall_macro": 0.41928580121351205, "eval_recall_micro": 0.43902439024390244, "eval_recall_weighted": 0.43902439024390244, "eval_runtime": 3.4206, "eval_samples_per_second": 203.767, "eval_steps_per_second": 12.863, "step": 349 }, { "epoch": 1.0229226361031518, "grad_norm": 11.021674156188965, "learning_rate": 4.236276849642005e-05, "loss": 3.1839, "step": 357 }, { "epoch": 1.0716332378223496, "grad_norm": 12.111454963684082, "learning_rate": 4.4391408114558474e-05, "loss": 3.0222, "step": 374 }, { "epoch": 1.1203438395415473, "grad_norm": 12.602492332458496, "learning_rate": 4.64200477326969e-05, "loss": 2.8135, "step": 391 }, { "epoch": 1.1690544412607449, "grad_norm": 11.924349784851074, "learning_rate": 4.844868735083533e-05, "loss": 2.6733, "step": 408 }, { "epoch": 1.2177650429799427, "grad_norm": 11.714224815368652, "learning_rate": 4.9946935526664904e-05, "loss": 2.4497, "step": 425 }, { "epoch": 1.2664756446991405, "grad_norm": 13.355523109436035, "learning_rate": 4.972141151499072e-05, "loss": 2.3633, "step": 442 }, { "epoch": 1.3151862464183381, "grad_norm": 11.773361206054688, "learning_rate": 4.949588750331653e-05, "loss": 2.2063, "step": 459 }, { "epoch": 1.3638968481375358, "grad_norm": 12.601265907287598, "learning_rate": 4.9270363491642345e-05, "loss": 2.2622, "step": 476 }, { "epoch": 1.4126074498567336, "grad_norm": 11.489968299865723, "learning_rate": 4.9044839479968165e-05, "loss": 2.0349, "step": 493 }, { "epoch": 1.4613180515759312, "grad_norm": 11.553853034973145, "learning_rate": 4.881931546829398e-05, "loss": 1.8452, "step": 510 }, { "epoch": 1.5100286532951288, "grad_norm": 13.461709976196289, "learning_rate": 4.85937914566198e-05, "loss": 1.7711, "step": 527 }, { "epoch": 1.5587392550143266, "grad_norm": 11.289037704467773, "learning_rate": 4.8368267444945606e-05, "loss": 1.562, "step": 544 }, { "epoch": 1.6074498567335245, "grad_norm": 9.85034465789795, "learning_rate": 4.814274343327143e-05, "loss": 1.4258, "step": 561 }, { "epoch": 1.656160458452722, "grad_norm": 8.626704216003418, "learning_rate": 4.791721942159724e-05, "loss": 1.2561, "step": 578 }, { "epoch": 1.7048710601719197, "grad_norm": 8.728435516357422, "learning_rate": 4.769169540992306e-05, "loss": 1.229, "step": 595 }, { "epoch": 1.7535816618911175, "grad_norm": 11.541463851928711, "learning_rate": 4.7466171398248875e-05, "loss": 1.175, "step": 612 }, { "epoch": 1.8022922636103151, "grad_norm": 11.69316291809082, "learning_rate": 4.724064738657469e-05, "loss": 1.2961, "step": 629 }, { "epoch": 1.8510028653295127, "grad_norm": 7.8675737380981445, "learning_rate": 4.70151233749005e-05, "loss": 0.9699, "step": 646 }, { "epoch": 1.8997134670487106, "grad_norm": 7.240268230438232, "learning_rate": 4.678959936322632e-05, "loss": 1.235, "step": 663 }, { "epoch": 1.9484240687679084, "grad_norm": 6.3042426109313965, "learning_rate": 4.6564075351552136e-05, "loss": 1.1121, "step": 680 }, { "epoch": 1.997134670487106, "grad_norm": 8.941567420959473, "learning_rate": 4.633855133987796e-05, "loss": 0.9363, "step": 697 }, { "epoch": 2.0, "eval_accuracy": 0.945480631276901, "eval_f1_macro": 0.9425017115213628, "eval_f1_micro": 0.945480631276901, "eval_f1_weighted": 0.9445694655877982, "eval_loss": 0.5551677346229553, "eval_precision_macro": 0.9567204214794576, "eval_precision_micro": 0.945480631276901, "eval_precision_weighted": 0.9597403513113699, "eval_recall_macro": 0.94451833307255, "eval_recall_micro": 0.945480631276901, "eval_recall_weighted": 0.945480631276901, "eval_runtime": 3.3936, "eval_samples_per_second": 205.389, "eval_steps_per_second": 12.966, "step": 698 }, { "epoch": 2.0458452722063036, "grad_norm": 4.50822114944458, "learning_rate": 4.611302732820377e-05, "loss": 1.0456, "step": 714 }, { "epoch": 2.0945558739255015, "grad_norm": 5.0435686111450195, "learning_rate": 4.5887503316529584e-05, "loss": 0.5131, "step": 731 }, { "epoch": 2.1432664756446993, "grad_norm": 5.728665351867676, "learning_rate": 4.5661979304855405e-05, "loss": 0.705, "step": 748 }, { "epoch": 2.1919770773638967, "grad_norm": 4.446041584014893, "learning_rate": 4.543645529318122e-05, "loss": 0.6504, "step": 765 }, { "epoch": 2.2406876790830945, "grad_norm": 5.601328372955322, "learning_rate": 4.521093128150703e-05, "loss": 0.6361, "step": 782 }, { "epoch": 2.2893982808022924, "grad_norm": 6.830723285675049, "learning_rate": 4.4985407269832846e-05, "loss": 0.7554, "step": 799 }, { "epoch": 2.3381088825214897, "grad_norm": 9.851592063903809, "learning_rate": 4.4759883258158666e-05, "loss": 0.6952, "step": 816 }, { "epoch": 2.3868194842406876, "grad_norm": 12.239367485046387, "learning_rate": 4.453435924648448e-05, "loss": 0.7793, "step": 833 }, { "epoch": 2.4355300859598854, "grad_norm": 4.4265360832214355, "learning_rate": 4.43088352348103e-05, "loss": 0.5841, "step": 850 }, { "epoch": 2.4842406876790832, "grad_norm": 11.818037986755371, "learning_rate": 4.4083311223136114e-05, "loss": 0.6259, "step": 867 }, { "epoch": 2.532951289398281, "grad_norm": 13.520539283752441, "learning_rate": 4.385778721146193e-05, "loss": 0.674, "step": 884 }, { "epoch": 2.5816618911174785, "grad_norm": 12.981139183044434, "learning_rate": 4.363226319978774e-05, "loss": 0.7362, "step": 901 }, { "epoch": 2.6303724928366763, "grad_norm": 12.412555694580078, "learning_rate": 4.340673918811356e-05, "loss": 0.557, "step": 918 }, { "epoch": 2.6790830945558737, "grad_norm": 7.840790748596191, "learning_rate": 4.3181215176439375e-05, "loss": 0.6293, "step": 935 }, { "epoch": 2.7277936962750715, "grad_norm": 4.981259822845459, "learning_rate": 4.2955691164765196e-05, "loss": 0.5793, "step": 952 }, { "epoch": 2.7765042979942693, "grad_norm": 7.165767669677734, "learning_rate": 4.2730167153091e-05, "loss": 0.5705, "step": 969 }, { "epoch": 2.825214899713467, "grad_norm": 6.7298736572265625, "learning_rate": 4.250464314141682e-05, "loss": 0.5446, "step": 986 }, { "epoch": 2.873925501432665, "grad_norm": 7.268840789794922, "learning_rate": 4.227911912974264e-05, "loss": 0.4438, "step": 1003 }, { "epoch": 2.9226361031518624, "grad_norm": 1.802043080329895, "learning_rate": 4.205359511806846e-05, "loss": 0.4892, "step": 1020 }, { "epoch": 2.9713467048710602, "grad_norm": 10.247010231018066, "learning_rate": 4.182807110639427e-05, "loss": 0.4922, "step": 1037 }, { "epoch": 3.0, "eval_accuracy": 0.9727403156384505, "eval_f1_macro": 0.9705769072502259, "eval_f1_micro": 0.9727403156384505, "eval_f1_weighted": 0.9719785983620151, "eval_loss": 0.17863501608371735, "eval_precision_macro": 0.9795910916392845, "eval_precision_micro": 0.9727403156384505, "eval_precision_weighted": 0.9787328087184615, "eval_recall_macro": 0.969774335436986, "eval_recall_micro": 0.9727403156384505, "eval_recall_weighted": 0.9727403156384505, "eval_runtime": 3.4979, "eval_samples_per_second": 199.263, "eval_steps_per_second": 12.579, "step": 1047 }, { "epoch": 3.020057306590258, "grad_norm": 9.481998443603516, "learning_rate": 4.1602547094720085e-05, "loss": 0.5422, "step": 1054 }, { "epoch": 3.0687679083094554, "grad_norm": 9.671106338500977, "learning_rate": 4.13770230830459e-05, "loss": 0.408, "step": 1071 }, { "epoch": 3.1174785100286533, "grad_norm": 1.3068506717681885, "learning_rate": 4.115149907137172e-05, "loss": 0.404, "step": 1088 }, { "epoch": 3.166189111747851, "grad_norm": 8.020153045654297, "learning_rate": 4.092597505969753e-05, "loss": 0.4022, "step": 1105 }, { "epoch": 3.2148997134670485, "grad_norm": 9.03290843963623, "learning_rate": 4.070045104802335e-05, "loss": 0.4726, "step": 1122 }, { "epoch": 3.2636103151862463, "grad_norm": 8.347646713256836, "learning_rate": 4.047492703634917e-05, "loss": 0.4158, "step": 1139 }, { "epoch": 3.312320916905444, "grad_norm": 9.76726245880127, "learning_rate": 4.024940302467498e-05, "loss": 0.3523, "step": 1156 }, { "epoch": 3.361031518624642, "grad_norm": 8.464173316955566, "learning_rate": 4.00238790130008e-05, "loss": 0.5155, "step": 1173 }, { "epoch": 3.4097421203438394, "grad_norm": 4.331398963928223, "learning_rate": 3.9798355001326615e-05, "loss": 0.395, "step": 1190 }, { "epoch": 3.458452722063037, "grad_norm": 7.228985786437988, "learning_rate": 3.9572830989652435e-05, "loss": 0.3984, "step": 1207 }, { "epoch": 3.507163323782235, "grad_norm": 10.442928314208984, "learning_rate": 3.934730697797824e-05, "loss": 0.4143, "step": 1224 }, { "epoch": 3.555873925501433, "grad_norm": 8.429516792297363, "learning_rate": 3.912178296630406e-05, "loss": 0.3767, "step": 1241 }, { "epoch": 3.6045845272206303, "grad_norm": 12.501051902770996, "learning_rate": 3.8896258954629876e-05, "loss": 0.3507, "step": 1258 }, { "epoch": 3.653295128939828, "grad_norm": 7.7675652503967285, "learning_rate": 3.86707349429557e-05, "loss": 0.395, "step": 1275 }, { "epoch": 3.702005730659026, "grad_norm": 4.184613227844238, "learning_rate": 3.844521093128151e-05, "loss": 0.2874, "step": 1292 }, { "epoch": 3.7507163323782233, "grad_norm": 10.189749717712402, "learning_rate": 3.8219686919607324e-05, "loss": 0.3396, "step": 1309 }, { "epoch": 3.799426934097421, "grad_norm": 2.022300958633423, "learning_rate": 3.799416290793314e-05, "loss": 0.2436, "step": 1326 }, { "epoch": 3.848137535816619, "grad_norm": 8.71822452545166, "learning_rate": 3.776863889625896e-05, "loss": 0.3773, "step": 1343 }, { "epoch": 3.896848137535817, "grad_norm": 7.879873752593994, "learning_rate": 3.754311488458477e-05, "loss": 0.4686, "step": 1360 }, { "epoch": 3.945558739255014, "grad_norm": 1.0487672090530396, "learning_rate": 3.731759087291059e-05, "loss": 0.3921, "step": 1377 }, { "epoch": 3.994269340974212, "grad_norm": 8.260384559631348, "learning_rate": 3.70920668612364e-05, "loss": 0.2956, "step": 1394 }, { "epoch": 4.0, "eval_accuracy": 0.9885222381635581, "eval_f1_macro": 0.9872476382417075, "eval_f1_micro": 0.9885222381635581, "eval_f1_weighted": 0.9882948425801252, "eval_loss": 0.09004738181829453, "eval_precision_macro": 0.9903644882560545, "eval_precision_micro": 0.9885222381635581, "eval_precision_weighted": 0.9902174543135807, "eval_recall_macro": 0.9865557467967108, "eval_recall_micro": 0.9885222381635581, "eval_recall_weighted": 0.9885222381635581, "eval_runtime": 3.3411, "eval_samples_per_second": 208.616, "eval_steps_per_second": 13.169, "step": 1396 }, { "epoch": 4.042979942693409, "grad_norm": 6.5528106689453125, "learning_rate": 3.686654284956222e-05, "loss": 0.4403, "step": 1411 }, { "epoch": 4.091690544412607, "grad_norm": 3.8414504528045654, "learning_rate": 3.6641018837888034e-05, "loss": 0.2637, "step": 1428 }, { "epoch": 4.140401146131805, "grad_norm": 16.609180450439453, "learning_rate": 3.6415494826213854e-05, "loss": 0.3618, "step": 1445 }, { "epoch": 4.189111747851003, "grad_norm": 2.179348945617676, "learning_rate": 3.618997081453967e-05, "loss": 0.4447, "step": 1462 }, { "epoch": 4.237822349570201, "grad_norm": 3.5908546447753906, "learning_rate": 3.596444680286548e-05, "loss": 0.2905, "step": 1479 }, { "epoch": 4.286532951289399, "grad_norm": 7.550769805908203, "learning_rate": 3.5738922791191295e-05, "loss": 0.2448, "step": 1496 }, { "epoch": 4.3352435530085955, "grad_norm": 0.7109397649765015, "learning_rate": 3.5513398779517116e-05, "loss": 0.2127, "step": 1513 }, { "epoch": 4.383954154727793, "grad_norm": 1.54320228099823, "learning_rate": 3.528787476784293e-05, "loss": 0.2202, "step": 1530 }, { "epoch": 4.432664756446991, "grad_norm": 10.156286239624023, "learning_rate": 3.506235075616875e-05, "loss": 0.3263, "step": 1547 }, { "epoch": 4.481375358166189, "grad_norm": 11.149276733398438, "learning_rate": 3.4836826744494563e-05, "loss": 0.213, "step": 1564 }, { "epoch": 4.530085959885387, "grad_norm": 14.087788581848145, "learning_rate": 3.461130273282038e-05, "loss": 0.3907, "step": 1581 }, { "epoch": 4.578796561604585, "grad_norm": 6.006841659545898, "learning_rate": 3.43857787211462e-05, "loss": 0.4959, "step": 1598 }, { "epoch": 4.6275071633237825, "grad_norm": 6.818835258483887, "learning_rate": 3.416025470947201e-05, "loss": 0.3309, "step": 1615 }, { "epoch": 4.6762177650429795, "grad_norm": 1.0696688890457153, "learning_rate": 3.393473069779783e-05, "loss": 0.2245, "step": 1632 }, { "epoch": 4.724928366762177, "grad_norm": 11.383952140808105, "learning_rate": 3.370920668612364e-05, "loss": 0.3473, "step": 1649 }, { "epoch": 4.773638968481375, "grad_norm": 7.438843727111816, "learning_rate": 3.348368267444946e-05, "loss": 0.2508, "step": 1666 }, { "epoch": 4.822349570200573, "grad_norm": 1.553702473640442, "learning_rate": 3.325815866277527e-05, "loss": 0.2669, "step": 1683 }, { "epoch": 4.871060171919771, "grad_norm": 5.968568325042725, "learning_rate": 3.303263465110109e-05, "loss": 0.1219, "step": 1700 }, { "epoch": 4.919770773638969, "grad_norm": 0.3757087290287018, "learning_rate": 3.280711063942691e-05, "loss": 0.2749, "step": 1717 }, { "epoch": 4.9684813753581665, "grad_norm": 7.143729209899902, "learning_rate": 3.258158662775272e-05, "loss": 0.1591, "step": 1734 }, { "epoch": 5.0, "eval_accuracy": 0.9885222381635581, "eval_f1_macro": 0.9880046003912989, "eval_f1_micro": 0.9885222381635581, "eval_f1_weighted": 0.9883902987879117, "eval_loss": 0.05691728740930557, "eval_precision_macro": 0.9900693683826214, "eval_precision_micro": 0.9885222381635581, "eval_precision_weighted": 0.9900236221613553, "eval_recall_macro": 0.9879231210556513, "eval_recall_micro": 0.9885222381635581, "eval_recall_weighted": 0.9885222381635581, "eval_runtime": 3.3973, "eval_samples_per_second": 205.165, "eval_steps_per_second": 12.952, "step": 1745 }, { "epoch": 5.017191977077364, "grad_norm": 1.9127634763717651, "learning_rate": 3.2356062616078534e-05, "loss": 0.397, "step": 1751 }, { "epoch": 5.065902578796561, "grad_norm": 8.336675643920898, "learning_rate": 3.2130538604404355e-05, "loss": 0.2355, "step": 1768 }, { "epoch": 5.114613180515759, "grad_norm": 7.159496307373047, "learning_rate": 3.190501459273017e-05, "loss": 0.2086, "step": 1785 }, { "epoch": 5.163323782234957, "grad_norm": 6.03056526184082, "learning_rate": 3.167949058105599e-05, "loss": 0.2212, "step": 1802 }, { "epoch": 5.212034383954155, "grad_norm": 7.681415557861328, "learning_rate": 3.1453966569381796e-05, "loss": 0.2721, "step": 1819 }, { "epoch": 5.260744985673353, "grad_norm": 1.4911251068115234, "learning_rate": 3.1228442557707616e-05, "loss": 0.1994, "step": 1836 }, { "epoch": 5.30945558739255, "grad_norm": 7.99345588684082, "learning_rate": 3.100291854603343e-05, "loss": 0.312, "step": 1853 }, { "epoch": 5.358166189111748, "grad_norm": 3.288712978363037, "learning_rate": 3.077739453435925e-05, "loss": 0.3002, "step": 1870 }, { "epoch": 5.406876790830945, "grad_norm": 0.1384359449148178, "learning_rate": 3.0551870522685064e-05, "loss": 0.1875, "step": 1887 }, { "epoch": 5.455587392550143, "grad_norm": 2.912055730819702, "learning_rate": 3.0326346511010878e-05, "loss": 0.1617, "step": 1904 }, { "epoch": 5.504297994269341, "grad_norm": 9.510294914245605, "learning_rate": 3.0100822499336695e-05, "loss": 0.1553, "step": 1921 }, { "epoch": 5.553008595988539, "grad_norm": 5.520040988922119, "learning_rate": 2.9875298487662512e-05, "loss": 0.1949, "step": 1938 }, { "epoch": 5.6017191977077365, "grad_norm": 0.39325079321861267, "learning_rate": 2.964977447598833e-05, "loss": 0.2814, "step": 1955 }, { "epoch": 5.650429799426934, "grad_norm": 0.1934385895729065, "learning_rate": 2.942425046431414e-05, "loss": 0.3251, "step": 1972 }, { "epoch": 5.699140401146132, "grad_norm": 5.8890533447265625, "learning_rate": 2.9198726452639957e-05, "loss": 0.1652, "step": 1989 }, { "epoch": 5.747851002865329, "grad_norm": 5.028823375701904, "learning_rate": 2.8973202440965774e-05, "loss": 0.305, "step": 2006 }, { "epoch": 5.796561604584527, "grad_norm": 0.35111504793167114, "learning_rate": 2.874767842929159e-05, "loss": 0.1684, "step": 2023 }, { "epoch": 5.845272206303725, "grad_norm": 1.910530686378479, "learning_rate": 2.8522154417617408e-05, "loss": 0.2535, "step": 2040 }, { "epoch": 5.893982808022923, "grad_norm": 5.5074334144592285, "learning_rate": 2.8296630405943218e-05, "loss": 0.2924, "step": 2057 }, { "epoch": 5.9426934097421205, "grad_norm": 6.081971645355225, "learning_rate": 2.8071106394269035e-05, "loss": 0.1663, "step": 2074 }, { "epoch": 5.991404011461318, "grad_norm": 1.7783217430114746, "learning_rate": 2.7845582382594852e-05, "loss": 0.1912, "step": 2091 }, { "epoch": 6.0, "eval_accuracy": 0.9971305595408895, "eval_f1_macro": 0.997156659844563, "eval_f1_micro": 0.9971305595408895, "eval_f1_weighted": 0.9971139798573604, "eval_loss": 0.031095275655388832, "eval_precision_macro": 0.9975660216624072, "eval_precision_micro": 0.9971305595408895, "eval_precision_weighted": 0.9974204020115067, "eval_recall_macro": 0.9970740103270225, "eval_recall_micro": 0.9971305595408895, "eval_recall_weighted": 0.9971305595408895, "eval_runtime": 3.3447, "eval_samples_per_second": 208.387, "eval_steps_per_second": 13.155, "step": 2094 }, { "epoch": 6.040114613180516, "grad_norm": 12.544295310974121, "learning_rate": 2.762005837092067e-05, "loss": 0.2401, "step": 2108 }, { "epoch": 6.088825214899713, "grad_norm": 0.36089888215065, "learning_rate": 2.7394534359246486e-05, "loss": 0.1197, "step": 2125 }, { "epoch": 6.137535816618911, "grad_norm": 3.824916124343872, "learning_rate": 2.71690103475723e-05, "loss": 0.2304, "step": 2142 }, { "epoch": 6.186246418338109, "grad_norm": 7.010196685791016, "learning_rate": 2.6943486335898117e-05, "loss": 0.2393, "step": 2159 }, { "epoch": 6.234957020057307, "grad_norm": 0.23738817870616913, "learning_rate": 2.6717962324223934e-05, "loss": 0.2878, "step": 2176 }, { "epoch": 6.283667621776504, "grad_norm": 6.685153961181641, "learning_rate": 2.649243831254975e-05, "loss": 0.23, "step": 2193 }, { "epoch": 6.332378223495702, "grad_norm": 9.155635833740234, "learning_rate": 2.626691430087557e-05, "loss": 0.127, "step": 2210 }, { "epoch": 6.3810888252149, "grad_norm": 1.8248714208602905, "learning_rate": 2.604139028920138e-05, "loss": 0.264, "step": 2227 }, { "epoch": 6.429799426934097, "grad_norm": 3.287179946899414, "learning_rate": 2.5815866277527196e-05, "loss": 0.2568, "step": 2244 }, { "epoch": 6.478510028653295, "grad_norm": 0.12249535322189331, "learning_rate": 2.5590342265853013e-05, "loss": 0.1211, "step": 2261 }, { "epoch": 6.527220630372493, "grad_norm": 9.649252891540527, "learning_rate": 2.536481825417883e-05, "loss": 0.2599, "step": 2278 }, { "epoch": 6.5759312320916905, "grad_norm": 8.501516342163086, "learning_rate": 2.5139294242504647e-05, "loss": 0.1997, "step": 2295 }, { "epoch": 6.624641833810888, "grad_norm": 6.785931587219238, "learning_rate": 2.491377023083046e-05, "loss": 0.1947, "step": 2312 }, { "epoch": 6.673352435530086, "grad_norm": 1.3328988552093506, "learning_rate": 2.4688246219156274e-05, "loss": 0.2236, "step": 2329 }, { "epoch": 6.722063037249284, "grad_norm": 9.384140014648438, "learning_rate": 2.446272220748209e-05, "loss": 0.2394, "step": 2346 }, { "epoch": 6.770773638968482, "grad_norm": 1.0058611631393433, "learning_rate": 2.423719819580791e-05, "loss": 0.1288, "step": 2363 }, { "epoch": 6.819484240687679, "grad_norm": 0.7905517816543579, "learning_rate": 2.4011674184133722e-05, "loss": 0.1528, "step": 2380 }, { "epoch": 6.868194842406877, "grad_norm": 10.827178955078125, "learning_rate": 2.378615017245954e-05, "loss": 0.1767, "step": 2397 }, { "epoch": 6.916905444126074, "grad_norm": 7.897141933441162, "learning_rate": 2.3560626160785353e-05, "loss": 0.1411, "step": 2414 }, { "epoch": 6.965616045845272, "grad_norm": 4.635827541351318, "learning_rate": 2.333510214911117e-05, "loss": 0.1712, "step": 2431 }, { "epoch": 7.0, "eval_accuracy": 0.9956958393113343, "eval_f1_macro": 0.9951431111442676, "eval_f1_micro": 0.9956958393113343, "eval_f1_weighted": 0.9956405011600654, "eval_loss": 0.024958999827504158, "eval_precision_macro": 0.995983935742972, "eval_precision_micro": 0.9956958393113343, "eval_precision_weighted": 0.9964929061055317, "eval_recall_macro": 0.9953528399311532, "eval_recall_micro": 0.9956958393113343, "eval_recall_weighted": 0.9956958393113343, "eval_runtime": 3.4628, "eval_samples_per_second": 201.281, "eval_steps_per_second": 12.706, "step": 2443 }, { "epoch": 7.01432664756447, "grad_norm": 0.12232652306556702, "learning_rate": 2.3109578137436987e-05, "loss": 0.2055, "step": 2448 }, { "epoch": 7.063037249283668, "grad_norm": 0.07312128692865372, "learning_rate": 2.28840541257628e-05, "loss": 0.1848, "step": 2465 }, { "epoch": 7.111747851002866, "grad_norm": 0.32181409001350403, "learning_rate": 2.2658530114088618e-05, "loss": 0.1364, "step": 2482 }, { "epoch": 7.160458452722063, "grad_norm": 0.7672788500785828, "learning_rate": 2.2433006102414432e-05, "loss": 0.072, "step": 2499 }, { "epoch": 7.2091690544412605, "grad_norm": 8.377331733703613, "learning_rate": 2.220748209074025e-05, "loss": 0.2638, "step": 2516 }, { "epoch": 7.257879656160458, "grad_norm": 9.670488357543945, "learning_rate": 2.1981958079066066e-05, "loss": 0.2495, "step": 2533 }, { "epoch": 7.306590257879656, "grad_norm": 0.24363534152507782, "learning_rate": 2.1756434067391883e-05, "loss": 0.2038, "step": 2550 }, { "epoch": 7.355300859598854, "grad_norm": 2.2357654571533203, "learning_rate": 2.15309100557177e-05, "loss": 0.2934, "step": 2567 }, { "epoch": 7.404011461318052, "grad_norm": 0.20546384155750275, "learning_rate": 2.1305386044043514e-05, "loss": 0.1834, "step": 2584 }, { "epoch": 7.45272206303725, "grad_norm": 0.32598844170570374, "learning_rate": 2.107986203236933e-05, "loss": 0.0821, "step": 2601 }, { "epoch": 7.501432664756447, "grad_norm": 8.553650856018066, "learning_rate": 2.0854338020695148e-05, "loss": 0.0992, "step": 2618 }, { "epoch": 7.5501432664756445, "grad_norm": 0.3119734525680542, "learning_rate": 2.062881400902096e-05, "loss": 0.2344, "step": 2635 }, { "epoch": 7.598853868194842, "grad_norm": 6.1670002937316895, "learning_rate": 2.040328999734678e-05, "loss": 0.1058, "step": 2652 }, { "epoch": 7.64756446991404, "grad_norm": 2.705218553543091, "learning_rate": 2.0177765985672592e-05, "loss": 0.1608, "step": 2669 }, { "epoch": 7.696275071633238, "grad_norm": 5.938003063201904, "learning_rate": 1.995224197399841e-05, "loss": 0.1554, "step": 2686 }, { "epoch": 7.744985673352436, "grad_norm": 0.41698479652404785, "learning_rate": 1.9726717962324227e-05, "loss": 0.0979, "step": 2703 }, { "epoch": 7.793696275071634, "grad_norm": 0.4503624141216278, "learning_rate": 1.950119395065004e-05, "loss": 0.1353, "step": 2720 }, { "epoch": 7.842406876790831, "grad_norm": 4.662674427032471, "learning_rate": 1.9275669938975857e-05, "loss": 0.1295, "step": 2737 }, { "epoch": 7.891117478510028, "grad_norm": 5.666357517242432, "learning_rate": 1.905014592730167e-05, "loss": 0.2144, "step": 2754 }, { "epoch": 7.939828080229226, "grad_norm": 0.6394052505493164, "learning_rate": 1.8824621915627488e-05, "loss": 0.178, "step": 2771 }, { "epoch": 7.988538681948424, "grad_norm": 9.961098670959473, "learning_rate": 1.8599097903953305e-05, "loss": 0.2561, "step": 2788 }, { "epoch": 8.0, "eval_accuracy": 0.9956958393113343, "eval_f1_macro": 0.9951431111442676, "eval_f1_micro": 0.9956958393113343, "eval_f1_weighted": 0.9956405011600654, "eval_loss": 0.023447172716259956, "eval_precision_macro": 0.995983935742972, "eval_precision_micro": 0.9956958393113343, "eval_precision_weighted": 0.9964929061055317, "eval_recall_macro": 0.9953528399311532, "eval_recall_micro": 0.9956958393113343, "eval_recall_weighted": 0.9956958393113343, "eval_runtime": 3.3596, "eval_samples_per_second": 207.466, "eval_steps_per_second": 13.097, "step": 2792 }, { "epoch": 8.037249283667622, "grad_norm": 10.93313217163086, "learning_rate": 1.837357389227912e-05, "loss": 0.11, "step": 2805 }, { "epoch": 8.085959885386819, "grad_norm": 0.05085707828402519, "learning_rate": 1.8148049880604936e-05, "loss": 0.0616, "step": 2822 }, { "epoch": 8.134670487106018, "grad_norm": 10.42803955078125, "learning_rate": 1.792252586893075e-05, "loss": 0.0648, "step": 2839 }, { "epoch": 8.183381088825215, "grad_norm": 1.379164457321167, "learning_rate": 1.7697001857256567e-05, "loss": 0.1706, "step": 2856 }, { "epoch": 8.232091690544413, "grad_norm": 8.783364295959473, "learning_rate": 1.7471477845582384e-05, "loss": 0.1954, "step": 2873 }, { "epoch": 8.28080229226361, "grad_norm": 1.5522698163986206, "learning_rate": 1.7245953833908197e-05, "loss": 0.2134, "step": 2890 }, { "epoch": 8.329512893982809, "grad_norm": 6.784268379211426, "learning_rate": 1.7020429822234015e-05, "loss": 0.1928, "step": 2907 }, { "epoch": 8.378223495702006, "grad_norm": 3.0361063480377197, "learning_rate": 1.679490581055983e-05, "loss": 0.3187, "step": 2924 }, { "epoch": 8.426934097421203, "grad_norm": 1.8513216972351074, "learning_rate": 1.6569381798885645e-05, "loss": 0.1319, "step": 2941 }, { "epoch": 8.475644699140402, "grad_norm": 0.5567758083343506, "learning_rate": 1.6343857787211462e-05, "loss": 0.136, "step": 2958 }, { "epoch": 8.524355300859598, "grad_norm": 2.810915231704712, "learning_rate": 1.611833377553728e-05, "loss": 0.0858, "step": 2975 }, { "epoch": 8.573065902578797, "grad_norm": 1.9855493307113647, "learning_rate": 1.5892809763863097e-05, "loss": 0.1732, "step": 2992 }, { "epoch": 8.621776504297994, "grad_norm": 0.1735798567533493, "learning_rate": 1.566728575218891e-05, "loss": 0.0911, "step": 3009 }, { "epoch": 8.670487106017191, "grad_norm": 0.19329993426799774, "learning_rate": 1.5441761740514727e-05, "loss": 0.1024, "step": 3026 }, { "epoch": 8.71919770773639, "grad_norm": 4.43624210357666, "learning_rate": 1.5216237728840543e-05, "loss": 0.1408, "step": 3043 }, { "epoch": 8.767908309455587, "grad_norm": 9.911310195922852, "learning_rate": 1.4990713717166358e-05, "loss": 0.1626, "step": 3060 }, { "epoch": 8.816618911174785, "grad_norm": 1.323052167892456, "learning_rate": 1.4765189705492175e-05, "loss": 0.1222, "step": 3077 }, { "epoch": 8.865329512893982, "grad_norm": 11.561975479125977, "learning_rate": 1.4539665693817989e-05, "loss": 0.1769, "step": 3094 }, { "epoch": 8.914040114613181, "grad_norm": 0.08104487508535385, "learning_rate": 1.4314141682143806e-05, "loss": 0.0953, "step": 3111 }, { "epoch": 8.962750716332378, "grad_norm": 0.04927730932831764, "learning_rate": 1.408861767046962e-05, "loss": 0.0574, "step": 3128 }, { "epoch": 9.0, "eval_accuracy": 0.9956958393113343, "eval_f1_macro": 0.9948504424939576, "eval_f1_micro": 0.9956958393113343, "eval_f1_weighted": 0.9956230754082893, "eval_loss": 0.017519734799861908, "eval_precision_macro": 0.995983935742972, "eval_precision_micro": 0.9956958393113343, "eval_precision_weighted": 0.9964929061055317, "eval_recall_macro": 0.9948364888123926, "eval_recall_micro": 0.9956958393113343, "eval_recall_weighted": 0.9956958393113343, "eval_runtime": 3.4072, "eval_samples_per_second": 204.565, "eval_steps_per_second": 12.914, "step": 3141 }, { "epoch": 9.011461318051577, "grad_norm": 0.3880198001861572, "learning_rate": 1.3863093658795437e-05, "loss": 0.1954, "step": 3145 }, { "epoch": 9.060171919770774, "grad_norm": 0.05076654255390167, "learning_rate": 1.3637569647121254e-05, "loss": 0.0797, "step": 3162 }, { "epoch": 9.10888252148997, "grad_norm": 6.032546043395996, "learning_rate": 1.3412045635447068e-05, "loss": 0.2393, "step": 3179 }, { "epoch": 9.15759312320917, "grad_norm": 11.056164741516113, "learning_rate": 1.3186521623772885e-05, "loss": 0.1001, "step": 3196 }, { "epoch": 9.206303724928366, "grad_norm": 0.19840994477272034, "learning_rate": 1.29609976120987e-05, "loss": 0.0743, "step": 3213 }, { "epoch": 9.255014326647565, "grad_norm": 4.645060062408447, "learning_rate": 1.2735473600424515e-05, "loss": 0.1446, "step": 3230 }, { "epoch": 9.303724928366762, "grad_norm": 9.013117790222168, "learning_rate": 1.2509949588750332e-05, "loss": 0.1288, "step": 3247 }, { "epoch": 9.35243553008596, "grad_norm": 1.7711181640625, "learning_rate": 1.228442557707615e-05, "loss": 0.0835, "step": 3264 }, { "epoch": 9.401146131805158, "grad_norm": 5.3366379737854, "learning_rate": 1.2058901565401965e-05, "loss": 0.1201, "step": 3281 }, { "epoch": 9.449856733524355, "grad_norm": 3.4900286197662354, "learning_rate": 1.183337755372778e-05, "loss": 0.1026, "step": 3298 }, { "epoch": 9.498567335243553, "grad_norm": 0.059569913893938065, "learning_rate": 1.1607853542053596e-05, "loss": 0.1123, "step": 3315 }, { "epoch": 9.54727793696275, "grad_norm": 8.251703262329102, "learning_rate": 1.1382329530379411e-05, "loss": 0.1441, "step": 3332 }, { "epoch": 9.595988538681949, "grad_norm": 1.078260064125061, "learning_rate": 1.1156805518705226e-05, "loss": 0.1191, "step": 3349 }, { "epoch": 9.644699140401146, "grad_norm": 7.364470958709717, "learning_rate": 1.0931281507031044e-05, "loss": 0.1539, "step": 3366 }, { "epoch": 9.693409742120345, "grad_norm": 2.354499101638794, "learning_rate": 1.0705757495356859e-05, "loss": 0.1502, "step": 3383 }, { "epoch": 9.742120343839542, "grad_norm": 10.193525314331055, "learning_rate": 1.0480233483682674e-05, "loss": 0.1199, "step": 3400 }, { "epoch": 9.790830945558739, "grad_norm": 2.536367893218994, "learning_rate": 1.025470947200849e-05, "loss": 0.2098, "step": 3417 }, { "epoch": 9.839541547277937, "grad_norm": 5.09243631362915, "learning_rate": 1.0029185460334307e-05, "loss": 0.2896, "step": 3434 }, { "epoch": 9.888252148997134, "grad_norm": 1.098929762840271, "learning_rate": 9.803661448660124e-06, "loss": 0.1017, "step": 3451 }, { "epoch": 9.936962750716333, "grad_norm": 0.1535651981830597, "learning_rate": 9.57813743698594e-06, "loss": 0.1077, "step": 3468 }, { "epoch": 9.98567335243553, "grad_norm": 0.5236210227012634, "learning_rate": 9.352613425311755e-06, "loss": 0.084, "step": 3485 }, { "epoch": 10.0, "eval_accuracy": 0.9956958393113343, "eval_f1_macro": 0.9949831411206324, "eval_f1_micro": 0.9956958393113343, "eval_f1_weighted": 0.9955771743939521, "eval_loss": 0.015395666472613811, "eval_precision_macro": 0.9964707314104905, "eval_precision_micro": 0.9956958393113343, "eval_precision_weighted": 0.9963769691172847, "eval_recall_macro": 0.9945496270797476, "eval_recall_micro": 0.9956958393113343, "eval_recall_weighted": 0.9956958393113343, "eval_runtime": 3.4096, "eval_samples_per_second": 204.424, "eval_steps_per_second": 12.905, "step": 3490 }, { "epoch": 10.034383954154729, "grad_norm": 0.10064805299043655, "learning_rate": 9.12708941363757e-06, "loss": 0.129, "step": 3502 }, { "epoch": 10.083094555873926, "grad_norm": 0.9957130551338196, "learning_rate": 8.901565401963385e-06, "loss": 0.0652, "step": 3519 }, { "epoch": 10.131805157593123, "grad_norm": 4.763299465179443, "learning_rate": 8.676041390289202e-06, "loss": 0.1531, "step": 3536 }, { "epoch": 10.180515759312321, "grad_norm": 0.9550924897193909, "learning_rate": 8.450517378615018e-06, "loss": 0.0548, "step": 3553 }, { "epoch": 10.229226361031518, "grad_norm": 0.5510568022727966, "learning_rate": 8.224993366940833e-06, "loss": 0.171, "step": 3570 }, { "epoch": 10.277936962750717, "grad_norm": 1.120082139968872, "learning_rate": 7.999469355266649e-06, "loss": 0.1506, "step": 3587 }, { "epoch": 10.326647564469914, "grad_norm": 8.000428199768066, "learning_rate": 7.773945343592464e-06, "loss": 0.0791, "step": 3604 }, { "epoch": 10.375358166189113, "grad_norm": 0.08897445350885391, "learning_rate": 7.548421331918282e-06, "loss": 0.1006, "step": 3621 }, { "epoch": 10.42406876790831, "grad_norm": 1.5076502561569214, "learning_rate": 7.322897320244097e-06, "loss": 0.1085, "step": 3638 }, { "epoch": 10.472779369627506, "grad_norm": 0.3444303870201111, "learning_rate": 7.097373308569913e-06, "loss": 0.0881, "step": 3655 }, { "epoch": 10.521489971346705, "grad_norm": 5.353268146514893, "learning_rate": 6.871849296895728e-06, "loss": 0.1233, "step": 3672 }, { "epoch": 10.570200573065902, "grad_norm": 6.925529479980469, "learning_rate": 6.646325285221544e-06, "loss": 0.1726, "step": 3689 }, { "epoch": 10.6189111747851, "grad_norm": 1.2398282289505005, "learning_rate": 6.42080127354736e-06, "loss": 0.1607, "step": 3706 }, { "epoch": 10.667621776504298, "grad_norm": 0.17667262256145477, "learning_rate": 6.195277261873176e-06, "loss": 0.1065, "step": 3723 }, { "epoch": 10.716332378223496, "grad_norm": 8.593550682067871, "learning_rate": 5.969753250198992e-06, "loss": 0.1598, "step": 3740 }, { "epoch": 10.765042979942693, "grad_norm": 6.634376049041748, "learning_rate": 5.744229238524808e-06, "loss": 0.214, "step": 3757 }, { "epoch": 10.81375358166189, "grad_norm": 0.12865765392780304, "learning_rate": 5.518705226850624e-06, "loss": 0.1079, "step": 3774 }, { "epoch": 10.862464183381089, "grad_norm": 0.4588039815425873, "learning_rate": 5.29318121517644e-06, "loss": 0.1218, "step": 3791 }, { "epoch": 10.911174785100286, "grad_norm": 8.046585083007812, "learning_rate": 5.0676572035022555e-06, "loss": 0.1618, "step": 3808 }, { "epoch": 10.959885386819485, "grad_norm": 0.34073105454444885, "learning_rate": 4.842133191828072e-06, "loss": 0.0788, "step": 3825 }, { "epoch": 11.0, "eval_accuracy": 1.0, "eval_f1_macro": 1.0, "eval_f1_micro": 1.0, "eval_f1_weighted": 1.0, "eval_loss": 0.011031342670321465, "eval_precision_macro": 1.0, "eval_precision_micro": 1.0, "eval_precision_weighted": 1.0, "eval_recall_macro": 1.0, "eval_recall_micro": 1.0, "eval_recall_weighted": 1.0, "eval_runtime": 3.4121, "eval_samples_per_second": 204.275, "eval_steps_per_second": 12.895, "step": 3839 } ], "logging_steps": 17, "max_steps": 4188, "num_input_tokens_seen": 0, "num_train_epochs": 12, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.3756905482586214e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }