{ "best_metric": 0.13341853022575378, "best_model_checkpoint": "Emotion-Classification/checkpoint-1000", "epoch": 4.0, "eval_steps": 500, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.024, "grad_norm": 4.761784076690674, "learning_rate": 2.4000000000000003e-06, "loss": 1.8584, "step": 6 }, { "epoch": 0.048, "grad_norm": 5.941750526428223, "learning_rate": 4.800000000000001e-06, "loss": 1.8129, "step": 12 }, { "epoch": 0.072, "grad_norm": 5.263009071350098, "learning_rate": 7.2e-06, "loss": 1.7576, "step": 18 }, { "epoch": 0.096, "grad_norm": 3.814999580383301, "learning_rate": 9.600000000000001e-06, "loss": 1.6755, "step": 24 }, { "epoch": 0.12, "grad_norm": 4.710057735443115, "learning_rate": 1.2e-05, "loss": 1.651, "step": 30 }, { "epoch": 0.144, "grad_norm": 4.196522235870361, "learning_rate": 1.44e-05, "loss": 1.6415, "step": 36 }, { "epoch": 0.168, "grad_norm": 4.069035053253174, "learning_rate": 1.6800000000000002e-05, "loss": 1.6045, "step": 42 }, { "epoch": 0.192, "grad_norm": 2.729994535446167, "learning_rate": 1.9200000000000003e-05, "loss": 1.5199, "step": 48 }, { "epoch": 0.216, "grad_norm": 3.651451349258423, "learning_rate": 2.16e-05, "loss": 1.5483, "step": 54 }, { "epoch": 0.24, "grad_norm": 3.8345696926116943, "learning_rate": 2.4e-05, "loss": 1.4484, "step": 60 }, { "epoch": 0.264, "grad_norm": 5.036585807800293, "learning_rate": 2.64e-05, "loss": 1.352, "step": 66 }, { "epoch": 0.288, "grad_norm": 4.797017574310303, "learning_rate": 2.88e-05, "loss": 1.2928, "step": 72 }, { "epoch": 0.312, "grad_norm": 4.072025299072266, "learning_rate": 3.12e-05, "loss": 1.2858, "step": 78 }, { "epoch": 0.336, "grad_norm": 4.3313727378845215, "learning_rate": 3.3600000000000004e-05, "loss": 1.1242, "step": 84 }, { "epoch": 0.36, "grad_norm": 5.59000825881958, "learning_rate": 3.56e-05, "loss": 1.1086, "step": 90 }, { "epoch": 0.384, "grad_norm": 8.562559127807617, "learning_rate": 3.8e-05, "loss": 0.9659, "step": 96 }, { "epoch": 0.408, "grad_norm": 7.202652931213379, "learning_rate": 4.0400000000000006e-05, "loss": 0.9103, "step": 102 }, { "epoch": 0.432, "grad_norm": 6.920612812042236, "learning_rate": 4.2800000000000004e-05, "loss": 0.8257, "step": 108 }, { "epoch": 0.456, "grad_norm": 4.389594078063965, "learning_rate": 4.52e-05, "loss": 0.7882, "step": 114 }, { "epoch": 0.48, "grad_norm": 8.991690635681152, "learning_rate": 4.76e-05, "loss": 0.7142, "step": 120 }, { "epoch": 0.504, "grad_norm": 5.250122547149658, "learning_rate": 5e-05, "loss": 0.6461, "step": 126 }, { "epoch": 0.528, "grad_norm": 6.273474216461182, "learning_rate": 4.973333333333334e-05, "loss": 0.6108, "step": 132 }, { "epoch": 0.552, "grad_norm": 5.45080041885376, "learning_rate": 4.9466666666666665e-05, "loss": 0.5098, "step": 138 }, { "epoch": 0.576, "grad_norm": 6.3116068840026855, "learning_rate": 4.92e-05, "loss": 0.4719, "step": 144 }, { "epoch": 0.6, "grad_norm": 9.022950172424316, "learning_rate": 4.8933333333333335e-05, "loss": 0.3653, "step": 150 }, { "epoch": 0.624, "grad_norm": 6.852407455444336, "learning_rate": 4.866666666666667e-05, "loss": 0.4934, "step": 156 }, { "epoch": 0.648, "grad_norm": 4.436535835266113, "learning_rate": 4.8400000000000004e-05, "loss": 0.3817, "step": 162 }, { "epoch": 0.672, "grad_norm": 5.454000949859619, "learning_rate": 4.8133333333333336e-05, "loss": 0.3123, "step": 168 }, { "epoch": 0.696, "grad_norm": 6.807215690612793, "learning_rate": 4.7866666666666674e-05, "loss": 0.2737, "step": 174 }, { "epoch": 0.72, "grad_norm": 4.130777835845947, "learning_rate": 4.76e-05, "loss": 0.3287, "step": 180 }, { "epoch": 0.744, "grad_norm": 15.173467636108398, "learning_rate": 4.7333333333333336e-05, "loss": 0.2525, "step": 186 }, { "epoch": 0.768, "grad_norm": 3.7970285415649414, "learning_rate": 4.706666666666667e-05, "loss": 0.3568, "step": 192 }, { "epoch": 0.792, "grad_norm": 3.6102640628814697, "learning_rate": 4.6800000000000006e-05, "loss": 0.2952, "step": 198 }, { "epoch": 0.816, "grad_norm": 2.048801898956299, "learning_rate": 4.653333333333334e-05, "loss": 0.3303, "step": 204 }, { "epoch": 0.84, "grad_norm": 9.570277214050293, "learning_rate": 4.626666666666667e-05, "loss": 0.2736, "step": 210 }, { "epoch": 0.864, "grad_norm": 3.7270052433013916, "learning_rate": 4.600000000000001e-05, "loss": 0.2399, "step": 216 }, { "epoch": 0.888, "grad_norm": 2.341561794281006, "learning_rate": 4.573333333333333e-05, "loss": 0.2085, "step": 222 }, { "epoch": 0.912, "grad_norm": 3.1725029945373535, "learning_rate": 4.546666666666667e-05, "loss": 0.2126, "step": 228 }, { "epoch": 0.936, "grad_norm": 9.990570068359375, "learning_rate": 4.52e-05, "loss": 0.2663, "step": 234 }, { "epoch": 0.96, "grad_norm": 3.6300106048583984, "learning_rate": 4.493333333333333e-05, "loss": 0.2006, "step": 240 }, { "epoch": 0.984, "grad_norm": 2.360503911972046, "learning_rate": 4.466666666666667e-05, "loss": 0.2727, "step": 246 }, { "epoch": 1.0, "eval_accuracy": 0.9215, "eval_f1_macro": 0.8995337377920993, "eval_f1_micro": 0.9215, "eval_f1_weighted": 0.9231309624099143, "eval_loss": 0.23581676185131073, "eval_precision_macro": 0.8849160410790557, "eval_precision_micro": 0.9215, "eval_precision_weighted": 0.9309033739712098, "eval_recall_macro": 0.9233031833590024, "eval_recall_micro": 0.9215, "eval_recall_weighted": 0.9215, "eval_runtime": 3.3949, "eval_samples_per_second": 589.114, "eval_steps_per_second": 4.713, "step": 250 }, { "epoch": 1.008, "grad_norm": 4.654928684234619, "learning_rate": 4.44e-05, "loss": 0.2359, "step": 252 }, { "epoch": 1.032, "grad_norm": 6.218105792999268, "learning_rate": 4.417777777777778e-05, "loss": 0.1733, "step": 258 }, { "epoch": 1.056, "grad_norm": 5.516075611114502, "learning_rate": 4.3911111111111116e-05, "loss": 0.1447, "step": 264 }, { "epoch": 1.08, "grad_norm": 5.734640598297119, "learning_rate": 4.364444444444445e-05, "loss": 0.1913, "step": 270 }, { "epoch": 1.104, "grad_norm": 4.150918006896973, "learning_rate": 4.337777777777778e-05, "loss": 0.1179, "step": 276 }, { "epoch": 1.1280000000000001, "grad_norm": 3.250934362411499, "learning_rate": 4.311111111111111e-05, "loss": 0.1777, "step": 282 }, { "epoch": 1.152, "grad_norm": 4.020536422729492, "learning_rate": 4.284444444444445e-05, "loss": 0.2122, "step": 288 }, { "epoch": 1.176, "grad_norm": 3.2493388652801514, "learning_rate": 4.257777777777778e-05, "loss": 0.2012, "step": 294 }, { "epoch": 1.2, "grad_norm": 4.640542984008789, "learning_rate": 4.231111111111111e-05, "loss": 0.1433, "step": 300 }, { "epoch": 1.224, "grad_norm": 3.5592172145843506, "learning_rate": 4.204444444444445e-05, "loss": 0.1534, "step": 306 }, { "epoch": 1.248, "grad_norm": 5.752802848815918, "learning_rate": 4.177777777777778e-05, "loss": 0.1625, "step": 312 }, { "epoch": 1.272, "grad_norm": 5.988048553466797, "learning_rate": 4.151111111111111e-05, "loss": 0.1654, "step": 318 }, { "epoch": 1.296, "grad_norm": 3.1433417797088623, "learning_rate": 4.124444444444444e-05, "loss": 0.1507, "step": 324 }, { "epoch": 1.32, "grad_norm": 2.363762617111206, "learning_rate": 4.097777777777778e-05, "loss": 0.1343, "step": 330 }, { "epoch": 1.3439999999999999, "grad_norm": 2.4271459579467773, "learning_rate": 4.071111111111111e-05, "loss": 0.1354, "step": 336 }, { "epoch": 1.3679999999999999, "grad_norm": 5.531872749328613, "learning_rate": 4.0444444444444444e-05, "loss": 0.2238, "step": 342 }, { "epoch": 1.392, "grad_norm": 2.654528856277466, "learning_rate": 4.017777777777778e-05, "loss": 0.1248, "step": 348 }, { "epoch": 1.416, "grad_norm": 2.4685096740722656, "learning_rate": 3.9911111111111114e-05, "loss": 0.142, "step": 354 }, { "epoch": 1.44, "grad_norm": 11.145745277404785, "learning_rate": 3.9644444444444445e-05, "loss": 0.1379, "step": 360 }, { "epoch": 1.464, "grad_norm": 10.70506477355957, "learning_rate": 3.937777777777778e-05, "loss": 0.0927, "step": 366 }, { "epoch": 1.488, "grad_norm": 1.3703113794326782, "learning_rate": 3.9111111111111115e-05, "loss": 0.2147, "step": 372 }, { "epoch": 1.512, "grad_norm": 2.9787039756774902, "learning_rate": 3.8844444444444446e-05, "loss": 0.1816, "step": 378 }, { "epoch": 1.536, "grad_norm": 4.429356575012207, "learning_rate": 3.857777777777778e-05, "loss": 0.1271, "step": 384 }, { "epoch": 1.56, "grad_norm": 2.4006407260894775, "learning_rate": 3.8311111111111115e-05, "loss": 0.1266, "step": 390 }, { "epoch": 1.584, "grad_norm": 2.01302170753479, "learning_rate": 3.804444444444445e-05, "loss": 0.0847, "step": 396 }, { "epoch": 1.608, "grad_norm": 2.242461681365967, "learning_rate": 3.777777777777778e-05, "loss": 0.1802, "step": 402 }, { "epoch": 1.6320000000000001, "grad_norm": 1.8448535203933716, "learning_rate": 3.7511111111111116e-05, "loss": 0.1713, "step": 408 }, { "epoch": 1.6560000000000001, "grad_norm": 2.21156907081604, "learning_rate": 3.724444444444445e-05, "loss": 0.1656, "step": 414 }, { "epoch": 1.6800000000000002, "grad_norm": 4.768124580383301, "learning_rate": 3.697777777777778e-05, "loss": 0.1453, "step": 420 }, { "epoch": 1.704, "grad_norm": 3.267815351486206, "learning_rate": 3.671111111111111e-05, "loss": 0.1755, "step": 426 }, { "epoch": 1.728, "grad_norm": 1.797162652015686, "learning_rate": 3.644444444444445e-05, "loss": 0.1503, "step": 432 }, { "epoch": 1.752, "grad_norm": 2.245846748352051, "learning_rate": 3.617777777777778e-05, "loss": 0.139, "step": 438 }, { "epoch": 1.776, "grad_norm": 4.633366107940674, "learning_rate": 3.591111111111111e-05, "loss": 0.1298, "step": 444 }, { "epoch": 1.8, "grad_norm": 4.398362636566162, "learning_rate": 3.564444444444445e-05, "loss": 0.1703, "step": 450 }, { "epoch": 1.8239999999999998, "grad_norm": 4.176144599914551, "learning_rate": 3.537777777777778e-05, "loss": 0.1503, "step": 456 }, { "epoch": 1.8479999999999999, "grad_norm": 6.216207027435303, "learning_rate": 3.511111111111111e-05, "loss": 0.1558, "step": 462 }, { "epoch": 1.8719999999999999, "grad_norm": 2.794733762741089, "learning_rate": 3.4844444444444444e-05, "loss": 0.1679, "step": 468 }, { "epoch": 1.896, "grad_norm": 6.9759392738342285, "learning_rate": 3.457777777777778e-05, "loss": 0.1304, "step": 474 }, { "epoch": 1.92, "grad_norm": 1.3952761888504028, "learning_rate": 3.431111111111111e-05, "loss": 0.1418, "step": 480 }, { "epoch": 1.944, "grad_norm": 1.8202506303787231, "learning_rate": 3.4044444444444445e-05, "loss": 0.1188, "step": 486 }, { "epoch": 1.968, "grad_norm": 4.880939483642578, "learning_rate": 3.377777777777778e-05, "loss": 0.1252, "step": 492 }, { "epoch": 1.992, "grad_norm": 1.7853918075561523, "learning_rate": 3.3511111111111114e-05, "loss": 0.1397, "step": 498 }, { "epoch": 2.0, "eval_accuracy": 0.9395, "eval_f1_macro": 0.9106849947356129, "eval_f1_micro": 0.9395, "eval_f1_weighted": 0.9383774262681576, "eval_loss": 0.14122824370861053, "eval_precision_macro": 0.9324340670283836, "eval_precision_micro": 0.9395, "eval_precision_weighted": 0.9408265963501754, "eval_recall_macro": 0.8951750793370938, "eval_recall_micro": 0.9395, "eval_recall_weighted": 0.9395, "eval_runtime": 3.4115, "eval_samples_per_second": 586.256, "eval_steps_per_second": 4.69, "step": 500 }, { "epoch": 2.016, "grad_norm": 3.5379767417907715, "learning_rate": 3.3244444444444445e-05, "loss": 0.1067, "step": 504 }, { "epoch": 2.04, "grad_norm": 1.8406476974487305, "learning_rate": 3.297777777777778e-05, "loss": 0.1038, "step": 510 }, { "epoch": 2.064, "grad_norm": 1.2835277318954468, "learning_rate": 3.2711111111111115e-05, "loss": 0.0894, "step": 516 }, { "epoch": 2.088, "grad_norm": 0.9669094085693359, "learning_rate": 3.2444444444444446e-05, "loss": 0.0914, "step": 522 }, { "epoch": 2.112, "grad_norm": 1.4967998266220093, "learning_rate": 3.217777777777778e-05, "loss": 0.115, "step": 528 }, { "epoch": 2.136, "grad_norm": 0.5459607243537903, "learning_rate": 3.1911111111111116e-05, "loss": 0.1238, "step": 534 }, { "epoch": 2.16, "grad_norm": 0.6286458969116211, "learning_rate": 3.164444444444444e-05, "loss": 0.078, "step": 540 }, { "epoch": 2.184, "grad_norm": 0.8293291926383972, "learning_rate": 3.137777777777778e-05, "loss": 0.0956, "step": 546 }, { "epoch": 2.208, "grad_norm": 1.015953779220581, "learning_rate": 3.111111111111111e-05, "loss": 0.1022, "step": 552 }, { "epoch": 2.232, "grad_norm": 0.6408522129058838, "learning_rate": 3.084444444444445e-05, "loss": 0.0839, "step": 558 }, { "epoch": 2.2560000000000002, "grad_norm": 1.495943307876587, "learning_rate": 3.057777777777778e-05, "loss": 0.0937, "step": 564 }, { "epoch": 2.2800000000000002, "grad_norm": 1.8174567222595215, "learning_rate": 3.031111111111111e-05, "loss": 0.1216, "step": 570 }, { "epoch": 2.304, "grad_norm": 0.502436637878418, "learning_rate": 3.004444444444445e-05, "loss": 0.0754, "step": 576 }, { "epoch": 2.328, "grad_norm": 5.995917320251465, "learning_rate": 2.9777777777777777e-05, "loss": 0.164, "step": 582 }, { "epoch": 2.352, "grad_norm": 1.4363820552825928, "learning_rate": 2.951111111111111e-05, "loss": 0.1006, "step": 588 }, { "epoch": 2.376, "grad_norm": 0.8328127861022949, "learning_rate": 2.9244444444444446e-05, "loss": 0.0861, "step": 594 }, { "epoch": 2.4, "grad_norm": 0.8516309261322021, "learning_rate": 2.897777777777778e-05, "loss": 0.0852, "step": 600 }, { "epoch": 2.424, "grad_norm": 1.308616042137146, "learning_rate": 2.8711111111111113e-05, "loss": 0.0785, "step": 606 }, { "epoch": 2.448, "grad_norm": 2.7151458263397217, "learning_rate": 2.8444444444444447e-05, "loss": 0.1236, "step": 612 }, { "epoch": 2.472, "grad_norm": 0.8587580919265747, "learning_rate": 2.8177777777777782e-05, "loss": 0.0841, "step": 618 }, { "epoch": 2.496, "grad_norm": 2.1243529319763184, "learning_rate": 2.791111111111111e-05, "loss": 0.0769, "step": 624 }, { "epoch": 2.52, "grad_norm": 1.7264567613601685, "learning_rate": 2.7644444444444445e-05, "loss": 0.1453, "step": 630 }, { "epoch": 2.544, "grad_norm": 0.745203971862793, "learning_rate": 2.737777777777778e-05, "loss": 0.0685, "step": 636 }, { "epoch": 2.568, "grad_norm": 1.8616527318954468, "learning_rate": 2.7111111111111114e-05, "loss": 0.1152, "step": 642 }, { "epoch": 2.592, "grad_norm": 1.0319263935089111, "learning_rate": 2.6844444444444446e-05, "loss": 0.0866, "step": 648 }, { "epoch": 2.616, "grad_norm": 1.321435809135437, "learning_rate": 2.657777777777778e-05, "loss": 0.0979, "step": 654 }, { "epoch": 2.64, "grad_norm": 0.6810425519943237, "learning_rate": 2.6311111111111115e-05, "loss": 0.0746, "step": 660 }, { "epoch": 2.664, "grad_norm": 4.602859020233154, "learning_rate": 2.6044444444444443e-05, "loss": 0.1134, "step": 666 }, { "epoch": 2.6879999999999997, "grad_norm": 0.9797782301902771, "learning_rate": 2.5777777777777778e-05, "loss": 0.0779, "step": 672 }, { "epoch": 2.7119999999999997, "grad_norm": 3.393965005874634, "learning_rate": 2.5511111111111113e-05, "loss": 0.1137, "step": 678 }, { "epoch": 2.7359999999999998, "grad_norm": 1.6358873844146729, "learning_rate": 2.5244444444444447e-05, "loss": 0.0829, "step": 684 }, { "epoch": 2.76, "grad_norm": 0.8677324056625366, "learning_rate": 2.497777777777778e-05, "loss": 0.0655, "step": 690 }, { "epoch": 2.784, "grad_norm": 2.418769121170044, "learning_rate": 2.4711111111111114e-05, "loss": 0.0726, "step": 696 }, { "epoch": 2.808, "grad_norm": 1.677323341369629, "learning_rate": 2.4444444444444445e-05, "loss": 0.0694, "step": 702 }, { "epoch": 2.832, "grad_norm": 1.2437670230865479, "learning_rate": 2.417777777777778e-05, "loss": 0.0986, "step": 708 }, { "epoch": 2.856, "grad_norm": 2.447063684463501, "learning_rate": 2.391111111111111e-05, "loss": 0.1185, "step": 714 }, { "epoch": 2.88, "grad_norm": 0.7579317092895508, "learning_rate": 2.3644444444444446e-05, "loss": 0.0932, "step": 720 }, { "epoch": 2.904, "grad_norm": 1.1533098220825195, "learning_rate": 2.337777777777778e-05, "loss": 0.1013, "step": 726 }, { "epoch": 2.928, "grad_norm": 0.8839074969291687, "learning_rate": 2.3111111111111112e-05, "loss": 0.0912, "step": 732 }, { "epoch": 2.952, "grad_norm": 1.182469129562378, "learning_rate": 2.2844444444444447e-05, "loss": 0.0901, "step": 738 }, { "epoch": 2.976, "grad_norm": 0.9109925627708435, "learning_rate": 2.2577777777777778e-05, "loss": 0.0785, "step": 744 }, { "epoch": 3.0, "grad_norm": 1.4611146450042725, "learning_rate": 2.2311111111111113e-05, "loss": 0.0768, "step": 750 }, { "epoch": 3.0, "eval_accuracy": 0.942, "eval_f1_macro": 0.9199698545563004, "eval_f1_micro": 0.942, "eval_f1_weighted": 0.942299793738485, "eval_loss": 0.13686497509479523, "eval_precision_macro": 0.912222911519835, "eval_precision_micro": 0.942, "eval_precision_weighted": 0.9429745034281065, "eval_recall_macro": 0.9287782509639695, "eval_recall_micro": 0.942, "eval_recall_weighted": 0.942, "eval_runtime": 3.3845, "eval_samples_per_second": 590.924, "eval_steps_per_second": 4.727, "step": 750 }, { "epoch": 3.024, "grad_norm": 5.040261745452881, "learning_rate": 2.2044444444444444e-05, "loss": 0.078, "step": 756 }, { "epoch": 3.048, "grad_norm": 0.5013237595558167, "learning_rate": 2.177777777777778e-05, "loss": 0.0558, "step": 762 }, { "epoch": 3.072, "grad_norm": 1.1819310188293457, "learning_rate": 2.1511111111111114e-05, "loss": 0.0682, "step": 768 }, { "epoch": 3.096, "grad_norm": 0.9416047930717468, "learning_rate": 2.1244444444444445e-05, "loss": 0.0775, "step": 774 }, { "epoch": 3.12, "grad_norm": 2.0191128253936768, "learning_rate": 2.097777777777778e-05, "loss": 0.0645, "step": 780 }, { "epoch": 3.144, "grad_norm": 3.3059170246124268, "learning_rate": 2.071111111111111e-05, "loss": 0.0713, "step": 786 }, { "epoch": 3.168, "grad_norm": 0.6508151888847351, "learning_rate": 2.0444444444444446e-05, "loss": 0.0631, "step": 792 }, { "epoch": 3.192, "grad_norm": 0.7478683590888977, "learning_rate": 2.0177777777777777e-05, "loss": 0.0826, "step": 798 }, { "epoch": 3.216, "grad_norm": 0.5315937995910645, "learning_rate": 1.9911111111111112e-05, "loss": 0.0599, "step": 804 }, { "epoch": 3.24, "grad_norm": 0.6902756690979004, "learning_rate": 1.9644444444444447e-05, "loss": 0.062, "step": 810 }, { "epoch": 3.2640000000000002, "grad_norm": 2.4366424083709717, "learning_rate": 1.9377777777777778e-05, "loss": 0.0552, "step": 816 }, { "epoch": 3.288, "grad_norm": 0.8592342734336853, "learning_rate": 1.9111111111111113e-05, "loss": 0.0473, "step": 822 }, { "epoch": 3.312, "grad_norm": 0.7974979877471924, "learning_rate": 1.8844444444444444e-05, "loss": 0.0843, "step": 828 }, { "epoch": 3.336, "grad_norm": 0.6507941484451294, "learning_rate": 1.8577777777777776e-05, "loss": 0.0547, "step": 834 }, { "epoch": 3.36, "grad_norm": 1.7816904783248901, "learning_rate": 1.8311111111111114e-05, "loss": 0.0734, "step": 840 }, { "epoch": 3.384, "grad_norm": 2.3007311820983887, "learning_rate": 1.8044444444444445e-05, "loss": 0.1033, "step": 846 }, { "epoch": 3.408, "grad_norm": 1.0759015083312988, "learning_rate": 1.777777777777778e-05, "loss": 0.0992, "step": 852 }, { "epoch": 3.432, "grad_norm": 2.1372885704040527, "learning_rate": 1.751111111111111e-05, "loss": 0.0593, "step": 858 }, { "epoch": 3.456, "grad_norm": 2.228823661804199, "learning_rate": 1.7244444444444446e-05, "loss": 0.0937, "step": 864 }, { "epoch": 3.48, "grad_norm": 1.4194374084472656, "learning_rate": 1.6977777777777777e-05, "loss": 0.0667, "step": 870 }, { "epoch": 3.504, "grad_norm": 0.7095490097999573, "learning_rate": 1.6711111111111112e-05, "loss": 0.0574, "step": 876 }, { "epoch": 3.528, "grad_norm": 3.3914129734039307, "learning_rate": 1.6444444444444447e-05, "loss": 0.0969, "step": 882 }, { "epoch": 3.552, "grad_norm": 1.8268787860870361, "learning_rate": 1.617777777777778e-05, "loss": 0.0716, "step": 888 }, { "epoch": 3.576, "grad_norm": 1.4750251770019531, "learning_rate": 1.5911111111111113e-05, "loss": 0.0859, "step": 894 }, { "epoch": 3.6, "grad_norm": 0.3522164523601532, "learning_rate": 1.5644444444444444e-05, "loss": 0.0505, "step": 900 }, { "epoch": 3.624, "grad_norm": 1.7711290121078491, "learning_rate": 1.537777777777778e-05, "loss": 0.0571, "step": 906 }, { "epoch": 3.648, "grad_norm": 1.3283618688583374, "learning_rate": 1.5111111111111112e-05, "loss": 0.1006, "step": 912 }, { "epoch": 3.672, "grad_norm": 2.121399402618408, "learning_rate": 1.4844444444444444e-05, "loss": 0.0838, "step": 918 }, { "epoch": 3.6959999999999997, "grad_norm": 2.0860633850097656, "learning_rate": 1.4577777777777778e-05, "loss": 0.1117, "step": 924 }, { "epoch": 3.7199999999999998, "grad_norm": 2.2945892810821533, "learning_rate": 1.4311111111111111e-05, "loss": 0.0727, "step": 930 }, { "epoch": 3.7439999999999998, "grad_norm": 1.119083046913147, "learning_rate": 1.4044444444444446e-05, "loss": 0.0485, "step": 936 }, { "epoch": 3.768, "grad_norm": 1.8427727222442627, "learning_rate": 1.3777777777777778e-05, "loss": 0.0574, "step": 942 }, { "epoch": 3.792, "grad_norm": 1.5588749647140503, "learning_rate": 1.351111111111111e-05, "loss": 0.0784, "step": 948 }, { "epoch": 3.816, "grad_norm": 1.580621361732483, "learning_rate": 1.3244444444444445e-05, "loss": 0.054, "step": 954 }, { "epoch": 3.84, "grad_norm": 4.9234843254089355, "learning_rate": 1.2977777777777777e-05, "loss": 0.0976, "step": 960 }, { "epoch": 3.864, "grad_norm": 1.1440095901489258, "learning_rate": 1.2711111111111113e-05, "loss": 0.0586, "step": 966 }, { "epoch": 3.888, "grad_norm": 1.0315749645233154, "learning_rate": 1.2444444444444445e-05, "loss": 0.0693, "step": 972 }, { "epoch": 3.912, "grad_norm": 1.4318304061889648, "learning_rate": 1.2177777777777778e-05, "loss": 0.069, "step": 978 }, { "epoch": 3.936, "grad_norm": 2.342024087905884, "learning_rate": 1.1911111111111112e-05, "loss": 0.0815, "step": 984 }, { "epoch": 3.96, "grad_norm": 1.2760955095291138, "learning_rate": 1.1644444444444446e-05, "loss": 0.07, "step": 990 }, { "epoch": 3.984, "grad_norm": 1.5678285360336304, "learning_rate": 1.1377777777777779e-05, "loss": 0.0795, "step": 996 }, { "epoch": 4.0, "eval_accuracy": 0.943, "eval_f1_macro": 0.9169826832623412, "eval_f1_micro": 0.943, "eval_f1_weighted": 0.9427985114313238, "eval_loss": 0.13341853022575378, "eval_precision_macro": 0.9227534317185495, "eval_precision_micro": 0.943, "eval_precision_weighted": 0.9430912986498113, "eval_recall_macro": 0.9119580961776227, "eval_recall_micro": 0.943, "eval_recall_weighted": 0.943, "eval_runtime": 3.3617, "eval_samples_per_second": 594.941, "eval_steps_per_second": 4.76, "step": 1000 } ], "logging_steps": 6, "max_steps": 1250, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 2 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4209928077312000.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }