{ "best_metric": 0.9234042553191489, "best_model_checkpoint": "vit-msn-base-finetuned-lf-invalidation/checkpoint-62", "epoch": 76.8, "eval_steps": 500, "global_step": 480, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.96, "eval_accuracy": 0.6957446808510638, "eval_loss": 0.6511951088905334, "eval_runtime": 4.1128, "eval_samples_per_second": 114.277, "eval_steps_per_second": 3.647, "step": 6 }, { "epoch": 1.6, "grad_norm": 100.99421691894531, "learning_rate": 1.0416666666666668e-05, "loss": 0.7053, "step": 10 }, { "epoch": 1.92, "eval_accuracy": 0.6808510638297872, "eval_loss": 0.6310930848121643, "eval_runtime": 4.1662, "eval_samples_per_second": 112.813, "eval_steps_per_second": 3.6, "step": 12 }, { "epoch": 2.88, "eval_accuracy": 0.7276595744680852, "eval_loss": 0.5360996127128601, "eval_runtime": 4.0581, "eval_samples_per_second": 115.819, "eval_steps_per_second": 3.696, "step": 18 }, { "epoch": 3.2, "grad_norm": 14.486075401306152, "learning_rate": 2.0833333333333336e-05, "loss": 0.5163, "step": 20 }, { "epoch": 4.0, "eval_accuracy": 0.8680851063829788, "eval_loss": 0.3341110050678253, "eval_runtime": 3.919, "eval_samples_per_second": 119.928, "eval_steps_per_second": 3.827, "step": 25 }, { "epoch": 4.8, "grad_norm": 19.806690216064453, "learning_rate": 3.125e-05, "loss": 0.3242, "step": 30 }, { "epoch": 4.96, "eval_accuracy": 0.8808510638297873, "eval_loss": 0.3167176842689514, "eval_runtime": 3.8854, "eval_samples_per_second": 120.965, "eval_steps_per_second": 3.861, "step": 31 }, { "epoch": 5.92, "eval_accuracy": 0.8191489361702128, "eval_loss": 0.39598795771598816, "eval_runtime": 4.17, "eval_samples_per_second": 112.71, "eval_steps_per_second": 3.597, "step": 37 }, { "epoch": 6.4, "grad_norm": 18.975563049316406, "learning_rate": 4.166666666666667e-05, "loss": 0.2779, "step": 40 }, { "epoch": 6.88, "eval_accuracy": 0.825531914893617, "eval_loss": 0.3817645013332367, "eval_runtime": 3.9864, "eval_samples_per_second": 117.9, "eval_steps_per_second": 3.763, "step": 43 }, { "epoch": 8.0, "grad_norm": 50.12718963623047, "learning_rate": 4.976851851851852e-05, "loss": 0.2348, "step": 50 }, { "epoch": 8.0, "eval_accuracy": 0.7361702127659574, "eval_loss": 0.5018748641014099, "eval_runtime": 4.0244, "eval_samples_per_second": 116.789, "eval_steps_per_second": 3.727, "step": 50 }, { "epoch": 8.96, "eval_accuracy": 0.8851063829787233, "eval_loss": 0.29437732696533203, "eval_runtime": 3.863, "eval_samples_per_second": 121.668, "eval_steps_per_second": 3.883, "step": 56 }, { "epoch": 9.6, "grad_norm": 87.47682189941406, "learning_rate": 4.8611111111111115e-05, "loss": 0.26, "step": 60 }, { "epoch": 9.92, "eval_accuracy": 0.9234042553191489, "eval_loss": 0.24138842523097992, "eval_runtime": 3.9931, "eval_samples_per_second": 117.702, "eval_steps_per_second": 3.756, "step": 62 }, { "epoch": 10.88, "eval_accuracy": 0.8297872340425532, "eval_loss": 0.36644989252090454, "eval_runtime": 3.8761, "eval_samples_per_second": 121.257, "eval_steps_per_second": 3.87, "step": 68 }, { "epoch": 11.2, "grad_norm": 37.262699127197266, "learning_rate": 4.745370370370371e-05, "loss": 0.2778, "step": 70 }, { "epoch": 12.0, "eval_accuracy": 0.9042553191489362, "eval_loss": 0.2505495548248291, "eval_runtime": 3.8837, "eval_samples_per_second": 121.018, "eval_steps_per_second": 3.862, "step": 75 }, { "epoch": 12.8, "grad_norm": 39.793601989746094, "learning_rate": 4.62962962962963e-05, "loss": 0.2271, "step": 80 }, { "epoch": 12.96, "eval_accuracy": 0.6297872340425532, "eval_loss": 0.6277480721473694, "eval_runtime": 3.9373, "eval_samples_per_second": 119.372, "eval_steps_per_second": 3.81, "step": 81 }, { "epoch": 13.92, "eval_accuracy": 0.874468085106383, "eval_loss": 0.275258332490921, "eval_runtime": 3.9907, "eval_samples_per_second": 117.773, "eval_steps_per_second": 3.759, "step": 87 }, { "epoch": 14.4, "grad_norm": 45.56210708618164, "learning_rate": 4.5138888888888894e-05, "loss": 0.2488, "step": 90 }, { "epoch": 14.88, "eval_accuracy": 0.6957446808510638, "eval_loss": 0.6249393820762634, "eval_runtime": 4.032, "eval_samples_per_second": 116.567, "eval_steps_per_second": 3.72, "step": 93 }, { "epoch": 16.0, "grad_norm": 39.393192291259766, "learning_rate": 4.3981481481481486e-05, "loss": 0.2729, "step": 100 }, { "epoch": 16.0, "eval_accuracy": 0.7148936170212766, "eval_loss": 0.519493043422699, "eval_runtime": 4.1537, "eval_samples_per_second": 113.151, "eval_steps_per_second": 3.611, "step": 100 }, { "epoch": 16.96, "eval_accuracy": 0.574468085106383, "eval_loss": 0.7983953952789307, "eval_runtime": 4.0505, "eval_samples_per_second": 116.036, "eval_steps_per_second": 3.703, "step": 106 }, { "epoch": 17.6, "grad_norm": 52.11545944213867, "learning_rate": 4.282407407407408e-05, "loss": 0.3261, "step": 110 }, { "epoch": 17.92, "eval_accuracy": 0.7723404255319148, "eval_loss": 0.4630971848964691, "eval_runtime": 4.0296, "eval_samples_per_second": 116.636, "eval_steps_per_second": 3.722, "step": 112 }, { "epoch": 18.88, "eval_accuracy": 0.5148936170212766, "eval_loss": 1.100958228111267, "eval_runtime": 4.0952, "eval_samples_per_second": 114.768, "eval_steps_per_second": 3.663, "step": 118 }, { "epoch": 19.2, "grad_norm": 80.0373764038086, "learning_rate": 4.166666666666667e-05, "loss": 0.2212, "step": 120 }, { "epoch": 20.0, "eval_accuracy": 0.9170212765957447, "eval_loss": 0.23374585807323456, "eval_runtime": 4.0961, "eval_samples_per_second": 114.744, "eval_steps_per_second": 3.662, "step": 125 }, { "epoch": 20.8, "grad_norm": 2.179731607437134, "learning_rate": 4.0509259259259265e-05, "loss": 0.2802, "step": 130 }, { "epoch": 20.96, "eval_accuracy": 0.7574468085106383, "eval_loss": 0.46376925706863403, "eval_runtime": 4.1103, "eval_samples_per_second": 114.347, "eval_steps_per_second": 3.649, "step": 131 }, { "epoch": 21.92, "eval_accuracy": 0.8361702127659575, "eval_loss": 0.38592880964279175, "eval_runtime": 4.1405, "eval_samples_per_second": 113.513, "eval_steps_per_second": 3.623, "step": 137 }, { "epoch": 22.4, "grad_norm": 4.62510871887207, "learning_rate": 3.935185185185186e-05, "loss": 0.2112, "step": 140 }, { "epoch": 22.88, "eval_accuracy": 0.6893617021276596, "eval_loss": 0.6708246469497681, "eval_runtime": 4.1195, "eval_samples_per_second": 114.091, "eval_steps_per_second": 3.641, "step": 143 }, { "epoch": 24.0, "grad_norm": 5.395915508270264, "learning_rate": 3.8194444444444444e-05, "loss": 0.2231, "step": 150 }, { "epoch": 24.0, "eval_accuracy": 0.8680851063829788, "eval_loss": 0.3386794626712799, "eval_runtime": 4.0237, "eval_samples_per_second": 116.808, "eval_steps_per_second": 3.728, "step": 150 }, { "epoch": 24.96, "eval_accuracy": 0.6553191489361702, "eval_loss": 0.7044735550880432, "eval_runtime": 4.1207, "eval_samples_per_second": 114.059, "eval_steps_per_second": 3.64, "step": 156 }, { "epoch": 25.6, "grad_norm": 12.411273002624512, "learning_rate": 3.7037037037037037e-05, "loss": 0.2037, "step": 160 }, { "epoch": 25.92, "eval_accuracy": 0.8276595744680851, "eval_loss": 0.3957701325416565, "eval_runtime": 4.0677, "eval_samples_per_second": 115.543, "eval_steps_per_second": 3.688, "step": 162 }, { "epoch": 26.88, "eval_accuracy": 0.7702127659574468, "eval_loss": 0.5082454681396484, "eval_runtime": 4.0429, "eval_samples_per_second": 116.254, "eval_steps_per_second": 3.71, "step": 168 }, { "epoch": 27.2, "grad_norm": 13.522443771362305, "learning_rate": 3.587962962962963e-05, "loss": 0.1845, "step": 170 }, { "epoch": 28.0, "eval_accuracy": 0.723404255319149, "eval_loss": 0.5990515351295471, "eval_runtime": 4.0619, "eval_samples_per_second": 115.71, "eval_steps_per_second": 3.693, "step": 175 }, { "epoch": 28.8, "grad_norm": 40.79707336425781, "learning_rate": 3.472222222222222e-05, "loss": 0.1898, "step": 180 }, { "epoch": 28.96, "eval_accuracy": 0.7617021276595745, "eval_loss": 0.510837197303772, "eval_runtime": 4.1139, "eval_samples_per_second": 114.248, "eval_steps_per_second": 3.646, "step": 181 }, { "epoch": 29.92, "eval_accuracy": 0.9085106382978724, "eval_loss": 0.27203500270843506, "eval_runtime": 4.1071, "eval_samples_per_second": 114.435, "eval_steps_per_second": 3.652, "step": 187 }, { "epoch": 30.4, "grad_norm": 34.6284065246582, "learning_rate": 3.3564814814814815e-05, "loss": 0.2118, "step": 190 }, { "epoch": 30.88, "eval_accuracy": 0.7851063829787234, "eval_loss": 0.4935612976551056, "eval_runtime": 4.1447, "eval_samples_per_second": 113.398, "eval_steps_per_second": 3.619, "step": 193 }, { "epoch": 32.0, "grad_norm": 5.604860782623291, "learning_rate": 3.240740740740741e-05, "loss": 0.2097, "step": 200 }, { "epoch": 32.0, "eval_accuracy": 0.8404255319148937, "eval_loss": 0.37482374906539917, "eval_runtime": 4.0858, "eval_samples_per_second": 115.032, "eval_steps_per_second": 3.671, "step": 200 }, { "epoch": 32.96, "eval_accuracy": 0.776595744680851, "eval_loss": 0.5048179626464844, "eval_runtime": 4.0089, "eval_samples_per_second": 117.24, "eval_steps_per_second": 3.742, "step": 206 }, { "epoch": 33.6, "grad_norm": 7.735608100891113, "learning_rate": 3.125e-05, "loss": 0.1704, "step": 210 }, { "epoch": 33.92, "eval_accuracy": 0.7957446808510639, "eval_loss": 0.43682861328125, "eval_runtime": 4.0913, "eval_samples_per_second": 114.879, "eval_steps_per_second": 3.666, "step": 212 }, { "epoch": 34.88, "eval_accuracy": 0.6829787234042554, "eval_loss": 0.6958675384521484, "eval_runtime": 4.1104, "eval_samples_per_second": 114.345, "eval_steps_per_second": 3.649, "step": 218 }, { "epoch": 35.2, "grad_norm": 25.477895736694336, "learning_rate": 3.0092592592592593e-05, "loss": 0.1962, "step": 220 }, { "epoch": 36.0, "eval_accuracy": 0.5957446808510638, "eval_loss": 1.009740948677063, "eval_runtime": 4.0288, "eval_samples_per_second": 116.66, "eval_steps_per_second": 3.723, "step": 225 }, { "epoch": 36.8, "grad_norm": 7.080097198486328, "learning_rate": 2.8935185185185186e-05, "loss": 0.1686, "step": 230 }, { "epoch": 36.96, "eval_accuracy": 0.7914893617021277, "eval_loss": 0.4992178976535797, "eval_runtime": 4.0814, "eval_samples_per_second": 115.157, "eval_steps_per_second": 3.675, "step": 231 }, { "epoch": 37.92, "eval_accuracy": 0.7574468085106383, "eval_loss": 0.5373654365539551, "eval_runtime": 4.2322, "eval_samples_per_second": 111.052, "eval_steps_per_second": 3.544, "step": 237 }, { "epoch": 38.4, "grad_norm": 39.29030227661133, "learning_rate": 2.777777777777778e-05, "loss": 0.1855, "step": 240 }, { "epoch": 38.88, "eval_accuracy": 0.8340425531914893, "eval_loss": 0.371025025844574, "eval_runtime": 4.1514, "eval_samples_per_second": 113.216, "eval_steps_per_second": 3.613, "step": 243 }, { "epoch": 40.0, "grad_norm": 21.52515983581543, "learning_rate": 2.6620370370370372e-05, "loss": 0.1528, "step": 250 }, { "epoch": 40.0, "eval_accuracy": 0.8446808510638298, "eval_loss": 0.3630984425544739, "eval_runtime": 4.1723, "eval_samples_per_second": 112.647, "eval_steps_per_second": 3.595, "step": 250 }, { "epoch": 40.96, "eval_accuracy": 0.7680851063829788, "eval_loss": 0.5588864088058472, "eval_runtime": 4.2314, "eval_samples_per_second": 111.075, "eval_steps_per_second": 3.545, "step": 256 }, { "epoch": 41.6, "grad_norm": 2.9336180686950684, "learning_rate": 2.5462962962962965e-05, "loss": 0.1523, "step": 260 }, { "epoch": 41.92, "eval_accuracy": 0.7808510638297872, "eval_loss": 0.5147323608398438, "eval_runtime": 4.1942, "eval_samples_per_second": 112.059, "eval_steps_per_second": 3.576, "step": 262 }, { "epoch": 42.88, "eval_accuracy": 0.7638297872340426, "eval_loss": 0.5298714637756348, "eval_runtime": 4.0956, "eval_samples_per_second": 114.756, "eval_steps_per_second": 3.662, "step": 268 }, { "epoch": 43.2, "grad_norm": 20.56193733215332, "learning_rate": 2.4305555555555558e-05, "loss": 0.1709, "step": 270 }, { "epoch": 44.0, "eval_accuracy": 0.7446808510638298, "eval_loss": 0.5937234163284302, "eval_runtime": 4.0352, "eval_samples_per_second": 116.474, "eval_steps_per_second": 3.717, "step": 275 }, { "epoch": 44.8, "grad_norm": 17.483304977416992, "learning_rate": 2.314814814814815e-05, "loss": 0.1527, "step": 280 }, { "epoch": 44.96, "eval_accuracy": 0.7382978723404255, "eval_loss": 0.5969159603118896, "eval_runtime": 4.1383, "eval_samples_per_second": 113.574, "eval_steps_per_second": 3.625, "step": 281 }, { "epoch": 45.92, "eval_accuracy": 0.725531914893617, "eval_loss": 0.6439131498336792, "eval_runtime": 4.1256, "eval_samples_per_second": 113.922, "eval_steps_per_second": 3.636, "step": 287 }, { "epoch": 46.4, "grad_norm": 13.123701095581055, "learning_rate": 2.1990740740740743e-05, "loss": 0.1397, "step": 290 }, { "epoch": 46.88, "eval_accuracy": 0.6723404255319149, "eval_loss": 0.7720506191253662, "eval_runtime": 4.0907, "eval_samples_per_second": 114.894, "eval_steps_per_second": 3.667, "step": 293 }, { "epoch": 48.0, "grad_norm": 15.003984451293945, "learning_rate": 2.0833333333333336e-05, "loss": 0.1538, "step": 300 }, { "epoch": 48.0, "eval_accuracy": 0.7702127659574468, "eval_loss": 0.5767794251441956, "eval_runtime": 4.0083, "eval_samples_per_second": 117.257, "eval_steps_per_second": 3.742, "step": 300 }, { "epoch": 48.96, "eval_accuracy": 0.7595744680851064, "eval_loss": 0.5801470875740051, "eval_runtime": 3.9427, "eval_samples_per_second": 119.209, "eval_steps_per_second": 3.805, "step": 306 }, { "epoch": 49.6, "grad_norm": 2.649744987487793, "learning_rate": 1.967592592592593e-05, "loss": 0.1466, "step": 310 }, { "epoch": 49.92, "eval_accuracy": 0.7574468085106383, "eval_loss": 0.5672721266746521, "eval_runtime": 4.0569, "eval_samples_per_second": 115.852, "eval_steps_per_second": 3.697, "step": 312 }, { "epoch": 50.88, "eval_accuracy": 0.7085106382978723, "eval_loss": 0.6468719244003296, "eval_runtime": 4.0234, "eval_samples_per_second": 116.818, "eval_steps_per_second": 3.728, "step": 318 }, { "epoch": 51.2, "grad_norm": 12.5094633102417, "learning_rate": 1.8518518518518518e-05, "loss": 0.1302, "step": 320 }, { "epoch": 52.0, "eval_accuracy": 0.6957446808510638, "eval_loss": 0.7276235222816467, "eval_runtime": 4.0008, "eval_samples_per_second": 117.475, "eval_steps_per_second": 3.749, "step": 325 }, { "epoch": 52.8, "grad_norm": 6.599560737609863, "learning_rate": 1.736111111111111e-05, "loss": 0.1565, "step": 330 }, { "epoch": 52.96, "eval_accuracy": 0.6723404255319149, "eval_loss": 0.8247136473655701, "eval_runtime": 4.0526, "eval_samples_per_second": 115.976, "eval_steps_per_second": 3.701, "step": 331 }, { "epoch": 53.92, "eval_accuracy": 0.7978723404255319, "eval_loss": 0.4810582399368286, "eval_runtime": 4.0005, "eval_samples_per_second": 117.486, "eval_steps_per_second": 3.75, "step": 337 }, { "epoch": 54.4, "grad_norm": 12.574357986450195, "learning_rate": 1.6203703703703704e-05, "loss": 0.1267, "step": 340 }, { "epoch": 54.88, "eval_accuracy": 0.7021276595744681, "eval_loss": 0.6372675895690918, "eval_runtime": 4.047, "eval_samples_per_second": 116.135, "eval_steps_per_second": 3.706, "step": 343 }, { "epoch": 56.0, "grad_norm": 29.667768478393555, "learning_rate": 1.5046296296296297e-05, "loss": 0.1424, "step": 350 }, { "epoch": 56.0, "eval_accuracy": 0.6723404255319149, "eval_loss": 0.7251705527305603, "eval_runtime": 4.0519, "eval_samples_per_second": 115.995, "eval_steps_per_second": 3.702, "step": 350 }, { "epoch": 56.96, "eval_accuracy": 0.7489361702127659, "eval_loss": 0.5696622729301453, "eval_runtime": 4.0808, "eval_samples_per_second": 115.174, "eval_steps_per_second": 3.676, "step": 356 }, { "epoch": 57.6, "grad_norm": 3.276287317276001, "learning_rate": 1.388888888888889e-05, "loss": 0.1053, "step": 360 }, { "epoch": 57.92, "eval_accuracy": 0.6957446808510638, "eval_loss": 0.7066917419433594, "eval_runtime": 4.0845, "eval_samples_per_second": 115.07, "eval_steps_per_second": 3.672, "step": 362 }, { "epoch": 58.88, "eval_accuracy": 0.7063829787234043, "eval_loss": 0.6576955318450928, "eval_runtime": 4.1852, "eval_samples_per_second": 112.301, "eval_steps_per_second": 3.584, "step": 368 }, { "epoch": 59.2, "grad_norm": 7.9189982414245605, "learning_rate": 1.2731481481481482e-05, "loss": 0.1301, "step": 370 }, { "epoch": 60.0, "eval_accuracy": 0.774468085106383, "eval_loss": 0.5325801372528076, "eval_runtime": 4.0787, "eval_samples_per_second": 115.233, "eval_steps_per_second": 3.678, "step": 375 }, { "epoch": 60.8, "grad_norm": 14.686637878417969, "learning_rate": 1.1574074074074075e-05, "loss": 0.0906, "step": 380 }, { "epoch": 60.96, "eval_accuracy": 0.7851063829787234, "eval_loss": 0.546753466129303, "eval_runtime": 4.0812, "eval_samples_per_second": 115.163, "eval_steps_per_second": 3.675, "step": 381 }, { "epoch": 61.92, "eval_accuracy": 0.8276595744680851, "eval_loss": 0.4413163959980011, "eval_runtime": 4.1408, "eval_samples_per_second": 113.504, "eval_steps_per_second": 3.622, "step": 387 }, { "epoch": 62.4, "grad_norm": 9.14445686340332, "learning_rate": 1.0416666666666668e-05, "loss": 0.0974, "step": 390 }, { "epoch": 62.88, "eval_accuracy": 0.7659574468085106, "eval_loss": 0.5478885173797607, "eval_runtime": 4.1286, "eval_samples_per_second": 113.839, "eval_steps_per_second": 3.633, "step": 393 }, { "epoch": 64.0, "grad_norm": 4.1058526039123535, "learning_rate": 9.259259259259259e-06, "loss": 0.1133, "step": 400 }, { "epoch": 64.0, "eval_accuracy": 0.7042553191489361, "eval_loss": 0.7109193801879883, "eval_runtime": 4.2149, "eval_samples_per_second": 111.508, "eval_steps_per_second": 3.559, "step": 400 }, { "epoch": 64.96, "eval_accuracy": 0.7617021276595745, "eval_loss": 0.5734679102897644, "eval_runtime": 4.1133, "eval_samples_per_second": 114.265, "eval_steps_per_second": 3.647, "step": 406 }, { "epoch": 65.6, "grad_norm": 5.876250267028809, "learning_rate": 8.101851851851852e-06, "loss": 0.1189, "step": 410 }, { "epoch": 65.92, "eval_accuracy": 0.8297872340425532, "eval_loss": 0.4084050953388214, "eval_runtime": 4.1883, "eval_samples_per_second": 112.218, "eval_steps_per_second": 3.581, "step": 412 }, { "epoch": 66.88, "eval_accuracy": 0.7489361702127659, "eval_loss": 0.5716192722320557, "eval_runtime": 4.1124, "eval_samples_per_second": 114.289, "eval_steps_per_second": 3.648, "step": 418 }, { "epoch": 67.2, "grad_norm": 2.931035280227661, "learning_rate": 6.944444444444445e-06, "loss": 0.1064, "step": 420 }, { "epoch": 68.0, "eval_accuracy": 0.7553191489361702, "eval_loss": 0.5537174940109253, "eval_runtime": 4.0965, "eval_samples_per_second": 114.731, "eval_steps_per_second": 3.662, "step": 425 }, { "epoch": 68.8, "grad_norm": 4.387136936187744, "learning_rate": 5.787037037037038e-06, "loss": 0.1084, "step": 430 }, { "epoch": 68.96, "eval_accuracy": 0.8021276595744681, "eval_loss": 0.456912100315094, "eval_runtime": 4.1477, "eval_samples_per_second": 113.315, "eval_steps_per_second": 3.616, "step": 431 }, { "epoch": 69.92, "eval_accuracy": 0.7617021276595745, "eval_loss": 0.5227068066596985, "eval_runtime": 4.1656, "eval_samples_per_second": 112.828, "eval_steps_per_second": 3.601, "step": 437 }, { "epoch": 70.4, "grad_norm": 6.693394184112549, "learning_rate": 4.6296296296296296e-06, "loss": 0.1054, "step": 440 }, { "epoch": 70.88, "eval_accuracy": 0.7276595744680852, "eval_loss": 0.5995042324066162, "eval_runtime": 4.1654, "eval_samples_per_second": 112.834, "eval_steps_per_second": 3.601, "step": 443 }, { "epoch": 72.0, "grad_norm": 8.600502014160156, "learning_rate": 3.4722222222222224e-06, "loss": 0.1005, "step": 450 }, { "epoch": 72.0, "eval_accuracy": 0.7638297872340426, "eval_loss": 0.5560170412063599, "eval_runtime": 4.1827, "eval_samples_per_second": 112.367, "eval_steps_per_second": 3.586, "step": 450 }, { "epoch": 72.96, "eval_accuracy": 0.8063829787234043, "eval_loss": 0.45502665638923645, "eval_runtime": 4.2071, "eval_samples_per_second": 111.715, "eval_steps_per_second": 3.565, "step": 456 }, { "epoch": 73.6, "grad_norm": 10.198132514953613, "learning_rate": 2.3148148148148148e-06, "loss": 0.1028, "step": 460 }, { "epoch": 73.92, "eval_accuracy": 0.823404255319149, "eval_loss": 0.4404470920562744, "eval_runtime": 4.1806, "eval_samples_per_second": 112.425, "eval_steps_per_second": 3.588, "step": 462 }, { "epoch": 74.88, "eval_accuracy": 0.7957446808510639, "eval_loss": 0.4761447310447693, "eval_runtime": 4.1871, "eval_samples_per_second": 112.251, "eval_steps_per_second": 3.582, "step": 468 }, { "epoch": 75.2, "grad_norm": 7.506448268890381, "learning_rate": 1.1574074074074074e-06, "loss": 0.0917, "step": 470 }, { "epoch": 76.0, "eval_accuracy": 0.7680851063829788, "eval_loss": 0.5278272032737732, "eval_runtime": 4.1615, "eval_samples_per_second": 112.939, "eval_steps_per_second": 3.604, "step": 475 }, { "epoch": 76.8, "grad_norm": 5.474030494689941, "learning_rate": 0.0, "loss": 0.1009, "step": 480 }, { "epoch": 76.8, "eval_accuracy": 0.7617021276595745, "eval_loss": 0.5345979332923889, "eval_runtime": 4.1813, "eval_samples_per_second": 112.405, "eval_steps_per_second": 3.587, "step": 480 }, { "epoch": 76.8, "step": 480, "total_flos": 4.5903154968099717e+18, "train_loss": 0.19423907659947873, "train_runtime": 1687.359, "train_samples_per_second": 36.554, "train_steps_per_second": 0.284 } ], "logging_steps": 10, "max_steps": 480, "num_input_tokens_seen": 0, "num_train_epochs": 80, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.5903154968099717e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }