{ "best_metric": null, "best_model_checkpoint": null, "epoch": 30.0, "global_step": 19890, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15, "learning_rate": 0.00013193067062605056, "loss": 17.6929, "step": 100 }, { "epoch": 0.3, "learning_rate": 0.00011254053736637465, "loss": 2.9597, "step": 200 }, { "epoch": 0.45, "learning_rate": 8.317464938377179e-05, "loss": 2.8924, "step": 300 }, { "epoch": 0.6, "learning_rate": 5.04999004691124e-05, "loss": 2.854, "step": 400 }, { "epoch": 0.75, "learning_rate": 2.1934390180170752e-05, "loss": 2.7631, "step": 500 }, { "epoch": 0.9, "learning_rate": 3.963303752660306e-06, "loss": 2.827, "step": 600 }, { "epoch": 1.06, "learning_rate": 6.665901596802237e-07, "loss": 2.7348, "step": 700 }, { "epoch": 1.21, "learning_rate": 1.2792697375844372e-05, "loss": 2.5285, "step": 800 }, { "epoch": 1.36, "learning_rate": 3.758865334885973e-05, "loss": 2.532, "step": 900 }, { "epoch": 1.51, "learning_rate": 6.942506914238186e-05, "loss": 2.5207, "step": 1000 }, { "epoch": 1.66, "learning_rate": 0.00010107417071793643, "loss": 2.5964, "step": 1100 }, { "epoch": 1.81, "learning_rate": 0.00012535070970483918, "loss": 2.651, "step": 1200 }, { "epoch": 1.96, "learning_rate": 0.00013674321963112474, "loss": 2.6082, "step": 1300 }, { "epoch": 2.11, "learning_rate": 0.0001326652759440951, "loss": 2.4929, "step": 1400 }, { "epoch": 2.26, "learning_rate": 0.00011404268812251294, "loss": 2.4281, "step": 1500 }, { "epoch": 2.41, "learning_rate": 8.510331453017784e-05, "loss": 2.4293, "step": 1600 }, { "epoch": 2.56, "learning_rate": 5.2417218031482405e-05, "loss": 2.4101, "step": 1700 }, { "epoch": 2.71, "learning_rate": 2.3405074409295983e-05, "loss": 2.3587, "step": 1800 }, { "epoch": 2.87, "learning_rate": 4.653467399454465e-06, "loss": 2.307, "step": 1900 }, { "epoch": 3.02, "learning_rate": 4.1954639981762966e-07, "loss": 2.3605, "step": 2000 }, { "epoch": 3.17, "learning_rate": 1.1664532186009215e-05, "loss": 2.137, "step": 2100 }, { "epoch": 3.32, "learning_rate": 3.5835492391032385e-05, "loss": 2.0627, "step": 2200 }, { "epoch": 3.47, "learning_rate": 6.744492993645014e-05, "loss": 2.1204, "step": 2300 }, { "epoch": 3.62, "learning_rate": 9.931660131723702e-05, "loss": 2.1823, "step": 2400 }, { "epoch": 3.77, "learning_rate": 0.00012421472847147613, "loss": 2.2277, "step": 2500 }, { "epoch": 3.92, "learning_rate": 0.000136486726691794, "loss": 2.2512, "step": 2600 }, { "epoch": 4.07, "learning_rate": 0.00013334650250861535, "loss": 2.0589, "step": 2700 }, { "epoch": 4.22, "learning_rate": 0.00011550697633903009, "loss": 2.0364, "step": 2800 }, { "epoch": 4.37, "learning_rate": 8.701822922681755e-05, "loss": 2.079, "step": 2900 }, { "epoch": 4.52, "learning_rate": 5.4348018978061915e-05, "loss": 2.0451, "step": 3000 }, { "epoch": 4.68, "learning_rate": 2.4913414743980736e-05, "loss": 2.0148, "step": 3100 }, { "epoch": 4.83, "learning_rate": 5.396910863624203e-06, "loss": 2.0322, "step": 3200 }, { "epoch": 4.98, "learning_rate": 2.2931013159956905e-07, "loss": 2.0037, "step": 3300 }, { "epoch": 5.13, "learning_rate": 1.0583805241907789e-05, "loss": 1.7858, "step": 3400 }, { "epoch": 5.28, "learning_rate": 3.410963059878637e-05, "loss": 1.7749, "step": 3500 }, { "epoch": 5.43, "learning_rate": 6.546575312706341e-05, "loss": 1.753, "step": 3600 }, { "epoch": 5.58, "learning_rate": 9.753343905259311e-05, "loss": 1.8774, "step": 3700 }, { "epoch": 5.73, "learning_rate": 0.00012303240942344423, "loss": 1.8664, "step": 3800 }, { "epoch": 5.88, "learning_rate": 0.0001361736709920872, "loss": 1.9253, "step": 3900 }, { "epoch": 6.03, "learning_rate": 0.00013397378272625676, "loss": 1.895, "step": 4000 }, { "epoch": 6.18, "learning_rate": 0.00011693218198091279, "loss": 1.6965, "step": 4100 }, { "epoch": 6.33, "learning_rate": 8.891779797974934e-05, "loss": 1.7647, "step": 4200 }, { "epoch": 6.49, "learning_rate": 5.629069457859385e-05, "loss": 1.7246, "step": 4300 }, { "epoch": 6.64, "learning_rate": 2.645815444528567e-05, "loss": 1.7252, "step": 4400 }, { "epoch": 6.79, "learning_rate": 6.193014713115989e-06, "loss": 1.7153, "step": 4500 }, { "epoch": 6.94, "learning_rate": 9.603985859484616e-08, "loss": 1.7007, "step": 4600 }, { "epoch": 7.09, "learning_rate": 9.55141699790193e-06, "loss": 1.5635, "step": 4700 }, { "epoch": 7.24, "learning_rate": 3.2412505948464725e-05, "loss": 1.4871, "step": 4800 }, { "epoch": 7.39, "learning_rate": 6.348918775091967e-05, "loss": 1.4965, "step": 4900 }, { "epoch": 7.54, "learning_rate": 9.572616964271242e-05, "loss": 1.5202, "step": 5000 }, { "epoch": 7.69, "learning_rate": 0.00012180473766095787, "loss": 1.5508, "step": 5100 }, { "epoch": 7.84, "learning_rate": 0.0001358043133678906, "loss": 1.658, "step": 5200 }, { "epoch": 7.99, "learning_rate": 0.0001345465939513953, "loss": 1.6614, "step": 5300 }, { "epoch": 8.14, "learning_rate": 0.0001183171175764834, "loss": 1.4421, "step": 5400 }, { "epoch": 8.3, "learning_rate": 9.080043808116885e-05, "loss": 1.4517, "step": 5500 }, { "epoch": 8.45, "learning_rate": 5.824362620893997e-05, "loss": 1.5004, "step": 5600 }, { "epoch": 8.6, "learning_rate": 2.803800644656531e-05, "loss": 1.4732, "step": 5700 }, { "epoch": 8.75, "learning_rate": 7.041115639600712e-06, "loss": 1.3825, "step": 5800 }, { "epoch": 8.9, "learning_rate": 1.9846620691268856e-08, "loss": 1.3723, "step": 5900 }, { "epoch": 9.05, "learning_rate": 8.568227632876734e-06, "loss": 1.3401, "step": 6000 }, { "epoch": 9.2, "learning_rate": 3.074553247281859e-05, "loss": 1.2335, "step": 6100 }, { "epoch": 9.35, "learning_rate": 6.151688066888818e-05, "loss": 1.211, "step": 6200 }, { "epoch": 9.5, "learning_rate": 9.389629889221367e-05, "loss": 1.2744, "step": 6300 }, { "epoch": 9.65, "learning_rate": 0.00012053273607180507, "loss": 1.3462, "step": 6400 }, { "epoch": 9.8, "learning_rate": 0.0001353789615654724, "loss": 1.3833, "step": 6500 }, { "epoch": 9.95, "learning_rate": 0.00013506445892160198, "loss": 1.383, "step": 6600 }, { "epoch": 10.11, "learning_rate": 1.3445492880161939e-06, "loss": 1.2022, "step": 6700 }, { "epoch": 10.26, "learning_rate": 1.1808914090066666e-06, "loss": 1.1142, "step": 6800 }, { "epoch": 10.41, "learning_rate": 9.048790708431603e-07, "loss": 1.1743, "step": 6900 }, { "epoch": 10.56, "learning_rate": 5.791749433756745e-07, "loss": 1.1411, "step": 7000 }, { "epoch": 10.71, "learning_rate": 2.7772314933464675e-07, "loss": 1.2239, "step": 7100 }, { "epoch": 10.86, "learning_rate": 6.896184033086834e-08, "loss": 1.1843, "step": 7200 }, { "epoch": 11.01, "learning_rate": 2.8578527430069923e-10, "loss": 1.1689, "step": 7300 }, { "epoch": 11.16, "learning_rate": 8.568227632876734e-08, "loss": 1.105, "step": 7400 }, { "epoch": 11.31, "learning_rate": 3.074553247281864e-07, "loss": 1.1371, "step": 7500 }, { "epoch": 11.46, "learning_rate": 6.151688066888805e-07, "loss": 1.1963, "step": 7600 }, { "epoch": 11.61, "learning_rate": 9.389629889221367e-07, "loss": 1.153, "step": 7700 }, { "epoch": 11.76, "learning_rate": 1.2053273607180523e-06, "loss": 1.1951, "step": 7800 }, { "epoch": 11.92, "learning_rate": 1.353789615654724e-06, "loss": 1.1274, "step": 7900 }, { "epoch": 12.07, "learning_rate": 1.363372539869449e-07, "loss": 1.1804, "step": 8000 }, { "epoch": 12.22, "learning_rate": 1.2362930050095064e-07, "loss": 1.1455, "step": 8100 }, { "epoch": 12.37, "learning_rate": 9.842812692084315e-08, "loss": 1.1767, "step": 8200 }, { "epoch": 12.52, "learning_rate": 6.645511813249419e-08, "loss": 1.147, "step": 8300 }, { "epoch": 12.67, "learning_rate": 3.496905867845774e-08, "loss": 1.114, "step": 8400 }, { "epoch": 12.82, "learning_rate": 1.1118181719517309e-08, "loss": 1.1379, "step": 8500 }, { "epoch": 12.97, "learning_rate": 3.173163117047257e-10, "loss": 1.1261, "step": 8600 }, { "epoch": 13.0, "eval_loss": 3.459977149963379, "eval_runtime": 0.891, "eval_samples_per_second": 76.319, "eval_steps_per_second": 76.319, "step": 8619 }, { "epoch": 13.12, "learning_rate": 5.0185668342927494e-09, "loss": 1.1202, "step": 8700 }, { "epoch": 13.27, "learning_rate": 2.415461538642524e-08, "loss": 1.1387, "step": 8800 }, { "epoch": 13.42, "learning_rate": 5.338103337974863e-08, "loss": 1.1257, "step": 8900 }, { "epoch": 13.57, "learning_rate": 8.606259068723572e-08, "loss": 1.1419, "step": 9000 }, { "epoch": 13.73, "learning_rate": 1.1477964205183373e-07, "loss": 1.1491, "step": 9100 }, { "epoch": 13.88, "learning_rate": 1.330125980932328e-07, "loss": 1.141, "step": 9200 }, { "epoch": 14.0, "eval_loss": 3.4634451866149902, "eval_runtime": 0.8581, "eval_samples_per_second": 79.247, "eval_steps_per_second": 79.247, "step": 9282 }, { "epoch": 14.03, "learning_rate": 1.3662205797010195e-07, "loss": 1.1135, "step": 9300 }, { "epoch": 14.18, "learning_rate": 1.2497725716841956e-07, "loss": 1.1462, "step": 9400 }, { "epoch": 14.33, "learning_rate": 1.0049124874956241e-07, "loss": 1.1353, "step": 9500 }, { "epoch": 14.48, "learning_rate": 6.876501764792728e-08, "loss": 1.1573, "step": 9600 }, { "epoch": 14.63, "learning_rate": 3.700132283557427e-08, "loss": 1.1477, "step": 9700 }, { "epoch": 14.78, "learning_rate": 1.2411428614983945e-08, "loss": 1.0973, "step": 9800 }, { "epoch": 14.93, "learning_rate": 5.779420298980625e-10, "loss": 1.1278, "step": 9900 }, { "epoch": 15.0, "eval_loss": 3.4664907455444336, "eval_runtime": 0.883, "eval_samples_per_second": 77.009, "eval_steps_per_second": 77.009, "step": 9945 }, { "epoch": 15.08, "learning_rate": 4.187401906767171e-09, "loss": 1.125, "step": 10000 }, { "epoch": 15.23, "learning_rate": 2.2420357948166246e-08, "loss": 1.1384, "step": 10100 }, { "epoch": 15.38, "learning_rate": 5.1137409312764306e-08, "loss": 1.1209, "step": 10200 }, { "epoch": 15.54, "learning_rate": 8.381896662025135e-08, "loss": 1.1044, "step": 10300 }, { "epoch": 15.69, "learning_rate": 1.1304538461357478e-07, "loss": 1.1637, "step": 10400 }, { "epoch": 15.84, "learning_rate": 1.3218143316570722e-07, "loss": 1.1661, "step": 10500 }, { "epoch": 15.99, "learning_rate": 1.368826836882953e-07, "loss": 1.1183, "step": 10600 }, { "epoch": 16.0, "eval_loss": 3.4696619510650635, "eval_runtime": 0.872, "eval_samples_per_second": 77.979, "eval_steps_per_second": 77.979, "step": 10608 }, { "epoch": 16.14, "learning_rate": 1.2608181828048274e-07, "loss": 1.1561, "step": 10700 }, { "epoch": 16.29, "learning_rate": 1.0223094132154226e-07, "loss": 1.1467, "step": 10800 }, { "epoch": 16.44, "learning_rate": 7.074488186750591e-08, "loss": 1.1874, "step": 10900 }, { "epoch": 16.59, "learning_rate": 3.8771873079156884e-08, "loss": 1.1281, "step": 11000 }, { "epoch": 16.74, "learning_rate": 1.3570699499049336e-08, "loss": 1.0814, "step": 11100 }, { "epoch": 16.89, "learning_rate": 8.627460130551071e-10, "loss": 1.1048, "step": 11200 }, { "epoch": 17.0, "eval_loss": 3.4714086055755615, "eval_runtime": 0.874, "eval_samples_per_second": 77.801, "eval_steps_per_second": 77.801, "step": 11271 }, { "epoch": 17.04, "learning_rate": 3.5330803651032965e-09, "loss": 1.1379, "step": 11300 }, { "epoch": 17.19, "learning_rate": 2.0975460526287258e-08, "loss": 1.1625, "step": 11400 }, { "epoch": 17.35, "learning_rate": 4.922996891933192e-08, "loss": 1.1556, "step": 11500 }, { "epoch": 17.5, "learning_rate": 8.188202660552875e-08, "loss": 1.1082, "step": 11600 }, { "epoch": 17.65, "learning_rate": 1.115186855828889e-07, "loss": 1.1591, "step": 11700 }, { "epoch": 17.8, "learning_rate": 1.3141157932507567e-07, "loss": 1.119, "step": 11800 }, { "epoch": 17.95, "learning_rate": 1.3704445380800497e-07, "loss": 1.1061, "step": 11900 }, { "epoch": 18.0, "eval_loss": 3.4751551151275635, "eval_runtime": 0.87, "eval_samples_per_second": 78.161, "eval_steps_per_second": 78.161, "step": 11934 }, { "epoch": 18.1, "learning_rate": 1.2713848593130804e-07, "loss": 1.1182, "step": 12000 }, { "epoch": 18.25, "learning_rate": 1.0394261282124642e-07, "loss": 1.1469, "step": 12100 }, { "epoch": 18.4, "learning_rate": 7.272295898605559e-08, "loss": 1.1398, "step": 12200 }, { "epoch": 18.55, "learning_rate": 4.05672759158415e-08, "loss": 1.0718, "step": 12300 }, { "epoch": 18.7, "learning_rate": 1.4775820423167982e-08, "loss": 1.1535, "step": 12400 }, { "epoch": 18.85, "learning_rate": 1.2039882169440381e-09, "loss": 1.1471, "step": 12500 }, { "epoch": 19.0, "eval_loss": 3.4772818088531494, "eval_runtime": 0.871, "eval_samples_per_second": 78.07, "eval_steps_per_second": 78.07, "step": 12597 }, { "epoch": 19.0, "learning_rate": 2.9329721396952097e-09, "loss": 1.1334, "step": 12600 }, { "epoch": 19.16, "learning_rate": 1.9570243547781353e-08, "loss": 1.1311, "step": 12700 }, { "epoch": 19.31, "learning_rate": 4.733866750445838e-08, "loss": 1.129, "step": 12800 }, { "epoch": 19.46, "learning_rate": 7.993402009629649e-08, "loss": 1.0708, "step": 12900 }, { "epoch": 19.61, "learning_rate": 1.0995622699438409e-07, "loss": 1.1266, "step": 13000 }, { "epoch": 19.76, "learning_rate": 1.3058939130244216e-07, "loss": 1.146, "step": 13100 }, { "epoch": 19.91, "learning_rate": 1.3716110242414843e-07, "loss": 1.1402, "step": 13200 }, { "epoch": 20.0, "eval_loss": 3.4797611236572266, "eval_runtime": 0.848, "eval_samples_per_second": 80.192, "eval_steps_per_second": 80.192, "step": 13260 }, { "epoch": 20.06, "learning_rate": 1.2830956053906914e-07, "loss": 1.1516, "step": 13300 }, { "epoch": 20.21, "learning_rate": 1.0590224609743171e-07, "loss": 1.1088, "step": 13400 }, { "epoch": 20.36, "learning_rate": 7.502625830800328e-08, "loss": 1.1331, "step": 13500 }, { "epoch": 20.51, "learning_rate": 4.269132667061271e-08, "loss": 1.157, "step": 13600 }, { "epoch": 20.66, "learning_rate": 1.6238402532934746e-08, "loss": 1.1155, "step": 13700 }, { "epoch": 20.81, "learning_rate": 1.673053844704988e-09, "loss": 1.1693, "step": 13800 }, { "epoch": 20.97, "learning_rate": 2.3020300149297553e-09, "loss": 1.0847, "step": 13900 }, { "epoch": 21.0, "eval_loss": 3.4811391830444336, "eval_runtime": 0.8641, "eval_samples_per_second": 78.699, "eval_steps_per_second": 78.699, "step": 13923 }, { "epoch": 21.12, "learning_rate": 1.7982535520914407e-08, "loss": 1.1415, "step": 14000 }, { "epoch": 21.27, "learning_rate": 4.515464856300149e-08, "loss": 1.1699, "step": 14100 }, { "epoch": 21.42, "learning_rate": 7.764952480614377e-08, "loss": 1.0972, "step": 14200 }, { "epoch": 21.57, "learning_rate": 1.0808990091715443e-07, "loss": 1.1351, "step": 14300 }, { "epoch": 21.72, "learning_rate": 1.295649436664554e-07, "loss": 1.1021, "step": 14400 }, { "epoch": 21.87, "learning_rate": 1.3719920609842196e-07, "loss": 1.1462, "step": 14500 }, { "epoch": 22.0, "eval_loss": 3.48413348197937, "eval_runtime": 0.887, "eval_samples_per_second": 76.663, "eval_steps_per_second": 76.663, "step": 14586 }, { "epoch": 22.02, "learning_rate": 1.292594929888637e-07, "loss": 1.1323, "step": 14600 }, { "epoch": 22.17, "learning_rate": 1.0754834557415543e-07, "loss": 1.1255, "step": 14700 }, { "epoch": 22.32, "learning_rate": 7.699481330029191e-08, "loss": 1.1254, "step": 14800 }, { "epoch": 22.47, "learning_rate": 4.453541907189201e-08, "loss": 1.1578, "step": 14900 }, { "epoch": 22.62, "learning_rate": 1.7539370793205484e-08, "loss": 1.1509, "step": 15000 }, { "epoch": 22.78, "learning_rate": 2.135541078398113e-09, "loss": 1.096, "step": 15100 }, { "epoch": 22.93, "learning_rate": 1.821038434527568e-09, "loss": 1.1107, "step": 15200 }, { "epoch": 23.0, "eval_loss": 3.485179901123047, "eval_runtime": 0.876, "eval_samples_per_second": 77.624, "eval_steps_per_second": 77.624, "step": 15249 }, { "epoch": 23.08, "learning_rate": 1.6667263928194815e-08, "loss": 1.1112, "step": 15300 }, { "epoch": 23.23, "learning_rate": 4.330370110778617e-08, "loss": 1.0894, "step": 15400 }, { "epoch": 23.38, "learning_rate": 7.568311933111189e-08, "loss": 1.1571, "step": 15500 }, { "epoch": 23.53, "learning_rate": 1.0645446752718106e-07, "loss": 1.0807, "step": 15600 }, { "epoch": 23.68, "learning_rate": 1.286317723671232e-07, "loss": 1.1122, "step": 15700 }, { "epoch": 23.83, "learning_rate": 1.3718015337930875e-07, "loss": 1.2016, "step": 15800 }, { "epoch": 23.98, "learning_rate": 1.301588843603994e-07, "loss": 1.1192, "step": 15900 }, { "epoch": 24.0, "eval_loss": 3.487265110015869, "eval_runtime": 1.004, "eval_samples_per_second": 67.732, "eval_steps_per_second": 67.732, "step": 15912 }, { "epoch": 24.13, "learning_rate": 1.0916199355343483e-07, "loss": 1.1713, "step": 16000 }, { "epoch": 24.28, "learning_rate": 7.895637379105998e-08, "loss": 1.1412, "step": 16100 }, { "epoch": 24.43, "learning_rate": 4.639956191883156e-08, "loss": 1.1021, "step": 16200 }, { "epoch": 24.59, "learning_rate": 1.8882882423516715e-08, "loss": 1.1353, "step": 16300 }, { "epoch": 24.74, "learning_rate": 2.653406048604756e-09, "loss": 1.0801, "step": 16400 }, { "epoch": 24.89, "learning_rate": 1.3956866321093081e-09, "loss": 1.0868, "step": 16500 }, { "epoch": 25.0, "eval_loss": 3.487858295440674, "eval_runtime": 0.854, "eval_samples_per_second": 79.625, "eval_steps_per_second": 79.625, "step": 16575 }, { "epoch": 25.04, "learning_rate": 1.539526233904202e-08, "loss": 1.0812, "step": 16600 }, { "epoch": 25.19, "learning_rate": 4.1473830357287196e-08, "loss": 1.104, "step": 16700 }, { "epoch": 25.34, "learning_rate": 7.371081224908017e-08, "loss": 1.0943, "step": 16800 }, { "epoch": 25.49, "learning_rate": 1.0478749405153535e-07, "loss": 1.097, "step": 16900 }, { "epoch": 25.64, "learning_rate": 1.2764858300209813e-07, "loss": 1.1316, "step": 17000 }, { "epoch": 25.79, "learning_rate": 1.3710396014140516e-07, "loss": 1.1679, "step": 17100 }, { "epoch": 25.94, "learning_rate": 1.31006985286884e-07, "loss": 1.1313, "step": 17200 }, { "epoch": 26.0, "eval_loss": 3.489774703979492, "eval_runtime": 0.859, "eval_samples_per_second": 79.161, "eval_steps_per_second": 79.161, "step": 17238 }, { "epoch": 26.09, "learning_rate": 1.1100177565244e-07, "loss": 1.1454, "step": 17300 }, { "epoch": 26.24, "learning_rate": 8.123384073832201e-08, "loss": 1.0682, "step": 17400 }, { "epoch": 26.4, "learning_rate": 4.859766378694414e-08, "loss": 1.1414, "step": 17500 }, { "epoch": 26.55, "learning_rate": 2.050258742073295e-08, "loss": 1.146, "step": 17600 }, { "epoch": 26.7, "learning_rate": 3.326994962912756e-09, "loss": 1.145, "step": 17700 }, { "epoch": 26.85, "learning_rate": 9.702355760016704e-10, "loss": 1.1254, "step": 17800 }, { "epoch": 27.0, "learning_rate": 1.3967360831826574e-08, "loss": 1.1033, "step": 17900 }, { "epoch": 27.0, "eval_loss": 3.491461992263794, "eval_runtime": 0.878, "eval_samples_per_second": 77.447, "eval_steps_per_second": 77.447, "step": 17901 }, { "epoch": 27.15, "learning_rate": 3.936765275044558e-08, "loss": 1.1389, "step": 18000 }, { "epoch": 27.3, "learning_rate": 7.140452090629836e-08, "loss": 1.1195, "step": 18100 }, { "epoch": 27.45, "learning_rate": 1.0280468285978317e-07, "loss": 1.0567, "step": 18200 }, { "epoch": 27.6, "learning_rate": 1.2643940663167218e-07, "loss": 1.1261, "step": 18300 }, { "epoch": 27.75, "learning_rate": 1.369429361019015e-07, "loss": 1.1393, "step": 18400 }, { "epoch": 27.9, "learning_rate": 1.3193067062605075e-07, "loss": 1.1578, "step": 18500 }, { "epoch": 28.0, "eval_loss": 3.4938805103302, "eval_runtime": 0.864, "eval_samples_per_second": 78.704, "eval_steps_per_second": 78.704, "step": 18564 }, { "epoch": 28.05, "learning_rate": 1.1254053736637487e-07, "loss": 1.136, "step": 18600 }, { "epoch": 28.21, "learning_rate": 8.317464938377186e-08, "loss": 1.102, "step": 18700 }, { "epoch": 28.36, "learning_rate": 5.0499900469112783e-08, "loss": 1.1325, "step": 18800 }, { "epoch": 28.51, "learning_rate": 2.193439018017092e-08, "loss": 1.1511, "step": 18900 }, { "epoch": 28.66, "learning_rate": 3.963303752660322e-09, "loss": 1.118, "step": 19000 }, { "epoch": 28.81, "learning_rate": 6.66590159680239e-10, "loss": 1.1024, "step": 19100 }, { "epoch": 28.96, "learning_rate": 1.2792697375844275e-08, "loss": 1.0987, "step": 19200 }, { "epoch": 29.0, "eval_loss": 3.4947092533111572, "eval_runtime": 0.879, "eval_samples_per_second": 77.363, "eval_steps_per_second": 77.363, "step": 19227 }, { "epoch": 29.11, "learning_rate": 3.7294625797960914e-08, "loss": 1.135, "step": 19300 }, { "epoch": 29.26, "learning_rate": 6.909504912431355e-08, "loss": 1.1308, "step": 19400 }, { "epoch": 29.41, "learning_rate": 1.0078308218564023e-07, "loss": 1.0927, "step": 19500 }, { "epoch": 29.56, "learning_rate": 1.2516463805405508e-07, "loss": 1.1707, "step": 19600 }, { "epoch": 29.71, "learning_rate": 1.367044084314665e-07, "loss": 1.1217, "step": 19700 }, { "epoch": 29.86, "learning_rate": 1.3278253644438908e-07, "loss": 1.0779, "step": 19800 }, { "epoch": 30.0, "eval_loss": 3.497239112854004, "eval_runtime": 0.879, "eval_samples_per_second": 77.362, "eval_steps_per_second": 77.362, "step": 19890 }, { "epoch": 30.0, "step": 19890, "total_flos": 5197098516480000.0, "train_loss": 0.14960067772038202, "train_runtime": 189.9317, "train_samples_per_second": 104.722, "train_steps_per_second": 104.722 } ], "max_steps": 19890, "num_train_epochs": 30, "total_flos": 5197098516480000.0, "trial_name": null, "trial_params": null }