{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.968781408006432, "global_step": 24000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.0610057708161583e-07, "loss": 4.8665, "step": 10 }, { "epoch": 0.0, "learning_rate": 4.1220115416323167e-07, "loss": 5.037, "step": 20 }, { "epoch": 0.0, "learning_rate": 6.183017312448475e-07, "loss": 4.9416, "step": 30 }, { "epoch": 0.0, "learning_rate": 8.244023083264633e-07, "loss": 4.8036, "step": 40 }, { "epoch": 0.01, "learning_rate": 1.0305028854080792e-06, "loss": 4.7498, "step": 50 }, { "epoch": 0.01, "learning_rate": 1.236603462489695e-06, "loss": 4.7409, "step": 60 }, { "epoch": 0.01, "learning_rate": 1.442704039571311e-06, "loss": 4.5851, "step": 70 }, { "epoch": 0.01, "learning_rate": 1.6488046166529267e-06, "loss": 4.3809, "step": 80 }, { "epoch": 0.01, "learning_rate": 1.8549051937345424e-06, "loss": 4.0034, "step": 90 }, { "epoch": 0.01, "learning_rate": 2.0610057708161584e-06, "loss": 4.0203, "step": 100 }, { "epoch": 0.01, "learning_rate": 2.267106347897774e-06, "loss": 3.8694, "step": 110 }, { "epoch": 0.01, "learning_rate": 2.47320692497939e-06, "loss": 3.7423, "step": 120 }, { "epoch": 0.02, "learning_rate": 2.679307502061006e-06, "loss": 3.6758, "step": 130 }, { "epoch": 0.02, "learning_rate": 2.885408079142622e-06, "loss": 3.5005, "step": 140 }, { "epoch": 0.02, "learning_rate": 3.0915086562242376e-06, "loss": 3.4159, "step": 150 }, { "epoch": 0.02, "learning_rate": 3.2976092333058533e-06, "loss": 3.2655, "step": 160 }, { "epoch": 0.02, "learning_rate": 3.503709810387469e-06, "loss": 3.2183, "step": 170 }, { "epoch": 0.02, "learning_rate": 3.709810387469085e-06, "loss": 3.0816, "step": 180 }, { "epoch": 0.02, "learning_rate": 3.915910964550701e-06, "loss": 2.9692, "step": 190 }, { "epoch": 0.02, "learning_rate": 4.122011541632317e-06, "loss": 2.8064, "step": 200 }, { "epoch": 0.03, "learning_rate": 4.328112118713933e-06, "loss": 2.7355, "step": 210 }, { "epoch": 0.03, "learning_rate": 4.534212695795548e-06, "loss": 2.5492, "step": 220 }, { "epoch": 0.03, "learning_rate": 4.7403132728771645e-06, "loss": 2.5349, "step": 230 }, { "epoch": 0.03, "learning_rate": 4.94641384995878e-06, "loss": 2.5254, "step": 240 }, { "epoch": 0.03, "learning_rate": 5.152514427040396e-06, "loss": 2.4401, "step": 250 }, { "epoch": 0.03, "learning_rate": 5.358615004122012e-06, "loss": 2.2764, "step": 260 }, { "epoch": 0.03, "learning_rate": 5.5647155812036275e-06, "loss": 2.2592, "step": 270 }, { "epoch": 0.03, "learning_rate": 5.770816158285244e-06, "loss": 2.1613, "step": 280 }, { "epoch": 0.04, "learning_rate": 5.97691673536686e-06, "loss": 2.0636, "step": 290 }, { "epoch": 0.04, "learning_rate": 6.183017312448475e-06, "loss": 2.0125, "step": 300 }, { "epoch": 0.04, "learning_rate": 6.389117889530091e-06, "loss": 1.9533, "step": 310 }, { "epoch": 0.04, "learning_rate": 6.595218466611707e-06, "loss": 1.9327, "step": 320 }, { "epoch": 0.04, "learning_rate": 6.801319043693323e-06, "loss": 1.8861, "step": 330 }, { "epoch": 0.04, "learning_rate": 7.007419620774938e-06, "loss": 1.8622, "step": 340 }, { "epoch": 0.04, "learning_rate": 7.213520197856554e-06, "loss": 1.7515, "step": 350 }, { "epoch": 0.04, "learning_rate": 7.41962077493817e-06, "loss": 1.7603, "step": 360 }, { "epoch": 0.05, "learning_rate": 7.625721352019787e-06, "loss": 1.6659, "step": 370 }, { "epoch": 0.05, "learning_rate": 7.831821929101401e-06, "loss": 1.6916, "step": 380 }, { "epoch": 0.05, "learning_rate": 8.037922506183018e-06, "loss": 1.7078, "step": 390 }, { "epoch": 0.05, "learning_rate": 8.244023083264634e-06, "loss": 1.6047, "step": 400 }, { "epoch": 0.05, "learning_rate": 8.45012366034625e-06, "loss": 1.5654, "step": 410 }, { "epoch": 0.05, "learning_rate": 8.656224237427866e-06, "loss": 1.5927, "step": 420 }, { "epoch": 0.05, "learning_rate": 8.862324814509481e-06, "loss": 1.6095, "step": 430 }, { "epoch": 0.05, "learning_rate": 9.068425391591097e-06, "loss": 1.6156, "step": 440 }, { "epoch": 0.06, "learning_rate": 9.274525968672712e-06, "loss": 1.5368, "step": 450 }, { "epoch": 0.06, "learning_rate": 9.480626545754329e-06, "loss": 1.5421, "step": 460 }, { "epoch": 0.06, "learning_rate": 9.686727122835944e-06, "loss": 1.5215, "step": 470 }, { "epoch": 0.06, "learning_rate": 9.89282769991756e-06, "loss": 1.5233, "step": 480 }, { "epoch": 0.06, "learning_rate": 1.0098928276999175e-05, "loss": 1.4729, "step": 490 }, { "epoch": 0.06, "learning_rate": 1.0305028854080792e-05, "loss": 1.539, "step": 500 }, { "epoch": 0.06, "eval_loss": 1.3461134433746338, "eval_runtime": 5.768, "eval_samples_per_second": 89.806, "eval_steps_per_second": 11.269, "step": 500 }, { "epoch": 0.06, "learning_rate": 1.0511129431162407e-05, "loss": 1.5064, "step": 510 }, { "epoch": 0.06, "learning_rate": 1.0717230008244024e-05, "loss": 1.5028, "step": 520 }, { "epoch": 0.07, "learning_rate": 1.092333058532564e-05, "loss": 1.4774, "step": 530 }, { "epoch": 0.07, "learning_rate": 1.1129431162407255e-05, "loss": 1.4405, "step": 540 }, { "epoch": 0.07, "learning_rate": 1.133553173948887e-05, "loss": 1.4486, "step": 550 }, { "epoch": 0.07, "learning_rate": 1.1541632316570487e-05, "loss": 1.4701, "step": 560 }, { "epoch": 0.07, "learning_rate": 1.1747732893652103e-05, "loss": 1.4776, "step": 570 }, { "epoch": 0.07, "learning_rate": 1.195383347073372e-05, "loss": 1.4349, "step": 580 }, { "epoch": 0.07, "learning_rate": 1.2159934047815335e-05, "loss": 1.4336, "step": 590 }, { "epoch": 0.07, "learning_rate": 1.236603462489695e-05, "loss": 1.4612, "step": 600 }, { "epoch": 0.08, "learning_rate": 1.2572135201978566e-05, "loss": 1.4073, "step": 610 }, { "epoch": 0.08, "learning_rate": 1.2778235779060183e-05, "loss": 1.3991, "step": 620 }, { "epoch": 0.08, "learning_rate": 1.29843363561418e-05, "loss": 1.4243, "step": 630 }, { "epoch": 0.08, "learning_rate": 1.3190436933223413e-05, "loss": 1.4053, "step": 640 }, { "epoch": 0.08, "learning_rate": 1.3396537510305029e-05, "loss": 1.4047, "step": 650 }, { "epoch": 0.08, "learning_rate": 1.3602638087386646e-05, "loss": 1.3824, "step": 660 }, { "epoch": 0.08, "learning_rate": 1.380873866446826e-05, "loss": 1.3764, "step": 670 }, { "epoch": 0.08, "learning_rate": 1.4014839241549876e-05, "loss": 1.4373, "step": 680 }, { "epoch": 0.09, "learning_rate": 1.4220939818631493e-05, "loss": 1.4134, "step": 690 }, { "epoch": 0.09, "learning_rate": 1.4427040395713109e-05, "loss": 1.3613, "step": 700 }, { "epoch": 0.09, "learning_rate": 1.4633140972794724e-05, "loss": 1.416, "step": 710 }, { "epoch": 0.09, "learning_rate": 1.483924154987634e-05, "loss": 1.4148, "step": 720 }, { "epoch": 0.09, "learning_rate": 1.5045342126957956e-05, "loss": 1.3494, "step": 730 }, { "epoch": 0.09, "learning_rate": 1.5251442704039573e-05, "loss": 1.3827, "step": 740 }, { "epoch": 0.09, "learning_rate": 1.5457543281121185e-05, "loss": 1.3406, "step": 750 }, { "epoch": 0.09, "learning_rate": 1.5663643858202802e-05, "loss": 1.404, "step": 760 }, { "epoch": 0.1, "learning_rate": 1.586974443528442e-05, "loss": 1.3474, "step": 770 }, { "epoch": 0.1, "learning_rate": 1.6075845012366037e-05, "loss": 1.3925, "step": 780 }, { "epoch": 0.1, "learning_rate": 1.628194558944765e-05, "loss": 1.3944, "step": 790 }, { "epoch": 0.1, "learning_rate": 1.6488046166529267e-05, "loss": 1.3822, "step": 800 }, { "epoch": 0.1, "learning_rate": 1.6694146743610884e-05, "loss": 1.3885, "step": 810 }, { "epoch": 0.1, "learning_rate": 1.69002473206925e-05, "loss": 1.3641, "step": 820 }, { "epoch": 0.1, "learning_rate": 1.7106347897774115e-05, "loss": 1.4021, "step": 830 }, { "epoch": 0.1, "learning_rate": 1.7312448474855732e-05, "loss": 1.3464, "step": 840 }, { "epoch": 0.11, "learning_rate": 1.7518549051937346e-05, "loss": 1.3972, "step": 850 }, { "epoch": 0.11, "learning_rate": 1.7724649629018963e-05, "loss": 1.3618, "step": 860 }, { "epoch": 0.11, "learning_rate": 1.7930750206100576e-05, "loss": 1.4035, "step": 870 }, { "epoch": 0.11, "learning_rate": 1.8136850783182193e-05, "loss": 1.3637, "step": 880 }, { "epoch": 0.11, "learning_rate": 1.834295136026381e-05, "loss": 1.4274, "step": 890 }, { "epoch": 0.11, "learning_rate": 1.8549051937345424e-05, "loss": 1.3806, "step": 900 }, { "epoch": 0.11, "learning_rate": 1.875515251442704e-05, "loss": 1.3702, "step": 910 }, { "epoch": 0.11, "learning_rate": 1.8961253091508658e-05, "loss": 1.3692, "step": 920 }, { "epoch": 0.12, "learning_rate": 1.9167353668590275e-05, "loss": 1.3833, "step": 930 }, { "epoch": 0.12, "learning_rate": 1.937345424567189e-05, "loss": 1.3583, "step": 940 }, { "epoch": 0.12, "learning_rate": 1.9579554822753506e-05, "loss": 1.3478, "step": 950 }, { "epoch": 0.12, "learning_rate": 1.978565539983512e-05, "loss": 1.3563, "step": 960 }, { "epoch": 0.12, "learning_rate": 1.9991755976916736e-05, "loss": 1.3484, "step": 970 }, { "epoch": 0.12, "learning_rate": 2.019785655399835e-05, "loss": 1.3945, "step": 980 }, { "epoch": 0.12, "learning_rate": 2.0403957131079967e-05, "loss": 1.3447, "step": 990 }, { "epoch": 0.12, "learning_rate": 2.0610057708161584e-05, "loss": 1.3477, "step": 1000 }, { "epoch": 0.12, "eval_loss": 1.2597277164459229, "eval_runtime": 5.7653, "eval_samples_per_second": 89.848, "eval_steps_per_second": 11.274, "step": 1000 }, { "epoch": 0.12, "learning_rate": 2.08161582852432e-05, "loss": 1.3636, "step": 1010 }, { "epoch": 0.13, "learning_rate": 2.1022258862324815e-05, "loss": 1.3576, "step": 1020 }, { "epoch": 0.13, "learning_rate": 2.122835943940643e-05, "loss": 1.3677, "step": 1030 }, { "epoch": 0.13, "learning_rate": 2.143446001648805e-05, "loss": 1.337, "step": 1040 }, { "epoch": 0.13, "learning_rate": 2.1640560593569666e-05, "loss": 1.3743, "step": 1050 }, { "epoch": 0.13, "learning_rate": 2.184666117065128e-05, "loss": 1.3512, "step": 1060 }, { "epoch": 0.13, "learning_rate": 2.2052761747732893e-05, "loss": 1.3447, "step": 1070 }, { "epoch": 0.13, "learning_rate": 2.225886232481451e-05, "loss": 1.3772, "step": 1080 }, { "epoch": 0.13, "learning_rate": 2.2464962901896127e-05, "loss": 1.3257, "step": 1090 }, { "epoch": 0.14, "learning_rate": 2.267106347897774e-05, "loss": 1.3911, "step": 1100 }, { "epoch": 0.14, "learning_rate": 2.2877164056059358e-05, "loss": 1.3638, "step": 1110 }, { "epoch": 0.14, "learning_rate": 2.3083264633140975e-05, "loss": 1.3602, "step": 1120 }, { "epoch": 0.14, "learning_rate": 2.328936521022259e-05, "loss": 1.344, "step": 1130 }, { "epoch": 0.14, "learning_rate": 2.3495465787304205e-05, "loss": 1.3768, "step": 1140 }, { "epoch": 0.14, "learning_rate": 2.3701566364385822e-05, "loss": 1.3314, "step": 1150 }, { "epoch": 0.14, "learning_rate": 2.390766694146744e-05, "loss": 1.3609, "step": 1160 }, { "epoch": 0.14, "learning_rate": 2.4113767518549053e-05, "loss": 1.3948, "step": 1170 }, { "epoch": 0.15, "learning_rate": 2.431986809563067e-05, "loss": 1.3339, "step": 1180 }, { "epoch": 0.15, "learning_rate": 2.4525968672712284e-05, "loss": 1.3364, "step": 1190 }, { "epoch": 0.15, "learning_rate": 2.47320692497939e-05, "loss": 1.3286, "step": 1200 }, { "epoch": 0.15, "learning_rate": 2.4938169826875514e-05, "loss": 1.3242, "step": 1210 }, { "epoch": 0.15, "learning_rate": 2.514427040395713e-05, "loss": 1.3238, "step": 1220 }, { "epoch": 0.15, "learning_rate": 2.5350370981038745e-05, "loss": 1.3601, "step": 1230 }, { "epoch": 0.15, "learning_rate": 2.5556471558120365e-05, "loss": 1.3517, "step": 1240 }, { "epoch": 0.15, "learning_rate": 2.576257213520198e-05, "loss": 1.3391, "step": 1250 }, { "epoch": 0.16, "learning_rate": 2.59686727122836e-05, "loss": 1.3292, "step": 1260 }, { "epoch": 0.16, "learning_rate": 2.6174773289365213e-05, "loss": 1.3411, "step": 1270 }, { "epoch": 0.16, "learning_rate": 2.6380873866446827e-05, "loss": 1.3241, "step": 1280 }, { "epoch": 0.16, "learning_rate": 2.6586974443528444e-05, "loss": 1.3354, "step": 1290 }, { "epoch": 0.16, "learning_rate": 2.6793075020610057e-05, "loss": 1.3345, "step": 1300 }, { "epoch": 0.16, "learning_rate": 2.699917559769167e-05, "loss": 1.3384, "step": 1310 }, { "epoch": 0.16, "learning_rate": 2.720527617477329e-05, "loss": 1.3396, "step": 1320 }, { "epoch": 0.16, "learning_rate": 2.7411376751854905e-05, "loss": 1.3133, "step": 1330 }, { "epoch": 0.17, "learning_rate": 2.761747732893652e-05, "loss": 1.328, "step": 1340 }, { "epoch": 0.17, "learning_rate": 2.782357790601814e-05, "loss": 1.3167, "step": 1350 }, { "epoch": 0.17, "learning_rate": 2.8029678483099753e-05, "loss": 1.3062, "step": 1360 }, { "epoch": 0.17, "learning_rate": 2.8235779060181373e-05, "loss": 1.2965, "step": 1370 }, { "epoch": 0.17, "learning_rate": 2.8441879637262987e-05, "loss": 1.3645, "step": 1380 }, { "epoch": 0.17, "learning_rate": 2.86479802143446e-05, "loss": 1.3682, "step": 1390 }, { "epoch": 0.17, "learning_rate": 2.8854080791426218e-05, "loss": 1.3035, "step": 1400 }, { "epoch": 0.17, "learning_rate": 2.906018136850783e-05, "loss": 1.3153, "step": 1410 }, { "epoch": 0.18, "learning_rate": 2.9266281945589448e-05, "loss": 1.3141, "step": 1420 }, { "epoch": 0.18, "learning_rate": 2.9472382522671065e-05, "loss": 1.3191, "step": 1430 }, { "epoch": 0.18, "learning_rate": 2.967848309975268e-05, "loss": 1.3252, "step": 1440 }, { "epoch": 0.18, "learning_rate": 2.98845836768343e-05, "loss": 1.3211, "step": 1450 }, { "epoch": 0.18, "learning_rate": 3.0090684253915913e-05, "loss": 1.3037, "step": 1460 }, { "epoch": 0.18, "learning_rate": 3.0296784830997527e-05, "loss": 1.3206, "step": 1470 }, { "epoch": 0.18, "learning_rate": 3.0502885408079147e-05, "loss": 1.2752, "step": 1480 }, { "epoch": 0.18, "learning_rate": 3.070898598516076e-05, "loss": 1.3213, "step": 1490 }, { "epoch": 0.19, "learning_rate": 3.091508656224237e-05, "loss": 1.3303, "step": 1500 }, { "epoch": 0.19, "eval_loss": 1.2215049266815186, "eval_runtime": 5.7676, "eval_samples_per_second": 89.812, "eval_steps_per_second": 11.27, "step": 1500 }, { "epoch": 0.19, "learning_rate": 3.112118713932399e-05, "loss": 1.3255, "step": 1510 }, { "epoch": 0.19, "learning_rate": 3.1327287716405605e-05, "loss": 1.3062, "step": 1520 }, { "epoch": 0.19, "learning_rate": 3.1533388293487225e-05, "loss": 1.2983, "step": 1530 }, { "epoch": 0.19, "learning_rate": 3.173948887056884e-05, "loss": 1.3201, "step": 1540 }, { "epoch": 0.19, "learning_rate": 3.194558944765045e-05, "loss": 1.2797, "step": 1550 }, { "epoch": 0.19, "learning_rate": 3.215169002473207e-05, "loss": 1.3081, "step": 1560 }, { "epoch": 0.19, "learning_rate": 3.235779060181369e-05, "loss": 1.3323, "step": 1570 }, { "epoch": 0.2, "learning_rate": 3.25638911788953e-05, "loss": 1.3209, "step": 1580 }, { "epoch": 0.2, "learning_rate": 3.276999175597692e-05, "loss": 1.3459, "step": 1590 }, { "epoch": 0.2, "learning_rate": 3.2976092333058534e-05, "loss": 1.3325, "step": 1600 }, { "epoch": 0.2, "learning_rate": 3.318219291014015e-05, "loss": 1.3048, "step": 1610 }, { "epoch": 0.2, "learning_rate": 3.338829348722177e-05, "loss": 1.2714, "step": 1620 }, { "epoch": 0.2, "learning_rate": 3.359439406430338e-05, "loss": 1.2817, "step": 1630 }, { "epoch": 0.2, "learning_rate": 3.3800494641385e-05, "loss": 1.3377, "step": 1640 }, { "epoch": 0.2, "learning_rate": 3.4006595218466616e-05, "loss": 1.2925, "step": 1650 }, { "epoch": 0.21, "learning_rate": 3.421269579554823e-05, "loss": 1.3274, "step": 1660 }, { "epoch": 0.21, "learning_rate": 3.441879637262985e-05, "loss": 1.2687, "step": 1670 }, { "epoch": 0.21, "learning_rate": 3.4624896949711464e-05, "loss": 1.3106, "step": 1680 }, { "epoch": 0.21, "learning_rate": 3.483099752679308e-05, "loss": 1.3474, "step": 1690 }, { "epoch": 0.21, "learning_rate": 3.503709810387469e-05, "loss": 1.2755, "step": 1700 }, { "epoch": 0.21, "learning_rate": 3.5243198680956305e-05, "loss": 1.3411, "step": 1710 }, { "epoch": 0.21, "learning_rate": 3.5449299258037925e-05, "loss": 1.3302, "step": 1720 }, { "epoch": 0.21, "learning_rate": 3.565539983511954e-05, "loss": 1.3389, "step": 1730 }, { "epoch": 0.22, "learning_rate": 3.586150041220115e-05, "loss": 1.275, "step": 1740 }, { "epoch": 0.22, "learning_rate": 3.606760098928277e-05, "loss": 1.3162, "step": 1750 }, { "epoch": 0.22, "learning_rate": 3.6273701566364386e-05, "loss": 1.3057, "step": 1760 }, { "epoch": 0.22, "learning_rate": 3.6479802143446e-05, "loss": 1.2945, "step": 1770 }, { "epoch": 0.22, "learning_rate": 3.668590272052762e-05, "loss": 1.2761, "step": 1780 }, { "epoch": 0.22, "learning_rate": 3.6892003297609234e-05, "loss": 1.2827, "step": 1790 }, { "epoch": 0.22, "learning_rate": 3.709810387469085e-05, "loss": 1.2988, "step": 1800 }, { "epoch": 0.22, "learning_rate": 3.730420445177247e-05, "loss": 1.3273, "step": 1810 }, { "epoch": 0.23, "learning_rate": 3.751030502885408e-05, "loss": 1.2644, "step": 1820 }, { "epoch": 0.23, "learning_rate": 3.77164056059357e-05, "loss": 1.3081, "step": 1830 }, { "epoch": 0.23, "learning_rate": 3.7922506183017316e-05, "loss": 1.3072, "step": 1840 }, { "epoch": 0.23, "learning_rate": 3.812860676009893e-05, "loss": 1.2689, "step": 1850 }, { "epoch": 0.23, "learning_rate": 3.833470733718055e-05, "loss": 1.2613, "step": 1860 }, { "epoch": 0.23, "learning_rate": 3.8540807914262164e-05, "loss": 1.3147, "step": 1870 }, { "epoch": 0.23, "learning_rate": 3.874690849134378e-05, "loss": 1.2822, "step": 1880 }, { "epoch": 0.23, "learning_rate": 3.89530090684254e-05, "loss": 1.2943, "step": 1890 }, { "epoch": 0.24, "learning_rate": 3.915910964550701e-05, "loss": 1.2895, "step": 1900 }, { "epoch": 0.24, "learning_rate": 3.9365210222588625e-05, "loss": 1.2749, "step": 1910 }, { "epoch": 0.24, "learning_rate": 3.957131079967024e-05, "loss": 1.2827, "step": 1920 }, { "epoch": 0.24, "learning_rate": 3.977741137675185e-05, "loss": 1.2815, "step": 1930 }, { "epoch": 0.24, "learning_rate": 3.998351195383347e-05, "loss": 1.2576, "step": 1940 }, { "epoch": 0.24, "learning_rate": 4.0189612530915086e-05, "loss": 1.2843, "step": 1950 }, { "epoch": 0.24, "learning_rate": 4.03957131079967e-05, "loss": 1.2842, "step": 1960 }, { "epoch": 0.24, "learning_rate": 4.060181368507832e-05, "loss": 1.2971, "step": 1970 }, { "epoch": 0.24, "learning_rate": 4.0807914262159934e-05, "loss": 1.2993, "step": 1980 }, { "epoch": 0.25, "learning_rate": 4.101401483924155e-05, "loss": 1.297, "step": 1990 }, { "epoch": 0.25, "learning_rate": 4.122011541632317e-05, "loss": 1.2657, "step": 2000 }, { "epoch": 0.25, "eval_loss": 1.1918519735336304, "eval_runtime": 5.7665, "eval_samples_per_second": 89.829, "eval_steps_per_second": 11.272, "step": 2000 }, { "epoch": 0.25, "learning_rate": 4.142621599340478e-05, "loss": 1.2862, "step": 2010 }, { "epoch": 0.25, "learning_rate": 4.16323165704864e-05, "loss": 1.2709, "step": 2020 }, { "epoch": 0.25, "learning_rate": 4.1838417147568016e-05, "loss": 1.2649, "step": 2030 }, { "epoch": 0.25, "learning_rate": 4.204451772464963e-05, "loss": 1.317, "step": 2040 }, { "epoch": 0.25, "learning_rate": 4.225061830173125e-05, "loss": 1.2622, "step": 2050 }, { "epoch": 0.25, "learning_rate": 4.245671887881286e-05, "loss": 1.2988, "step": 2060 }, { "epoch": 0.26, "learning_rate": 4.266281945589448e-05, "loss": 1.281, "step": 2070 }, { "epoch": 0.26, "learning_rate": 4.28689200329761e-05, "loss": 1.2865, "step": 2080 }, { "epoch": 0.26, "learning_rate": 4.307502061005771e-05, "loss": 1.2695, "step": 2090 }, { "epoch": 0.26, "learning_rate": 4.328112118713933e-05, "loss": 1.2806, "step": 2100 }, { "epoch": 0.26, "learning_rate": 4.3487221764220945e-05, "loss": 1.291, "step": 2110 }, { "epoch": 0.26, "learning_rate": 4.369332234130256e-05, "loss": 1.2854, "step": 2120 }, { "epoch": 0.26, "learning_rate": 4.389942291838417e-05, "loss": 1.2617, "step": 2130 }, { "epoch": 0.26, "learning_rate": 4.4105523495465786e-05, "loss": 1.3352, "step": 2140 }, { "epoch": 0.27, "learning_rate": 4.4311624072547406e-05, "loss": 1.2832, "step": 2150 }, { "epoch": 0.27, "learning_rate": 4.451772464962902e-05, "loss": 1.2778, "step": 2160 }, { "epoch": 0.27, "learning_rate": 4.4723825226710634e-05, "loss": 1.2679, "step": 2170 }, { "epoch": 0.27, "learning_rate": 4.4929925803792254e-05, "loss": 1.294, "step": 2180 }, { "epoch": 0.27, "learning_rate": 4.513602638087387e-05, "loss": 1.265, "step": 2190 }, { "epoch": 0.27, "learning_rate": 4.534212695795548e-05, "loss": 1.2549, "step": 2200 }, { "epoch": 0.27, "learning_rate": 4.55482275350371e-05, "loss": 1.2885, "step": 2210 }, { "epoch": 0.27, "learning_rate": 4.5754328112118715e-05, "loss": 1.2858, "step": 2220 }, { "epoch": 0.28, "learning_rate": 4.596042868920033e-05, "loss": 1.2742, "step": 2230 }, { "epoch": 0.28, "learning_rate": 4.616652926628195e-05, "loss": 1.3061, "step": 2240 }, { "epoch": 0.28, "learning_rate": 4.637262984336356e-05, "loss": 1.3293, "step": 2250 }, { "epoch": 0.28, "learning_rate": 4.657873042044518e-05, "loss": 1.2671, "step": 2260 }, { "epoch": 0.28, "learning_rate": 4.67848309975268e-05, "loss": 1.2881, "step": 2270 }, { "epoch": 0.28, "learning_rate": 4.699093157460841e-05, "loss": 1.2959, "step": 2280 }, { "epoch": 0.28, "learning_rate": 4.719703215169003e-05, "loss": 1.3, "step": 2290 }, { "epoch": 0.28, "learning_rate": 4.7403132728771645e-05, "loss": 1.2194, "step": 2300 }, { "epoch": 0.29, "learning_rate": 4.760923330585326e-05, "loss": 1.2769, "step": 2310 }, { "epoch": 0.29, "learning_rate": 4.781533388293488e-05, "loss": 1.2252, "step": 2320 }, { "epoch": 0.29, "learning_rate": 4.802143446001649e-05, "loss": 1.2456, "step": 2330 }, { "epoch": 0.29, "learning_rate": 4.8227535037098106e-05, "loss": 1.3035, "step": 2340 }, { "epoch": 0.29, "learning_rate": 4.8433635614179727e-05, "loss": 1.2018, "step": 2350 }, { "epoch": 0.29, "learning_rate": 4.863973619126134e-05, "loss": 1.2896, "step": 2360 }, { "epoch": 0.29, "learning_rate": 4.8845836768342954e-05, "loss": 1.2485, "step": 2370 }, { "epoch": 0.29, "learning_rate": 4.905193734542457e-05, "loss": 1.2543, "step": 2380 }, { "epoch": 0.3, "learning_rate": 4.925803792250618e-05, "loss": 1.2763, "step": 2390 }, { "epoch": 0.3, "learning_rate": 4.94641384995878e-05, "loss": 1.2606, "step": 2400 }, { "epoch": 0.3, "learning_rate": 4.9670239076669415e-05, "loss": 1.2606, "step": 2410 }, { "epoch": 0.3, "learning_rate": 4.987633965375103e-05, "loss": 1.2778, "step": 2420 }, { "epoch": 0.3, "learning_rate": 4.9999995856365626e-05, "loss": 1.2752, "step": 2430 }, { "epoch": 0.3, "learning_rate": 4.999994924049467e-05, "loss": 1.2708, "step": 2440 }, { "epoch": 0.3, "learning_rate": 4.999985082930671e-05, "loss": 1.257, "step": 2450 }, { "epoch": 0.3, "learning_rate": 4.999970062300561e-05, "loss": 1.2547, "step": 2460 }, { "epoch": 0.31, "learning_rate": 4.999949862190258e-05, "loss": 1.26, "step": 2470 }, { "epoch": 0.31, "learning_rate": 4.999924482641613e-05, "loss": 1.2638, "step": 2480 }, { "epoch": 0.31, "learning_rate": 4.999893923707209e-05, "loss": 1.2996, "step": 2490 }, { "epoch": 0.31, "learning_rate": 4.9998581854503555e-05, "loss": 1.2208, "step": 2500 }, { "epoch": 0.31, "eval_loss": 1.1681606769561768, "eval_runtime": 5.7701, "eval_samples_per_second": 89.774, "eval_steps_per_second": 11.265, "step": 2500 }, { "epoch": 0.31, "learning_rate": 4.9998172679450984e-05, "loss": 1.2741, "step": 2510 }, { "epoch": 0.31, "learning_rate": 4.9997711712762105e-05, "loss": 1.2765, "step": 2520 }, { "epoch": 0.31, "learning_rate": 4.9997198955391945e-05, "loss": 1.2402, "step": 2530 }, { "epoch": 0.31, "learning_rate": 4.9996634408402866e-05, "loss": 1.2759, "step": 2540 }, { "epoch": 0.32, "learning_rate": 4.9996018072964485e-05, "loss": 1.2466, "step": 2550 }, { "epoch": 0.32, "learning_rate": 4.999534995035374e-05, "loss": 1.3292, "step": 2560 }, { "epoch": 0.32, "learning_rate": 4.999463004195487e-05, "loss": 1.2591, "step": 2570 }, { "epoch": 0.32, "learning_rate": 4.999385834925939e-05, "loss": 1.2557, "step": 2580 }, { "epoch": 0.32, "learning_rate": 4.99930348738661e-05, "loss": 1.2687, "step": 2590 }, { "epoch": 0.32, "learning_rate": 4.999215961748109e-05, "loss": 1.2432, "step": 2600 }, { "epoch": 0.32, "learning_rate": 4.999123258191774e-05, "loss": 1.2748, "step": 2610 }, { "epoch": 0.32, "learning_rate": 4.999025376909668e-05, "loss": 1.2481, "step": 2620 }, { "epoch": 0.33, "learning_rate": 4.998922318104585e-05, "loss": 1.2332, "step": 2630 }, { "epoch": 0.33, "learning_rate": 4.998814081990043e-05, "loss": 1.201, "step": 2640 }, { "epoch": 0.33, "learning_rate": 4.998700668790287e-05, "loss": 1.318, "step": 2650 }, { "epoch": 0.33, "learning_rate": 4.9985820787402905e-05, "loss": 1.2837, "step": 2660 }, { "epoch": 0.33, "learning_rate": 4.9984583120857476e-05, "loss": 1.2287, "step": 2670 }, { "epoch": 0.33, "learning_rate": 4.9983293690830816e-05, "loss": 1.2462, "step": 2680 }, { "epoch": 0.33, "learning_rate": 4.9981952499994386e-05, "loss": 1.2682, "step": 2690 }, { "epoch": 0.33, "learning_rate": 4.9980559551126895e-05, "loss": 1.2765, "step": 2700 }, { "epoch": 0.34, "learning_rate": 4.9979114847114265e-05, "loss": 1.2422, "step": 2710 }, { "epoch": 0.34, "learning_rate": 4.9977618390949666e-05, "loss": 1.2488, "step": 2720 }, { "epoch": 0.34, "learning_rate": 4.9976070185733484e-05, "loss": 1.2046, "step": 2730 }, { "epoch": 0.34, "learning_rate": 4.9974470234673296e-05, "loss": 1.3038, "step": 2740 }, { "epoch": 0.34, "learning_rate": 4.9972818541083935e-05, "loss": 1.268, "step": 2750 }, { "epoch": 0.34, "learning_rate": 4.99711151083874e-05, "loss": 1.2072, "step": 2760 }, { "epoch": 0.34, "learning_rate": 4.996935994011288e-05, "loss": 1.2273, "step": 2770 }, { "epoch": 0.34, "learning_rate": 4.9967553039896774e-05, "loss": 1.2566, "step": 2780 }, { "epoch": 0.35, "learning_rate": 4.996569441148264e-05, "loss": 1.2605, "step": 2790 }, { "epoch": 0.35, "learning_rate": 4.996378405872123e-05, "loss": 1.2316, "step": 2800 }, { "epoch": 0.35, "learning_rate": 4.996182198557043e-05, "loss": 1.264, "step": 2810 }, { "epoch": 0.35, "learning_rate": 4.99598081960953e-05, "loss": 1.2703, "step": 2820 }, { "epoch": 0.35, "learning_rate": 4.995774269446805e-05, "loss": 1.2591, "step": 2830 }, { "epoch": 0.35, "learning_rate": 4.9955625484968004e-05, "loss": 1.2575, "step": 2840 }, { "epoch": 0.35, "learning_rate": 4.995345657198165e-05, "loss": 1.2471, "step": 2850 }, { "epoch": 0.35, "learning_rate": 4.995123596000258e-05, "loss": 1.1949, "step": 2860 }, { "epoch": 0.36, "learning_rate": 4.9948963653631484e-05, "loss": 1.2396, "step": 2870 }, { "epoch": 0.36, "learning_rate": 4.994663965757617e-05, "loss": 1.2551, "step": 2880 }, { "epoch": 0.36, "learning_rate": 4.994426397665152e-05, "loss": 1.2288, "step": 2890 }, { "epoch": 0.36, "learning_rate": 4.994183661577954e-05, "loss": 1.2325, "step": 2900 }, { "epoch": 0.36, "learning_rate": 4.993935757998924e-05, "loss": 1.2827, "step": 2910 }, { "epoch": 0.36, "learning_rate": 4.9936826874416754e-05, "loss": 1.2477, "step": 2920 }, { "epoch": 0.36, "learning_rate": 4.993424450430524e-05, "loss": 1.2608, "step": 2930 }, { "epoch": 0.36, "learning_rate": 4.993161047500488e-05, "loss": 1.2493, "step": 2940 }, { "epoch": 0.36, "learning_rate": 4.992892479197291e-05, "loss": 1.2592, "step": 2950 }, { "epoch": 0.37, "learning_rate": 4.992618746077358e-05, "loss": 1.2694, "step": 2960 }, { "epoch": 0.37, "learning_rate": 4.992339848707812e-05, "loss": 1.2459, "step": 2970 }, { "epoch": 0.37, "learning_rate": 4.9920557876664796e-05, "loss": 1.2578, "step": 2980 }, { "epoch": 0.37, "learning_rate": 4.991766563541882e-05, "loss": 1.2377, "step": 2990 }, { "epoch": 0.37, "learning_rate": 4.991472176933238e-05, "loss": 1.2454, "step": 3000 }, { "epoch": 0.37, "eval_loss": 1.1485267877578735, "eval_runtime": 5.7624, "eval_samples_per_second": 89.893, "eval_steps_per_second": 11.28, "step": 3000 }, { "epoch": 0.37, "learning_rate": 4.991172628450464e-05, "loss": 1.2127, "step": 3010 }, { "epoch": 0.37, "learning_rate": 4.990867918714169e-05, "loss": 1.2452, "step": 3020 }, { "epoch": 0.37, "learning_rate": 4.990558048355656e-05, "loss": 1.2212, "step": 3030 }, { "epoch": 0.38, "learning_rate": 4.9902430180169194e-05, "loss": 1.2321, "step": 3040 }, { "epoch": 0.38, "learning_rate": 4.989922828350645e-05, "loss": 1.2246, "step": 3050 }, { "epoch": 0.38, "learning_rate": 4.989597480020206e-05, "loss": 1.2435, "step": 3060 }, { "epoch": 0.38, "learning_rate": 4.989266973699666e-05, "loss": 1.25, "step": 3070 }, { "epoch": 0.38, "learning_rate": 4.9889313100737716e-05, "loss": 1.2689, "step": 3080 }, { "epoch": 0.38, "learning_rate": 4.9885904898379584e-05, "loss": 1.2067, "step": 3090 }, { "epoch": 0.38, "learning_rate": 4.988244513698343e-05, "loss": 1.2217, "step": 3100 }, { "epoch": 0.38, "learning_rate": 4.987893382371723e-05, "loss": 1.2208, "step": 3110 }, { "epoch": 0.39, "learning_rate": 4.9875370965855805e-05, "loss": 1.2536, "step": 3120 }, { "epoch": 0.39, "learning_rate": 4.987175657078074e-05, "loss": 1.2553, "step": 3130 }, { "epoch": 0.39, "learning_rate": 4.9868090645980384e-05, "loss": 1.2373, "step": 3140 }, { "epoch": 0.39, "learning_rate": 4.986437319904987e-05, "loss": 1.2663, "step": 3150 }, { "epoch": 0.39, "learning_rate": 4.986060423769108e-05, "loss": 1.1748, "step": 3160 }, { "epoch": 0.39, "learning_rate": 4.985678376971259e-05, "loss": 1.2302, "step": 3170 }, { "epoch": 0.39, "learning_rate": 4.985291180302972e-05, "loss": 1.2281, "step": 3180 }, { "epoch": 0.39, "learning_rate": 4.984898834566449e-05, "loss": 1.2421, "step": 3190 }, { "epoch": 0.4, "learning_rate": 4.984501340574556e-05, "loss": 1.2311, "step": 3200 }, { "epoch": 0.4, "learning_rate": 4.9840986991508295e-05, "loss": 1.2272, "step": 3210 }, { "epoch": 0.4, "learning_rate": 4.983690911129468e-05, "loss": 1.2348, "step": 3220 }, { "epoch": 0.4, "learning_rate": 4.983277977355335e-05, "loss": 1.2439, "step": 3230 }, { "epoch": 0.4, "learning_rate": 4.982859898683952e-05, "loss": 1.1972, "step": 3240 }, { "epoch": 0.4, "learning_rate": 4.982436675981501e-05, "loss": 1.2263, "step": 3250 }, { "epoch": 0.4, "learning_rate": 4.982008310124824e-05, "loss": 1.2558, "step": 3260 }, { "epoch": 0.4, "learning_rate": 4.981574802001415e-05, "loss": 1.217, "step": 3270 }, { "epoch": 0.41, "learning_rate": 4.9811361525094246e-05, "loss": 1.2253, "step": 3280 }, { "epoch": 0.41, "learning_rate": 4.980692362557654e-05, "loss": 1.2135, "step": 3290 }, { "epoch": 0.41, "learning_rate": 4.9802434330655536e-05, "loss": 1.227, "step": 3300 }, { "epoch": 0.41, "learning_rate": 4.979789364963224e-05, "loss": 1.2456, "step": 3310 }, { "epoch": 0.41, "learning_rate": 4.979330159191411e-05, "loss": 1.2393, "step": 3320 }, { "epoch": 0.41, "learning_rate": 4.978865816701505e-05, "loss": 1.2548, "step": 3330 }, { "epoch": 0.41, "learning_rate": 4.9783963384555386e-05, "loss": 1.2317, "step": 3340 }, { "epoch": 0.41, "learning_rate": 4.977921725426185e-05, "loss": 1.2561, "step": 3350 }, { "epoch": 0.42, "learning_rate": 4.977441978596754e-05, "loss": 1.2392, "step": 3360 }, { "epoch": 0.42, "learning_rate": 4.976957098961195e-05, "loss": 1.1877, "step": 3370 }, { "epoch": 0.42, "learning_rate": 4.976467087524089e-05, "loss": 1.1954, "step": 3380 }, { "epoch": 0.42, "learning_rate": 4.975971945300651e-05, "loss": 1.2321, "step": 3390 }, { "epoch": 0.42, "learning_rate": 4.975471673316724e-05, "loss": 1.2348, "step": 3400 }, { "epoch": 0.42, "learning_rate": 4.9749662726087795e-05, "loss": 1.2135, "step": 3410 }, { "epoch": 0.42, "learning_rate": 4.974455744223916e-05, "loss": 1.2201, "step": 3420 }, { "epoch": 0.42, "learning_rate": 4.973940089219854e-05, "loss": 1.2252, "step": 3430 }, { "epoch": 0.43, "learning_rate": 4.973419308664938e-05, "loss": 1.221, "step": 3440 }, { "epoch": 0.43, "learning_rate": 4.972893403638128e-05, "loss": 1.2328, "step": 3450 }, { "epoch": 0.43, "learning_rate": 4.9723623752290044e-05, "loss": 1.213, "step": 3460 }, { "epoch": 0.43, "learning_rate": 4.97182622453776e-05, "loss": 1.2168, "step": 3470 }, { "epoch": 0.43, "learning_rate": 4.9712849526752006e-05, "loss": 1.2423, "step": 3480 }, { "epoch": 0.43, "learning_rate": 4.970738560762743e-05, "loss": 1.2456, "step": 3490 }, { "epoch": 0.43, "learning_rate": 4.9701870499324124e-05, "loss": 1.2492, "step": 3500 }, { "epoch": 0.43, "eval_loss": 1.128912329673767, "eval_runtime": 5.7611, "eval_samples_per_second": 89.914, "eval_steps_per_second": 11.283, "step": 3500 }, { "epoch": 0.43, "learning_rate": 4.969630421326835e-05, "loss": 1.2526, "step": 3510 }, { "epoch": 0.44, "learning_rate": 4.969068676099246e-05, "loss": 1.2282, "step": 3520 }, { "epoch": 0.44, "learning_rate": 4.9685018154134774e-05, "loss": 1.2369, "step": 3530 }, { "epoch": 0.44, "learning_rate": 4.967929840443962e-05, "loss": 1.2392, "step": 3540 }, { "epoch": 0.44, "learning_rate": 4.9673527523757265e-05, "loss": 1.2325, "step": 3550 }, { "epoch": 0.44, "learning_rate": 4.966770552404391e-05, "loss": 1.2265, "step": 3560 }, { "epoch": 0.44, "learning_rate": 4.9661832417361675e-05, "loss": 1.1968, "step": 3570 }, { "epoch": 0.44, "learning_rate": 4.965590821587858e-05, "loss": 1.2021, "step": 3580 }, { "epoch": 0.44, "learning_rate": 4.9649932931868455e-05, "loss": 1.215, "step": 3590 }, { "epoch": 0.45, "learning_rate": 4.964390657771102e-05, "loss": 1.2036, "step": 3600 }, { "epoch": 0.45, "learning_rate": 4.9637829165891755e-05, "loss": 1.2095, "step": 3610 }, { "epoch": 0.45, "learning_rate": 4.963170070900197e-05, "loss": 1.179, "step": 3620 }, { "epoch": 0.45, "learning_rate": 4.962552121973869e-05, "loss": 1.2714, "step": 3630 }, { "epoch": 0.45, "learning_rate": 4.9619290710904684e-05, "loss": 1.1883, "step": 3640 }, { "epoch": 0.45, "learning_rate": 4.9613009195408426e-05, "loss": 1.2114, "step": 3650 }, { "epoch": 0.45, "learning_rate": 4.9606676686264076e-05, "loss": 1.2138, "step": 3660 }, { "epoch": 0.45, "learning_rate": 4.960029319659142e-05, "loss": 1.2177, "step": 3670 }, { "epoch": 0.46, "learning_rate": 4.9593858739615886e-05, "loss": 1.2479, "step": 3680 }, { "epoch": 0.46, "learning_rate": 4.9587373328668494e-05, "loss": 1.1912, "step": 3690 }, { "epoch": 0.46, "learning_rate": 4.958083697718583e-05, "loss": 1.237, "step": 3700 }, { "epoch": 0.46, "learning_rate": 4.957424969871e-05, "loss": 1.2338, "step": 3710 }, { "epoch": 0.46, "learning_rate": 4.956761150688866e-05, "loss": 1.2546, "step": 3720 }, { "epoch": 0.46, "learning_rate": 4.956092241547491e-05, "loss": 1.2343, "step": 3730 }, { "epoch": 0.46, "learning_rate": 4.955418243832734e-05, "loss": 1.2286, "step": 3740 }, { "epoch": 0.46, "learning_rate": 4.954739158940993e-05, "loss": 1.2078, "step": 3750 }, { "epoch": 0.47, "learning_rate": 4.9540549882792095e-05, "loss": 1.1959, "step": 3760 }, { "epoch": 0.47, "learning_rate": 4.953365733264859e-05, "loss": 1.2023, "step": 3770 }, { "epoch": 0.47, "learning_rate": 4.95267139532595e-05, "loss": 1.2087, "step": 3780 }, { "epoch": 0.47, "learning_rate": 4.9519719759010264e-05, "loss": 1.2019, "step": 3790 }, { "epoch": 0.47, "learning_rate": 4.9512674764391554e-05, "loss": 1.2305, "step": 3800 }, { "epoch": 0.47, "learning_rate": 4.9505578983999316e-05, "loss": 1.2218, "step": 3810 }, { "epoch": 0.47, "learning_rate": 4.94984324325347e-05, "loss": 1.1838, "step": 3820 }, { "epoch": 0.47, "learning_rate": 4.949123512480406e-05, "loss": 1.2167, "step": 3830 }, { "epoch": 0.48, "learning_rate": 4.94839870757189e-05, "loss": 1.2035, "step": 3840 }, { "epoch": 0.48, "learning_rate": 4.947668830029584e-05, "loss": 1.1523, "step": 3850 }, { "epoch": 0.48, "learning_rate": 4.946933881365662e-05, "loss": 1.2367, "step": 3860 }, { "epoch": 0.48, "learning_rate": 4.946193863102802e-05, "loss": 1.2044, "step": 3870 }, { "epoch": 0.48, "learning_rate": 4.9454487767741864e-05, "loss": 1.1548, "step": 3880 }, { "epoch": 0.48, "learning_rate": 4.944698623923498e-05, "loss": 1.2412, "step": 3890 }, { "epoch": 0.48, "learning_rate": 4.9439434061049174e-05, "loss": 1.2285, "step": 3900 }, { "epoch": 0.48, "learning_rate": 4.943183124883115e-05, "loss": 1.2423, "step": 3910 }, { "epoch": 0.48, "learning_rate": 4.942417781833256e-05, "loss": 1.1856, "step": 3920 }, { "epoch": 0.49, "learning_rate": 4.9416473785409914e-05, "loss": 1.2055, "step": 3930 }, { "epoch": 0.49, "learning_rate": 4.940871916602454e-05, "loss": 1.2728, "step": 3940 }, { "epoch": 0.49, "learning_rate": 4.9400913976242604e-05, "loss": 1.1906, "step": 3950 }, { "epoch": 0.49, "learning_rate": 4.9393058232235026e-05, "loss": 1.2178, "step": 3960 }, { "epoch": 0.49, "learning_rate": 4.938515195027746e-05, "loss": 1.2145, "step": 3970 }, { "epoch": 0.49, "learning_rate": 4.937719514675029e-05, "loss": 1.1979, "step": 3980 }, { "epoch": 0.49, "learning_rate": 4.936918783813855e-05, "loss": 1.1626, "step": 3990 }, { "epoch": 0.49, "learning_rate": 4.936113004103192e-05, "loss": 1.2156, "step": 4000 }, { "epoch": 0.49, "eval_loss": 1.1184728145599365, "eval_runtime": 5.7588, "eval_samples_per_second": 89.949, "eval_steps_per_second": 11.287, "step": 4000 }, { "epoch": 0.5, "learning_rate": 4.935302177212468e-05, "loss": 1.1915, "step": 4010 }, { "epoch": 0.5, "learning_rate": 4.9344863048215665e-05, "loss": 1.2098, "step": 4020 }, { "epoch": 0.5, "learning_rate": 4.9336653886208265e-05, "loss": 1.2082, "step": 4030 }, { "epoch": 0.5, "learning_rate": 4.9328394303110365e-05, "loss": 1.2193, "step": 4040 }, { "epoch": 0.5, "learning_rate": 4.932008431603431e-05, "loss": 1.2345, "step": 4050 }, { "epoch": 0.5, "learning_rate": 4.931172394219686e-05, "loss": 1.1834, "step": 4060 }, { "epoch": 0.5, "learning_rate": 4.93033131989192e-05, "loss": 1.2171, "step": 4070 }, { "epoch": 0.5, "learning_rate": 4.929485210362682e-05, "loss": 1.2302, "step": 4080 }, { "epoch": 0.51, "learning_rate": 4.9286340673849586e-05, "loss": 1.186, "step": 4090 }, { "epoch": 0.51, "learning_rate": 4.9277778927221615e-05, "loss": 1.1853, "step": 4100 }, { "epoch": 0.51, "learning_rate": 4.926916688148127e-05, "loss": 1.2002, "step": 4110 }, { "epoch": 0.51, "learning_rate": 4.926050455447114e-05, "loss": 1.1941, "step": 4120 }, { "epoch": 0.51, "learning_rate": 4.925179196413797e-05, "loss": 1.2144, "step": 4130 }, { "epoch": 0.51, "learning_rate": 4.924302912853268e-05, "loss": 1.2232, "step": 4140 }, { "epoch": 0.51, "learning_rate": 4.9234216065810226e-05, "loss": 1.1838, "step": 4150 }, { "epoch": 0.51, "learning_rate": 4.922535279422968e-05, "loss": 1.195, "step": 4160 }, { "epoch": 0.52, "learning_rate": 4.9216439332154116e-05, "loss": 1.184, "step": 4170 }, { "epoch": 0.52, "learning_rate": 4.920747569805059e-05, "loss": 1.2194, "step": 4180 }, { "epoch": 0.52, "learning_rate": 4.9198461910490115e-05, "loss": 1.1878, "step": 4190 }, { "epoch": 0.52, "learning_rate": 4.91893979881476e-05, "loss": 1.1877, "step": 4200 }, { "epoch": 0.52, "learning_rate": 4.918028394980184e-05, "loss": 1.2224, "step": 4210 }, { "epoch": 0.52, "learning_rate": 4.9171119814335456e-05, "loss": 1.187, "step": 4220 }, { "epoch": 0.52, "learning_rate": 4.9161905600734856e-05, "loss": 1.2038, "step": 4230 }, { "epoch": 0.52, "learning_rate": 4.9152641328090196e-05, "loss": 1.2215, "step": 4240 }, { "epoch": 0.53, "learning_rate": 4.914332701559536e-05, "loss": 1.234, "step": 4250 }, { "epoch": 0.53, "learning_rate": 4.91339626825479e-05, "loss": 1.2328, "step": 4260 }, { "epoch": 0.53, "learning_rate": 4.9124548348349e-05, "loss": 1.1991, "step": 4270 }, { "epoch": 0.53, "learning_rate": 4.911508403250342e-05, "loss": 1.2052, "step": 4280 }, { "epoch": 0.53, "learning_rate": 4.910556975461952e-05, "loss": 1.1817, "step": 4290 }, { "epoch": 0.53, "learning_rate": 4.9096005534409105e-05, "loss": 1.2053, "step": 4300 }, { "epoch": 0.53, "learning_rate": 4.908639139168752e-05, "loss": 1.2306, "step": 4310 }, { "epoch": 0.53, "learning_rate": 4.907672734637349e-05, "loss": 1.2283, "step": 4320 }, { "epoch": 0.54, "learning_rate": 4.906701341848915e-05, "loss": 1.1788, "step": 4330 }, { "epoch": 0.54, "learning_rate": 4.905724962815998e-05, "loss": 1.1947, "step": 4340 }, { "epoch": 0.54, "learning_rate": 4.9047435995614774e-05, "loss": 1.1843, "step": 4350 }, { "epoch": 0.54, "learning_rate": 4.9037572541185575e-05, "loss": 1.2393, "step": 4360 }, { "epoch": 0.54, "learning_rate": 4.9027659285307645e-05, "loss": 1.2127, "step": 4370 }, { "epoch": 0.54, "learning_rate": 4.901769624851946e-05, "loss": 1.1986, "step": 4380 }, { "epoch": 0.54, "learning_rate": 4.900768345146258e-05, "loss": 1.1879, "step": 4390 }, { "epoch": 0.54, "learning_rate": 4.8997620914881704e-05, "loss": 1.2236, "step": 4400 }, { "epoch": 0.55, "learning_rate": 4.8987508659624556e-05, "loss": 1.2315, "step": 4410 }, { "epoch": 0.55, "learning_rate": 4.8977346706641894e-05, "loss": 1.1853, "step": 4420 }, { "epoch": 0.55, "learning_rate": 4.8967135076987404e-05, "loss": 1.1835, "step": 4430 }, { "epoch": 0.55, "learning_rate": 4.8956873791817714e-05, "loss": 1.1746, "step": 4440 }, { "epoch": 0.55, "learning_rate": 4.894656287239234e-05, "loss": 1.197, "step": 4450 }, { "epoch": 0.55, "learning_rate": 4.893620234007361e-05, "loss": 1.1854, "step": 4460 }, { "epoch": 0.55, "learning_rate": 4.892579221632666e-05, "loss": 1.2002, "step": 4470 }, { "epoch": 0.55, "learning_rate": 4.891533252271935e-05, "loss": 1.1718, "step": 4480 }, { "epoch": 0.56, "learning_rate": 4.8904823280922254e-05, "loss": 1.1758, "step": 4490 }, { "epoch": 0.56, "learning_rate": 4.88942645127086e-05, "loss": 1.1966, "step": 4500 }, { "epoch": 0.56, "eval_loss": 1.1033425331115723, "eval_runtime": 5.7651, "eval_samples_per_second": 89.852, "eval_steps_per_second": 11.275, "step": 4500 }, { "epoch": 0.56, "learning_rate": 4.888365623995421e-05, "loss": 1.2163, "step": 4510 }, { "epoch": 0.56, "learning_rate": 4.8872998484637496e-05, "loss": 1.1739, "step": 4520 }, { "epoch": 0.56, "learning_rate": 4.8862291268839364e-05, "loss": 1.1955, "step": 4530 }, { "epoch": 0.56, "learning_rate": 4.885153461474322e-05, "loss": 1.199, "step": 4540 }, { "epoch": 0.56, "learning_rate": 4.8840728544634865e-05, "loss": 1.1892, "step": 4550 }, { "epoch": 0.56, "learning_rate": 4.882987308090251e-05, "loss": 1.2132, "step": 4560 }, { "epoch": 0.57, "learning_rate": 4.881896824603668e-05, "loss": 1.1562, "step": 4570 }, { "epoch": 0.57, "learning_rate": 4.8808014062630195e-05, "loss": 1.204, "step": 4580 }, { "epoch": 0.57, "learning_rate": 4.8797010553378125e-05, "loss": 1.1742, "step": 4590 }, { "epoch": 0.57, "learning_rate": 4.8785957741077724e-05, "loss": 1.1683, "step": 4600 }, { "epoch": 0.57, "learning_rate": 4.877485564862839e-05, "loss": 1.2463, "step": 4610 }, { "epoch": 0.57, "learning_rate": 4.876370429903163e-05, "loss": 1.1703, "step": 4620 }, { "epoch": 0.57, "learning_rate": 4.875250371539101e-05, "loss": 1.1976, "step": 4630 }, { "epoch": 0.57, "learning_rate": 4.8741253920912054e-05, "loss": 1.2428, "step": 4640 }, { "epoch": 0.58, "learning_rate": 4.8729954938902314e-05, "loss": 1.1834, "step": 4650 }, { "epoch": 0.58, "learning_rate": 4.871860679277119e-05, "loss": 1.2081, "step": 4660 }, { "epoch": 0.58, "learning_rate": 4.870720950602997e-05, "loss": 1.2135, "step": 4670 }, { "epoch": 0.58, "learning_rate": 4.8695763102291745e-05, "loss": 1.1742, "step": 4680 }, { "epoch": 0.58, "learning_rate": 4.868426760527136e-05, "loss": 1.1693, "step": 4690 }, { "epoch": 0.58, "learning_rate": 4.86727230387854e-05, "loss": 1.1987, "step": 4700 }, { "epoch": 0.58, "learning_rate": 4.866112942675206e-05, "loss": 1.1927, "step": 4710 }, { "epoch": 0.58, "learning_rate": 4.8649486793191214e-05, "loss": 1.223, "step": 4720 }, { "epoch": 0.59, "learning_rate": 4.8637795162224255e-05, "loss": 1.184, "step": 4730 }, { "epoch": 0.59, "learning_rate": 4.8626054558074094e-05, "loss": 1.1742, "step": 4740 }, { "epoch": 0.59, "learning_rate": 4.861426500506512e-05, "loss": 1.1632, "step": 4750 }, { "epoch": 0.59, "learning_rate": 4.8602426527623116e-05, "loss": 1.2006, "step": 4760 }, { "epoch": 0.59, "learning_rate": 4.859053915027526e-05, "loss": 1.213, "step": 4770 }, { "epoch": 0.59, "learning_rate": 4.857860289765002e-05, "loss": 1.2422, "step": 4780 }, { "epoch": 0.59, "learning_rate": 4.85666177944771e-05, "loss": 1.1855, "step": 4790 }, { "epoch": 0.59, "learning_rate": 4.8554583865587466e-05, "loss": 1.1988, "step": 4800 }, { "epoch": 0.59, "learning_rate": 4.85425011359132e-05, "loss": 1.192, "step": 4810 }, { "epoch": 0.6, "learning_rate": 4.853036963048752e-05, "loss": 1.248, "step": 4820 }, { "epoch": 0.6, "learning_rate": 4.851818937444466e-05, "loss": 1.1865, "step": 4830 }, { "epoch": 0.6, "learning_rate": 4.85059603930199e-05, "loss": 1.2067, "step": 4840 }, { "epoch": 0.6, "learning_rate": 4.849368271154945e-05, "loss": 1.1889, "step": 4850 }, { "epoch": 0.6, "learning_rate": 4.8481356355470406e-05, "loss": 1.1954, "step": 4860 }, { "epoch": 0.6, "learning_rate": 4.846898135032073e-05, "loss": 1.2122, "step": 4870 }, { "epoch": 0.6, "learning_rate": 4.845655772173916e-05, "loss": 1.1761, "step": 4880 }, { "epoch": 0.6, "learning_rate": 4.844408549546519e-05, "loss": 1.2051, "step": 4890 }, { "epoch": 0.61, "learning_rate": 4.843156469733898e-05, "loss": 1.1774, "step": 4900 }, { "epoch": 0.61, "learning_rate": 4.841899535330133e-05, "loss": 1.211, "step": 4910 }, { "epoch": 0.61, "learning_rate": 4.840637748939363e-05, "loss": 1.1574, "step": 4920 }, { "epoch": 0.61, "learning_rate": 4.839371113175777e-05, "loss": 1.1925, "step": 4930 }, { "epoch": 0.61, "learning_rate": 4.838099630663613e-05, "loss": 1.2132, "step": 4940 }, { "epoch": 0.61, "learning_rate": 4.83682330403715e-05, "loss": 1.2053, "step": 4950 }, { "epoch": 0.61, "learning_rate": 4.8355421359407035e-05, "loss": 1.2115, "step": 4960 }, { "epoch": 0.61, "learning_rate": 4.834256129028618e-05, "loss": 1.2162, "step": 4970 }, { "epoch": 0.62, "learning_rate": 4.8329652859652655e-05, "loss": 1.198, "step": 4980 }, { "epoch": 0.62, "learning_rate": 4.831669609425035e-05, "loss": 1.1876, "step": 4990 }, { "epoch": 0.62, "learning_rate": 4.830369102092333e-05, "loss": 1.1742, "step": 5000 }, { "epoch": 0.62, "eval_loss": 1.0941241979599, "eval_runtime": 5.7612, "eval_samples_per_second": 89.912, "eval_steps_per_second": 11.282, "step": 5000 }, { "epoch": 0.62, "learning_rate": 4.829063766661571e-05, "loss": 1.1852, "step": 5010 }, { "epoch": 0.62, "learning_rate": 4.827753605837166e-05, "loss": 1.1893, "step": 5020 }, { "epoch": 0.62, "learning_rate": 4.8264386223335314e-05, "loss": 1.2032, "step": 5030 }, { "epoch": 0.62, "learning_rate": 4.8251188188750705e-05, "loss": 1.1534, "step": 5040 }, { "epoch": 0.62, "learning_rate": 4.823794198196176e-05, "loss": 1.1918, "step": 5050 }, { "epoch": 0.63, "learning_rate": 4.822464763041219e-05, "loss": 1.1831, "step": 5060 }, { "epoch": 0.63, "learning_rate": 4.821130516164546e-05, "loss": 1.2074, "step": 5070 }, { "epoch": 0.63, "learning_rate": 4.819791460330472e-05, "loss": 1.1676, "step": 5080 }, { "epoch": 0.63, "learning_rate": 4.818447598313275e-05, "loss": 1.1626, "step": 5090 }, { "epoch": 0.63, "learning_rate": 4.817098932897191e-05, "loss": 1.2068, "step": 5100 }, { "epoch": 0.63, "learning_rate": 4.815745466876409e-05, "loss": 1.1729, "step": 5110 }, { "epoch": 0.63, "learning_rate": 4.814387203055061e-05, "loss": 1.207, "step": 5120 }, { "epoch": 0.63, "learning_rate": 4.813024144247223e-05, "loss": 1.1348, "step": 5130 }, { "epoch": 0.64, "learning_rate": 4.8116562932769014e-05, "loss": 1.2031, "step": 5140 }, { "epoch": 0.64, "learning_rate": 4.8102836529780336e-05, "loss": 1.1794, "step": 5150 }, { "epoch": 0.64, "learning_rate": 4.808906226194479e-05, "loss": 1.1771, "step": 5160 }, { "epoch": 0.64, "learning_rate": 4.807524015780015e-05, "loss": 1.2, "step": 5170 }, { "epoch": 0.64, "learning_rate": 4.806137024598326e-05, "loss": 1.1804, "step": 5180 }, { "epoch": 0.64, "learning_rate": 4.804745255523005e-05, "loss": 1.1825, "step": 5190 }, { "epoch": 0.64, "learning_rate": 4.803348711437543e-05, "loss": 1.1929, "step": 5200 }, { "epoch": 0.64, "learning_rate": 4.801947395235323e-05, "loss": 1.1898, "step": 5210 }, { "epoch": 0.65, "learning_rate": 4.800541309819616e-05, "loss": 1.151, "step": 5220 }, { "epoch": 0.65, "learning_rate": 4.799130458103572e-05, "loss": 1.1799, "step": 5230 }, { "epoch": 0.65, "learning_rate": 4.7977148430102205e-05, "loss": 1.1825, "step": 5240 }, { "epoch": 0.65, "learning_rate": 4.796294467472454e-05, "loss": 1.1978, "step": 5250 }, { "epoch": 0.65, "learning_rate": 4.794869334433032e-05, "loss": 1.1872, "step": 5260 }, { "epoch": 0.65, "learning_rate": 4.793439446844568e-05, "loss": 1.196, "step": 5270 }, { "epoch": 0.65, "learning_rate": 4.792004807669529e-05, "loss": 1.119, "step": 5280 }, { "epoch": 0.65, "learning_rate": 4.790565419880223e-05, "loss": 1.1771, "step": 5290 }, { "epoch": 0.66, "learning_rate": 4.789121286458798e-05, "loss": 1.1829, "step": 5300 }, { "epoch": 0.66, "learning_rate": 4.787672410397235e-05, "loss": 1.1739, "step": 5310 }, { "epoch": 0.66, "learning_rate": 4.7862187946973404e-05, "loss": 1.2195, "step": 5320 }, { "epoch": 0.66, "learning_rate": 4.784760442370737e-05, "loss": 1.1763, "step": 5330 }, { "epoch": 0.66, "learning_rate": 4.783297356438867e-05, "loss": 1.1886, "step": 5340 }, { "epoch": 0.66, "learning_rate": 4.781829539932975e-05, "loss": 1.2119, "step": 5350 }, { "epoch": 0.66, "learning_rate": 4.7803569958941076e-05, "loss": 1.1745, "step": 5360 }, { "epoch": 0.66, "learning_rate": 4.778879727373107e-05, "loss": 1.1848, "step": 5370 }, { "epoch": 0.67, "learning_rate": 4.777397737430603e-05, "loss": 1.1866, "step": 5380 }, { "epoch": 0.67, "learning_rate": 4.775911029137008e-05, "loss": 1.1717, "step": 5390 }, { "epoch": 0.67, "learning_rate": 4.774419605572508e-05, "loss": 1.1806, "step": 5400 }, { "epoch": 0.67, "learning_rate": 4.772923469827061e-05, "loss": 1.1827, "step": 5410 }, { "epoch": 0.67, "learning_rate": 4.7714226250003844e-05, "loss": 1.1779, "step": 5420 }, { "epoch": 0.67, "learning_rate": 4.769917074201956e-05, "loss": 1.1996, "step": 5430 }, { "epoch": 0.67, "learning_rate": 4.7684068205510006e-05, "loss": 1.2105, "step": 5440 }, { "epoch": 0.67, "learning_rate": 4.766891867176487e-05, "loss": 1.2059, "step": 5450 }, { "epoch": 0.68, "learning_rate": 4.765372217217121e-05, "loss": 1.1851, "step": 5460 }, { "epoch": 0.68, "learning_rate": 4.7638478738213386e-05, "loss": 1.1709, "step": 5470 }, { "epoch": 0.68, "learning_rate": 4.762318840147302e-05, "loss": 1.1775, "step": 5480 }, { "epoch": 0.68, "learning_rate": 4.7607851193628873e-05, "loss": 1.1889, "step": 5490 }, { "epoch": 0.68, "learning_rate": 4.759246714645684e-05, "loss": 1.2105, "step": 5500 }, { "epoch": 0.68, "eval_loss": 1.083298921585083, "eval_runtime": 5.7617, "eval_samples_per_second": 89.904, "eval_steps_per_second": 11.281, "step": 5500 }, { "epoch": 0.68, "learning_rate": 4.757703629182984e-05, "loss": 1.1806, "step": 5510 }, { "epoch": 0.68, "learning_rate": 4.7561558661717785e-05, "loss": 1.1953, "step": 5520 }, { "epoch": 0.68, "learning_rate": 4.75460342881875e-05, "loss": 1.1784, "step": 5530 }, { "epoch": 0.69, "learning_rate": 4.753046320340262e-05, "loss": 1.1747, "step": 5540 }, { "epoch": 0.69, "learning_rate": 4.751484543962361e-05, "loss": 1.1441, "step": 5550 }, { "epoch": 0.69, "learning_rate": 4.7499181029207596e-05, "loss": 1.1629, "step": 5560 }, { "epoch": 0.69, "learning_rate": 4.748347000460837e-05, "loss": 1.1665, "step": 5570 }, { "epoch": 0.69, "learning_rate": 4.7467712398376304e-05, "loss": 1.1578, "step": 5580 }, { "epoch": 0.69, "learning_rate": 4.745190824315828e-05, "loss": 1.1769, "step": 5590 }, { "epoch": 0.69, "learning_rate": 4.7436057571697604e-05, "loss": 1.1652, "step": 5600 }, { "epoch": 0.69, "learning_rate": 4.7420160416833954e-05, "loss": 1.1147, "step": 5610 }, { "epoch": 0.7, "learning_rate": 4.7404216811503344e-05, "loss": 1.1666, "step": 5620 }, { "epoch": 0.7, "learning_rate": 4.738822678873799e-05, "loss": 1.1638, "step": 5630 }, { "epoch": 0.7, "learning_rate": 4.73721903816663e-05, "loss": 1.1673, "step": 5640 }, { "epoch": 0.7, "learning_rate": 4.735610762351278e-05, "loss": 1.1946, "step": 5650 }, { "epoch": 0.7, "learning_rate": 4.733997854759794e-05, "loss": 1.1892, "step": 5660 }, { "epoch": 0.7, "learning_rate": 4.732380318733829e-05, "loss": 1.1751, "step": 5670 }, { "epoch": 0.7, "learning_rate": 4.730758157624619e-05, "loss": 1.1635, "step": 5680 }, { "epoch": 0.7, "learning_rate": 4.729131374792989e-05, "loss": 1.151, "step": 5690 }, { "epoch": 0.71, "learning_rate": 4.72749997360933e-05, "loss": 1.1779, "step": 5700 }, { "epoch": 0.71, "learning_rate": 4.7258639574536114e-05, "loss": 1.1868, "step": 5710 }, { "epoch": 0.71, "learning_rate": 4.724223329715356e-05, "loss": 1.2185, "step": 5720 }, { "epoch": 0.71, "learning_rate": 4.7225780937936445e-05, "loss": 1.1568, "step": 5730 }, { "epoch": 0.71, "learning_rate": 4.720928253097105e-05, "loss": 1.175, "step": 5740 }, { "epoch": 0.71, "learning_rate": 4.719273811043906e-05, "loss": 1.1683, "step": 5750 }, { "epoch": 0.71, "learning_rate": 4.717614771061747e-05, "loss": 1.1569, "step": 5760 }, { "epoch": 0.71, "learning_rate": 4.7159511365878556e-05, "loss": 1.1615, "step": 5770 }, { "epoch": 0.71, "learning_rate": 4.714282911068978e-05, "loss": 1.1535, "step": 5780 }, { "epoch": 0.72, "learning_rate": 4.712610097961372e-05, "loss": 1.1987, "step": 5790 }, { "epoch": 0.72, "learning_rate": 4.7109327007308004e-05, "loss": 1.17, "step": 5800 }, { "epoch": 0.72, "learning_rate": 4.7092507228525216e-05, "loss": 1.1645, "step": 5810 }, { "epoch": 0.72, "learning_rate": 4.707564167811287e-05, "loss": 1.1478, "step": 5820 }, { "epoch": 0.72, "learning_rate": 4.7058730391013305e-05, "loss": 1.1658, "step": 5830 }, { "epoch": 0.72, "learning_rate": 4.70417734022636e-05, "loss": 1.1564, "step": 5840 }, { "epoch": 0.72, "learning_rate": 4.7024770746995526e-05, "loss": 1.184, "step": 5850 }, { "epoch": 0.72, "learning_rate": 4.700772246043549e-05, "loss": 1.1794, "step": 5860 }, { "epoch": 0.73, "learning_rate": 4.69906285779044e-05, "loss": 1.1645, "step": 5870 }, { "epoch": 0.73, "learning_rate": 4.6973489134817664e-05, "loss": 1.1377, "step": 5880 }, { "epoch": 0.73, "learning_rate": 4.695630416668506e-05, "loss": 1.1282, "step": 5890 }, { "epoch": 0.73, "learning_rate": 4.693907370911071e-05, "loss": 1.2042, "step": 5900 }, { "epoch": 0.73, "learning_rate": 4.6921797797792944e-05, "loss": 1.1495, "step": 5910 }, { "epoch": 0.73, "learning_rate": 4.690447646852431e-05, "loss": 1.1368, "step": 5920 }, { "epoch": 0.73, "learning_rate": 4.6887109757191413e-05, "loss": 1.1746, "step": 5930 }, { "epoch": 0.73, "learning_rate": 4.686969769977491e-05, "loss": 1.1704, "step": 5940 }, { "epoch": 0.74, "learning_rate": 4.6852240332349384e-05, "loss": 1.1368, "step": 5950 }, { "epoch": 0.74, "learning_rate": 4.683473769108333e-05, "loss": 1.168, "step": 5960 }, { "epoch": 0.74, "learning_rate": 4.6817189812238975e-05, "loss": 1.1668, "step": 5970 }, { "epoch": 0.74, "learning_rate": 4.6799596732172346e-05, "loss": 1.16, "step": 5980 }, { "epoch": 0.74, "learning_rate": 4.6781958487333066e-05, "loss": 1.1758, "step": 5990 }, { "epoch": 0.74, "learning_rate": 4.676427511426435e-05, "loss": 1.15, "step": 6000 }, { "epoch": 0.74, "eval_loss": 1.0752549171447754, "eval_runtime": 5.7664, "eval_samples_per_second": 89.831, "eval_steps_per_second": 11.272, "step": 6000 }, { "epoch": 0.74, "learning_rate": 4.674654664960292e-05, "loss": 1.1545, "step": 6010 }, { "epoch": 0.74, "learning_rate": 4.6728773130078895e-05, "loss": 1.1549, "step": 6020 }, { "epoch": 0.75, "learning_rate": 4.671095459251576e-05, "loss": 1.1818, "step": 6030 }, { "epoch": 0.75, "learning_rate": 4.6693091073830255e-05, "loss": 1.1497, "step": 6040 }, { "epoch": 0.75, "learning_rate": 4.667518261103233e-05, "loss": 1.1583, "step": 6050 }, { "epoch": 0.75, "learning_rate": 4.665722924122504e-05, "loss": 1.1194, "step": 6060 }, { "epoch": 0.75, "learning_rate": 4.663923100160447e-05, "loss": 1.1617, "step": 6070 }, { "epoch": 0.75, "learning_rate": 4.662118792945967e-05, "loss": 1.1584, "step": 6080 }, { "epoch": 0.75, "learning_rate": 4.660310006217261e-05, "loss": 1.1676, "step": 6090 }, { "epoch": 0.75, "learning_rate": 4.6584967437218016e-05, "loss": 1.176, "step": 6100 }, { "epoch": 0.76, "learning_rate": 4.6566790092163373e-05, "loss": 1.1522, "step": 6110 }, { "epoch": 0.76, "learning_rate": 4.654856806466882e-05, "loss": 1.1381, "step": 6120 }, { "epoch": 0.76, "learning_rate": 4.653030139248704e-05, "loss": 1.1316, "step": 6130 }, { "epoch": 0.76, "learning_rate": 4.651199011346325e-05, "loss": 1.1556, "step": 6140 }, { "epoch": 0.76, "learning_rate": 4.649363426553506e-05, "loss": 1.2094, "step": 6150 }, { "epoch": 0.76, "learning_rate": 4.647523388673244e-05, "loss": 1.1663, "step": 6160 }, { "epoch": 0.76, "learning_rate": 4.645678901517758e-05, "loss": 1.1911, "step": 6170 }, { "epoch": 0.76, "learning_rate": 4.6438299689084894e-05, "loss": 1.1605, "step": 6180 }, { "epoch": 0.77, "learning_rate": 4.641976594676088e-05, "loss": 1.1717, "step": 6190 }, { "epoch": 0.77, "learning_rate": 4.6401187826604054e-05, "loss": 1.1539, "step": 6200 }, { "epoch": 0.77, "learning_rate": 4.638256536710488e-05, "loss": 1.1711, "step": 6210 }, { "epoch": 0.77, "learning_rate": 4.636389860684568e-05, "loss": 1.174, "step": 6220 }, { "epoch": 0.77, "learning_rate": 4.634518758450057e-05, "loss": 1.1408, "step": 6230 }, { "epoch": 0.77, "learning_rate": 4.632643233883537e-05, "loss": 1.1988, "step": 6240 }, { "epoch": 0.77, "learning_rate": 4.630763290870749e-05, "loss": 1.1776, "step": 6250 }, { "epoch": 0.77, "learning_rate": 4.628878933306592e-05, "loss": 1.1396, "step": 6260 }, { "epoch": 0.78, "learning_rate": 4.62699016509511e-05, "loss": 1.1702, "step": 6270 }, { "epoch": 0.78, "learning_rate": 4.625096990149486e-05, "loss": 1.1576, "step": 6280 }, { "epoch": 0.78, "learning_rate": 4.623199412392029e-05, "loss": 1.1564, "step": 6290 }, { "epoch": 0.78, "learning_rate": 4.621297435754175e-05, "loss": 1.1574, "step": 6300 }, { "epoch": 0.78, "learning_rate": 4.61939106417647e-05, "loss": 1.1785, "step": 6310 }, { "epoch": 0.78, "learning_rate": 4.6174803016085676e-05, "loss": 1.1694, "step": 6320 }, { "epoch": 0.78, "learning_rate": 4.615565152009218e-05, "loss": 1.1607, "step": 6330 }, { "epoch": 0.78, "learning_rate": 4.6136456193462604e-05, "loss": 1.1641, "step": 6340 }, { "epoch": 0.79, "learning_rate": 4.611721707596615e-05, "loss": 1.1649, "step": 6350 }, { "epoch": 0.79, "learning_rate": 4.6097934207462734e-05, "loss": 1.1253, "step": 6360 }, { "epoch": 0.79, "learning_rate": 4.6078607627902944e-05, "loss": 1.1772, "step": 6370 }, { "epoch": 0.79, "learning_rate": 4.6059237377327905e-05, "loss": 1.146, "step": 6380 }, { "epoch": 0.79, "learning_rate": 4.603982349586924e-05, "loss": 1.1865, "step": 6390 }, { "epoch": 0.79, "learning_rate": 4.6020366023748945e-05, "loss": 1.1718, "step": 6400 }, { "epoch": 0.79, "learning_rate": 4.6000865001279345e-05, "loss": 1.1528, "step": 6410 }, { "epoch": 0.79, "learning_rate": 4.598132046886299e-05, "loss": 1.1702, "step": 6420 }, { "epoch": 0.8, "learning_rate": 4.5961732466992566e-05, "loss": 1.1807, "step": 6430 }, { "epoch": 0.8, "learning_rate": 4.5942101036250826e-05, "loss": 1.1482, "step": 6440 }, { "epoch": 0.8, "learning_rate": 4.592242621731051e-05, "loss": 1.1516, "step": 6450 }, { "epoch": 0.8, "learning_rate": 4.590270805093423e-05, "loss": 1.1622, "step": 6460 }, { "epoch": 0.8, "learning_rate": 4.588294657797442e-05, "loss": 1.1704, "step": 6470 }, { "epoch": 0.8, "learning_rate": 4.586314183937324e-05, "loss": 1.1556, "step": 6480 }, { "epoch": 0.8, "learning_rate": 4.584329387616246e-05, "loss": 1.1277, "step": 6490 }, { "epoch": 0.8, "learning_rate": 4.582340272946345e-05, "loss": 1.104, "step": 6500 }, { "epoch": 0.8, "eval_loss": 1.0666757822036743, "eval_runtime": 5.7634, "eval_samples_per_second": 89.878, "eval_steps_per_second": 11.278, "step": 6500 }, { "epoch": 0.81, "learning_rate": 4.5803468440487004e-05, "loss": 1.153, "step": 6510 }, { "epoch": 0.81, "learning_rate": 4.578349105053334e-05, "loss": 1.1413, "step": 6520 }, { "epoch": 0.81, "learning_rate": 4.576347060099192e-05, "loss": 1.147, "step": 6530 }, { "epoch": 0.81, "learning_rate": 4.5743407133341484e-05, "loss": 1.1875, "step": 6540 }, { "epoch": 0.81, "learning_rate": 4.5723300689149844e-05, "loss": 1.1913, "step": 6550 }, { "epoch": 0.81, "learning_rate": 4.570315131007388e-05, "loss": 1.1226, "step": 6560 }, { "epoch": 0.81, "learning_rate": 4.568295903785941e-05, "loss": 1.1865, "step": 6570 }, { "epoch": 0.81, "learning_rate": 4.566272391434112e-05, "loss": 1.1709, "step": 6580 }, { "epoch": 0.82, "learning_rate": 4.564244598144249e-05, "loss": 1.1522, "step": 6590 }, { "epoch": 0.82, "learning_rate": 4.562212528117568e-05, "loss": 1.1143, "step": 6600 }, { "epoch": 0.82, "learning_rate": 4.560176185564146e-05, "loss": 1.148, "step": 6610 }, { "epoch": 0.82, "learning_rate": 4.558135574702911e-05, "loss": 1.1695, "step": 6620 }, { "epoch": 0.82, "learning_rate": 4.5560906997616367e-05, "loss": 1.1309, "step": 6630 }, { "epoch": 0.82, "learning_rate": 4.5540415649769284e-05, "loss": 1.1146, "step": 6640 }, { "epoch": 0.82, "learning_rate": 4.5519881745942186e-05, "loss": 1.1798, "step": 6650 }, { "epoch": 0.82, "learning_rate": 4.549930532867757e-05, "loss": 1.1346, "step": 6660 }, { "epoch": 0.83, "learning_rate": 4.5478686440605984e-05, "loss": 1.1633, "step": 6670 }, { "epoch": 0.83, "learning_rate": 4.5458025124446005e-05, "loss": 1.1527, "step": 6680 }, { "epoch": 0.83, "learning_rate": 4.543732142300408e-05, "loss": 1.1634, "step": 6690 }, { "epoch": 0.83, "learning_rate": 4.5416575379174515e-05, "loss": 1.1537, "step": 6700 }, { "epoch": 0.83, "learning_rate": 4.539578703593929e-05, "loss": 1.1409, "step": 6710 }, { "epoch": 0.83, "learning_rate": 4.5374956436368055e-05, "loss": 1.1525, "step": 6720 }, { "epoch": 0.83, "learning_rate": 4.5354083623618e-05, "loss": 1.1341, "step": 6730 }, { "epoch": 0.83, "learning_rate": 4.533316864093376e-05, "loss": 1.1455, "step": 6740 }, { "epoch": 0.83, "learning_rate": 4.5312211531647364e-05, "loss": 1.1527, "step": 6750 }, { "epoch": 0.84, "learning_rate": 4.529121233917809e-05, "loss": 1.1348, "step": 6760 }, { "epoch": 0.84, "learning_rate": 4.527017110703243e-05, "loss": 1.1579, "step": 6770 }, { "epoch": 0.84, "learning_rate": 4.5249087878803965e-05, "loss": 1.1542, "step": 6780 }, { "epoch": 0.84, "learning_rate": 4.5227962698173274e-05, "loss": 1.1334, "step": 6790 }, { "epoch": 0.84, "learning_rate": 4.520679560890787e-05, "loss": 1.1488, "step": 6800 }, { "epoch": 0.84, "learning_rate": 4.518558665486209e-05, "loss": 1.1579, "step": 6810 }, { "epoch": 0.84, "learning_rate": 4.516433587997699e-05, "loss": 1.1086, "step": 6820 }, { "epoch": 0.84, "learning_rate": 4.51430433282803e-05, "loss": 1.1344, "step": 6830 }, { "epoch": 0.85, "learning_rate": 4.512170904388627e-05, "loss": 1.1374, "step": 6840 }, { "epoch": 0.85, "learning_rate": 4.510033307099565e-05, "loss": 1.1784, "step": 6850 }, { "epoch": 0.85, "learning_rate": 4.5078915453895535e-05, "loss": 1.1594, "step": 6860 }, { "epoch": 0.85, "learning_rate": 4.5057456236959296e-05, "loss": 1.1497, "step": 6870 }, { "epoch": 0.85, "learning_rate": 4.503595546464651e-05, "loss": 1.1802, "step": 6880 }, { "epoch": 0.85, "learning_rate": 4.501441318150283e-05, "loss": 1.1592, "step": 6890 }, { "epoch": 0.85, "learning_rate": 4.499282943215994e-05, "loss": 1.1789, "step": 6900 }, { "epoch": 0.85, "learning_rate": 4.497120426133541e-05, "loss": 1.1523, "step": 6910 }, { "epoch": 0.86, "learning_rate": 4.494953771383262e-05, "loss": 1.2015, "step": 6920 }, { "epoch": 0.86, "learning_rate": 4.4927829834540715e-05, "loss": 1.1259, "step": 6930 }, { "epoch": 0.86, "learning_rate": 4.4906080668434416e-05, "loss": 1.1982, "step": 6940 }, { "epoch": 0.86, "learning_rate": 4.488429026057404e-05, "loss": 1.1435, "step": 6950 }, { "epoch": 0.86, "learning_rate": 4.48624586561053e-05, "loss": 1.1524, "step": 6960 }, { "epoch": 0.86, "learning_rate": 4.4840585900259296e-05, "loss": 1.1662, "step": 6970 }, { "epoch": 0.86, "learning_rate": 4.481867203835237e-05, "loss": 1.1123, "step": 6980 }, { "epoch": 0.86, "learning_rate": 4.479671711578603e-05, "loss": 1.1497, "step": 6990 }, { "epoch": 0.87, "learning_rate": 4.4774721178046855e-05, "loss": 1.1224, "step": 7000 }, { "epoch": 0.87, "eval_loss": 1.0597686767578125, "eval_runtime": 5.764, "eval_samples_per_second": 89.867, "eval_steps_per_second": 11.277, "step": 7000 }, { "epoch": 0.87, "learning_rate": 4.47526842707064e-05, "loss": 1.1454, "step": 7010 }, { "epoch": 0.87, "learning_rate": 4.47306064394211e-05, "loss": 1.1773, "step": 7020 }, { "epoch": 0.87, "learning_rate": 4.470848772993219e-05, "loss": 1.1513, "step": 7030 }, { "epoch": 0.87, "learning_rate": 4.468632818806557e-05, "loss": 1.1719, "step": 7040 }, { "epoch": 0.87, "learning_rate": 4.4664127859731766e-05, "loss": 1.1373, "step": 7050 }, { "epoch": 0.87, "learning_rate": 4.464188679092578e-05, "loss": 1.1578, "step": 7060 }, { "epoch": 0.87, "learning_rate": 4.461960502772704e-05, "loss": 1.1745, "step": 7070 }, { "epoch": 0.88, "learning_rate": 4.4597282616299294e-05, "loss": 1.1643, "step": 7080 }, { "epoch": 0.88, "learning_rate": 4.457491960289046e-05, "loss": 1.1308, "step": 7090 }, { "epoch": 0.88, "learning_rate": 4.4552516033832645e-05, "loss": 1.126, "step": 7100 }, { "epoch": 0.88, "learning_rate": 4.453007195554191e-05, "loss": 1.1931, "step": 7110 }, { "epoch": 0.88, "learning_rate": 4.450758741451828e-05, "loss": 1.1437, "step": 7120 }, { "epoch": 0.88, "learning_rate": 4.448506245734563e-05, "loss": 1.1313, "step": 7130 }, { "epoch": 0.88, "learning_rate": 4.4462497130691514e-05, "loss": 1.1492, "step": 7140 }, { "epoch": 0.88, "learning_rate": 4.4439891481307184e-05, "loss": 1.1775, "step": 7150 }, { "epoch": 0.89, "learning_rate": 4.441724555602739e-05, "loss": 1.2022, "step": 7160 }, { "epoch": 0.89, "learning_rate": 4.4394559401770344e-05, "loss": 1.1165, "step": 7170 }, { "epoch": 0.89, "learning_rate": 4.4371833065537614e-05, "loss": 1.1326, "step": 7180 }, { "epoch": 0.89, "learning_rate": 4.4349066594413996e-05, "loss": 1.1371, "step": 7190 }, { "epoch": 0.89, "learning_rate": 4.4326260035567456e-05, "loss": 1.1254, "step": 7200 }, { "epoch": 0.89, "learning_rate": 4.430341343624901e-05, "loss": 1.1212, "step": 7210 }, { "epoch": 0.89, "learning_rate": 4.4280526843792614e-05, "loss": 1.1501, "step": 7220 }, { "epoch": 0.89, "learning_rate": 4.425760030561511e-05, "loss": 1.0971, "step": 7230 }, { "epoch": 0.9, "learning_rate": 4.423463386921608e-05, "loss": 1.1613, "step": 7240 }, { "epoch": 0.9, "learning_rate": 4.421162758217778e-05, "loss": 1.142, "step": 7250 }, { "epoch": 0.9, "learning_rate": 4.4188581492165025e-05, "loss": 1.1541, "step": 7260 }, { "epoch": 0.9, "learning_rate": 4.4165495646925095e-05, "loss": 1.1291, "step": 7270 }, { "epoch": 0.9, "learning_rate": 4.414237009428762e-05, "loss": 1.1866, "step": 7280 }, { "epoch": 0.9, "learning_rate": 4.411920488216452e-05, "loss": 1.1467, "step": 7290 }, { "epoch": 0.9, "learning_rate": 4.409600005854988e-05, "loss": 1.1166, "step": 7300 }, { "epoch": 0.9, "learning_rate": 4.407275567151983e-05, "loss": 1.1589, "step": 7310 }, { "epoch": 0.91, "learning_rate": 4.4049471769232496e-05, "loss": 1.2047, "step": 7320 }, { "epoch": 0.91, "learning_rate": 4.402614839992786e-05, "loss": 1.2128, "step": 7330 }, { "epoch": 0.91, "learning_rate": 4.400278561192767e-05, "loss": 1.1628, "step": 7340 }, { "epoch": 0.91, "learning_rate": 4.3979383453635333e-05, "loss": 1.1453, "step": 7350 }, { "epoch": 0.91, "learning_rate": 4.395594197353585e-05, "loss": 1.1652, "step": 7360 }, { "epoch": 0.91, "learning_rate": 4.393246122019567e-05, "loss": 1.1544, "step": 7370 }, { "epoch": 0.91, "learning_rate": 4.390894124226261e-05, "loss": 1.0748, "step": 7380 }, { "epoch": 0.91, "learning_rate": 4.388538208846577e-05, "loss": 1.1389, "step": 7390 }, { "epoch": 0.92, "learning_rate": 4.3861783807615396e-05, "loss": 1.1769, "step": 7400 }, { "epoch": 0.92, "learning_rate": 4.38381464486028e-05, "loss": 1.1197, "step": 7410 }, { "epoch": 0.92, "learning_rate": 4.381447006040027e-05, "loss": 1.1511, "step": 7420 }, { "epoch": 0.92, "learning_rate": 4.379075469206094e-05, "loss": 1.1547, "step": 7430 }, { "epoch": 0.92, "learning_rate": 4.376700039271871e-05, "loss": 1.1686, "step": 7440 }, { "epoch": 0.92, "learning_rate": 4.374320721158814e-05, "loss": 1.1379, "step": 7450 }, { "epoch": 0.92, "learning_rate": 4.371937519796436e-05, "loss": 1.1357, "step": 7460 }, { "epoch": 0.92, "learning_rate": 4.369550440122291e-05, "loss": 1.1332, "step": 7470 }, { "epoch": 0.93, "learning_rate": 4.3671594870819734e-05, "loss": 1.0943, "step": 7480 }, { "epoch": 0.93, "learning_rate": 4.3647646656290985e-05, "loss": 1.1537, "step": 7490 }, { "epoch": 0.93, "learning_rate": 4.362365980725299e-05, "loss": 1.164, "step": 7500 }, { "epoch": 0.93, "eval_loss": 1.0545625686645508, "eval_runtime": 5.7642, "eval_samples_per_second": 89.865, "eval_steps_per_second": 11.277, "step": 7500 }, { "epoch": 0.93, "learning_rate": 4.3599634373402096e-05, "loss": 1.17, "step": 7510 }, { "epoch": 0.93, "learning_rate": 4.357557040451461e-05, "loss": 1.1273, "step": 7520 }, { "epoch": 0.93, "learning_rate": 4.3551467950446666e-05, "loss": 1.1338, "step": 7530 }, { "epoch": 0.93, "learning_rate": 4.352732706113415e-05, "loss": 1.1678, "step": 7540 }, { "epoch": 0.93, "learning_rate": 4.3503147786592534e-05, "loss": 1.1786, "step": 7550 }, { "epoch": 0.94, "learning_rate": 4.3478930176916875e-05, "loss": 1.1294, "step": 7560 }, { "epoch": 0.94, "learning_rate": 4.3454674282281615e-05, "loss": 1.1333, "step": 7570 }, { "epoch": 0.94, "learning_rate": 4.343038015294053e-05, "loss": 1.1515, "step": 7580 }, { "epoch": 0.94, "learning_rate": 4.3406047839226596e-05, "loss": 1.1936, "step": 7590 }, { "epoch": 0.94, "learning_rate": 4.3381677391551926e-05, "loss": 1.1203, "step": 7600 }, { "epoch": 0.94, "learning_rate": 4.3357268860407616e-05, "loss": 1.1492, "step": 7610 }, { "epoch": 0.94, "learning_rate": 4.3332822296363675e-05, "loss": 1.1298, "step": 7620 }, { "epoch": 0.94, "learning_rate": 4.3308337750068906e-05, "loss": 1.1431, "step": 7630 }, { "epoch": 0.95, "learning_rate": 4.328381527225082e-05, "loss": 1.1197, "step": 7640 }, { "epoch": 0.95, "learning_rate": 4.3259254913715476e-05, "loss": 1.1265, "step": 7650 }, { "epoch": 0.95, "learning_rate": 4.323465672534745e-05, "loss": 1.1421, "step": 7660 }, { "epoch": 0.95, "learning_rate": 4.321002075810969e-05, "loss": 1.1673, "step": 7670 }, { "epoch": 0.95, "learning_rate": 4.3185347063043405e-05, "loss": 1.1151, "step": 7680 }, { "epoch": 0.95, "learning_rate": 4.316063569126797e-05, "loss": 1.1386, "step": 7690 }, { "epoch": 0.95, "learning_rate": 4.3135886693980826e-05, "loss": 1.1347, "step": 7700 }, { "epoch": 0.95, "learning_rate": 4.3111100122457364e-05, "loss": 1.1438, "step": 7710 }, { "epoch": 0.95, "learning_rate": 4.308627602805082e-05, "loss": 1.1472, "step": 7720 }, { "epoch": 0.96, "learning_rate": 4.306141446219217e-05, "loss": 1.1036, "step": 7730 }, { "epoch": 0.96, "learning_rate": 4.3036515476390033e-05, "loss": 1.1359, "step": 7740 }, { "epoch": 0.96, "learning_rate": 4.3011579122230536e-05, "loss": 1.1645, "step": 7750 }, { "epoch": 0.96, "learning_rate": 4.298660545137725e-05, "loss": 1.1119, "step": 7760 }, { "epoch": 0.96, "learning_rate": 4.296159451557105e-05, "loss": 1.1248, "step": 7770 }, { "epoch": 0.96, "learning_rate": 4.293654636663e-05, "loss": 1.123, "step": 7780 }, { "epoch": 0.96, "learning_rate": 4.291146105644929e-05, "loss": 1.1446, "step": 7790 }, { "epoch": 0.96, "learning_rate": 4.2886338637001086e-05, "loss": 1.1314, "step": 7800 }, { "epoch": 0.97, "learning_rate": 4.2861179160334445e-05, "loss": 1.1581, "step": 7810 }, { "epoch": 0.97, "learning_rate": 4.283598267857519e-05, "loss": 1.1603, "step": 7820 }, { "epoch": 0.97, "learning_rate": 4.281074924392582e-05, "loss": 1.1182, "step": 7830 }, { "epoch": 0.97, "learning_rate": 4.278547890866538e-05, "loss": 1.1635, "step": 7840 }, { "epoch": 0.97, "learning_rate": 4.2760171725149404e-05, "loss": 1.156, "step": 7850 }, { "epoch": 0.97, "learning_rate": 4.2734827745809715e-05, "loss": 1.1459, "step": 7860 }, { "epoch": 0.97, "learning_rate": 4.2709447023154415e-05, "loss": 1.1402, "step": 7870 }, { "epoch": 0.97, "learning_rate": 4.26840296097677e-05, "loss": 1.1653, "step": 7880 }, { "epoch": 0.98, "learning_rate": 4.265857555830981e-05, "loss": 1.1699, "step": 7890 }, { "epoch": 0.98, "learning_rate": 4.2633084921516864e-05, "loss": 1.1403, "step": 7900 }, { "epoch": 0.98, "learning_rate": 4.260755775220081e-05, "loss": 1.1759, "step": 7910 }, { "epoch": 0.98, "learning_rate": 4.2581994103249254e-05, "loss": 1.1511, "step": 7920 }, { "epoch": 0.98, "learning_rate": 4.2556394027625405e-05, "loss": 1.146, "step": 7930 }, { "epoch": 0.98, "learning_rate": 4.253075757836793e-05, "loss": 1.1145, "step": 7940 }, { "epoch": 0.98, "learning_rate": 4.2505084808590855e-05, "loss": 1.1548, "step": 7950 }, { "epoch": 0.98, "learning_rate": 4.247937577148345e-05, "loss": 1.1272, "step": 7960 }, { "epoch": 0.99, "learning_rate": 4.245363052031016e-05, "loss": 1.1469, "step": 7970 }, { "epoch": 0.99, "learning_rate": 4.24278491084104e-05, "loss": 1.1679, "step": 7980 }, { "epoch": 0.99, "learning_rate": 4.2402031589198554e-05, "loss": 1.1287, "step": 7990 }, { "epoch": 0.99, "learning_rate": 4.237617801616378e-05, "loss": 1.1448, "step": 8000 }, { "epoch": 0.99, "eval_loss": 1.0445607900619507, "eval_runtime": 5.7627, "eval_samples_per_second": 89.889, "eval_steps_per_second": 11.279, "step": 8000 }, { "epoch": 0.99, "learning_rate": 4.235028844286997e-05, "loss": 1.1526, "step": 8010 }, { "epoch": 0.99, "learning_rate": 4.2324362922955546e-05, "loss": 1.145, "step": 8020 }, { "epoch": 0.99, "learning_rate": 4.229840151013347e-05, "loss": 1.1585, "step": 8030 }, { "epoch": 0.99, "learning_rate": 4.2272404258191014e-05, "loss": 1.1796, "step": 8040 }, { "epoch": 1.0, "learning_rate": 4.224637122098972e-05, "loss": 1.148, "step": 8050 }, { "epoch": 1.0, "learning_rate": 4.22203024524653e-05, "loss": 1.1426, "step": 8060 }, { "epoch": 1.0, "learning_rate": 4.219419800662745e-05, "loss": 1.1373, "step": 8070 }, { "epoch": 1.0, "learning_rate": 4.216805793755981e-05, "loss": 1.1189, "step": 8080 }, { "epoch": 1.0, "learning_rate": 4.2141882299419806e-05, "loss": 1.097, "step": 8090 }, { "epoch": 1.0, "learning_rate": 4.2115671146438574e-05, "loss": 1.1113, "step": 8100 }, { "epoch": 1.0, "learning_rate": 4.208942453292083e-05, "loss": 1.1328, "step": 8110 }, { "epoch": 1.0, "learning_rate": 4.2063142513244736e-05, "loss": 1.0769, "step": 8120 }, { "epoch": 1.01, "learning_rate": 4.2036825141861836e-05, "loss": 1.1122, "step": 8130 }, { "epoch": 1.01, "learning_rate": 4.2010472473296895e-05, "loss": 1.1512, "step": 8140 }, { "epoch": 1.01, "learning_rate": 4.198408456214783e-05, "loss": 1.1313, "step": 8150 }, { "epoch": 1.01, "learning_rate": 4.195766146308555e-05, "loss": 1.1584, "step": 8160 }, { "epoch": 1.01, "learning_rate": 4.1931203230853874e-05, "loss": 1.1451, "step": 8170 }, { "epoch": 1.01, "learning_rate": 4.190470992026944e-05, "loss": 1.0846, "step": 8180 }, { "epoch": 1.01, "learning_rate": 4.18781815862215e-05, "loss": 1.1398, "step": 8190 }, { "epoch": 1.01, "learning_rate": 4.1851618283671924e-05, "loss": 1.0838, "step": 8200 }, { "epoch": 1.02, "learning_rate": 4.182502006765502e-05, "loss": 1.114, "step": 8210 }, { "epoch": 1.02, "learning_rate": 4.179838699327739e-05, "loss": 1.1078, "step": 8220 }, { "epoch": 1.02, "learning_rate": 4.1771719115717925e-05, "loss": 1.1787, "step": 8230 }, { "epoch": 1.02, "learning_rate": 4.174501649022756e-05, "loss": 1.0983, "step": 8240 }, { "epoch": 1.02, "learning_rate": 4.171827917212926e-05, "loss": 1.1255, "step": 8250 }, { "epoch": 1.02, "learning_rate": 4.1691507216817834e-05, "loss": 1.1437, "step": 8260 }, { "epoch": 1.02, "learning_rate": 4.1664700679759886e-05, "loss": 1.1051, "step": 8270 }, { "epoch": 1.02, "learning_rate": 4.163785961649364e-05, "loss": 1.1018, "step": 8280 }, { "epoch": 1.03, "learning_rate": 4.161098408262888e-05, "loss": 1.129, "step": 8290 }, { "epoch": 1.03, "learning_rate": 4.158407413384678e-05, "loss": 1.0712, "step": 8300 }, { "epoch": 1.03, "learning_rate": 4.155712982589983e-05, "loss": 1.1564, "step": 8310 }, { "epoch": 1.03, "learning_rate": 4.15301512146117e-05, "loss": 1.1427, "step": 8320 }, { "epoch": 1.03, "learning_rate": 4.1503138355877135e-05, "loss": 1.0921, "step": 8330 }, { "epoch": 1.03, "learning_rate": 4.1476091305661823e-05, "loss": 1.0976, "step": 8340 }, { "epoch": 1.03, "learning_rate": 4.1449010120002305e-05, "loss": 1.0852, "step": 8350 }, { "epoch": 1.03, "learning_rate": 4.142189485500585e-05, "loss": 1.156, "step": 8360 }, { "epoch": 1.04, "learning_rate": 4.139474556685031e-05, "loss": 1.0868, "step": 8370 }, { "epoch": 1.04, "learning_rate": 4.136756231178405e-05, "loss": 1.095, "step": 8380 }, { "epoch": 1.04, "learning_rate": 4.134034514612577e-05, "loss": 1.1324, "step": 8390 }, { "epoch": 1.04, "learning_rate": 4.1313094126264484e-05, "loss": 1.1278, "step": 8400 }, { "epoch": 1.04, "learning_rate": 4.1285809308659296e-05, "loss": 1.1301, "step": 8410 }, { "epoch": 1.04, "learning_rate": 4.125849074983936e-05, "loss": 1.1119, "step": 8420 }, { "epoch": 1.04, "learning_rate": 4.1231138506403726e-05, "loss": 1.1334, "step": 8430 }, { "epoch": 1.04, "learning_rate": 4.120375263502124e-05, "loss": 1.127, "step": 8440 }, { "epoch": 1.05, "learning_rate": 4.117633319243039e-05, "loss": 1.1554, "step": 8450 }, { "epoch": 1.05, "learning_rate": 4.114888023543927e-05, "loss": 1.1077, "step": 8460 }, { "epoch": 1.05, "learning_rate": 4.112139382092537e-05, "loss": 1.1169, "step": 8470 }, { "epoch": 1.05, "learning_rate": 4.10938740058355e-05, "loss": 1.1208, "step": 8480 }, { "epoch": 1.05, "learning_rate": 4.106632084718568e-05, "loss": 1.1371, "step": 8490 }, { "epoch": 1.05, "learning_rate": 4.103873440206101e-05, "loss": 1.1582, "step": 8500 }, { "epoch": 1.05, "eval_loss": 1.0412343740463257, "eval_runtime": 5.7639, "eval_samples_per_second": 89.87, "eval_steps_per_second": 11.277, "step": 8500 }, { "epoch": 1.05, "learning_rate": 4.1011114727615555e-05, "loss": 1.1159, "step": 8510 }, { "epoch": 1.05, "learning_rate": 4.0983461881072225e-05, "loss": 1.1449, "step": 8520 }, { "epoch": 1.06, "learning_rate": 4.095577591972265e-05, "loss": 1.1453, "step": 8530 }, { "epoch": 1.06, "learning_rate": 4.092805690092708e-05, "loss": 1.1376, "step": 8540 }, { "epoch": 1.06, "learning_rate": 4.090030488211423e-05, "loss": 1.1188, "step": 8550 }, { "epoch": 1.06, "learning_rate": 4.087251992078122e-05, "loss": 1.1354, "step": 8560 }, { "epoch": 1.06, "learning_rate": 4.084470207449339e-05, "loss": 1.1114, "step": 8570 }, { "epoch": 1.06, "learning_rate": 4.0816851400884226e-05, "loss": 1.1488, "step": 8580 }, { "epoch": 1.06, "learning_rate": 4.078896795765522e-05, "loss": 1.1122, "step": 8590 }, { "epoch": 1.06, "learning_rate": 4.076105180257577e-05, "loss": 1.1274, "step": 8600 }, { "epoch": 1.07, "learning_rate": 4.073310299348304e-05, "loss": 1.1196, "step": 8610 }, { "epoch": 1.07, "learning_rate": 4.070512158828184e-05, "loss": 1.0901, "step": 8620 }, { "epoch": 1.07, "learning_rate": 4.06771076449445e-05, "loss": 1.1413, "step": 8630 }, { "epoch": 1.07, "learning_rate": 4.064906122151081e-05, "loss": 1.1208, "step": 8640 }, { "epoch": 1.07, "learning_rate": 4.06209823760878e-05, "loss": 1.1584, "step": 8650 }, { "epoch": 1.07, "learning_rate": 4.059287116684972e-05, "loss": 1.111, "step": 8660 }, { "epoch": 1.07, "learning_rate": 4.056472765203782e-05, "loss": 1.1123, "step": 8670 }, { "epoch": 1.07, "learning_rate": 4.0536551889960326e-05, "loss": 1.1291, "step": 8680 }, { "epoch": 1.07, "learning_rate": 4.050834393899227e-05, "loss": 1.0982, "step": 8690 }, { "epoch": 1.08, "learning_rate": 4.048010385757534e-05, "loss": 1.1267, "step": 8700 }, { "epoch": 1.08, "learning_rate": 4.0451831704217815e-05, "loss": 1.1009, "step": 8710 }, { "epoch": 1.08, "learning_rate": 4.0423527537494424e-05, "loss": 1.112, "step": 8720 }, { "epoch": 1.08, "learning_rate": 4.039519141604622e-05, "loss": 1.1231, "step": 8730 }, { "epoch": 1.08, "learning_rate": 4.036682339858046e-05, "loss": 1.1602, "step": 8740 }, { "epoch": 1.08, "learning_rate": 4.033842354387047e-05, "loss": 1.1013, "step": 8750 }, { "epoch": 1.08, "learning_rate": 4.030999191075556e-05, "loss": 1.1679, "step": 8760 }, { "epoch": 1.08, "learning_rate": 4.028152855814087e-05, "loss": 1.1438, "step": 8770 }, { "epoch": 1.09, "learning_rate": 4.025303354499724e-05, "loss": 1.1588, "step": 8780 }, { "epoch": 1.09, "learning_rate": 4.022450693036114e-05, "loss": 1.1448, "step": 8790 }, { "epoch": 1.09, "learning_rate": 4.019594877333447e-05, "loss": 1.126, "step": 8800 }, { "epoch": 1.09, "learning_rate": 4.016735913308452e-05, "loss": 1.133, "step": 8810 }, { "epoch": 1.09, "learning_rate": 4.013873806884378e-05, "loss": 1.1431, "step": 8820 }, { "epoch": 1.09, "learning_rate": 4.011008563990986e-05, "loss": 1.1015, "step": 8830 }, { "epoch": 1.09, "learning_rate": 4.008140190564533e-05, "loss": 1.1413, "step": 8840 }, { "epoch": 1.09, "learning_rate": 4.0052686925477656e-05, "loss": 1.0785, "step": 8850 }, { "epoch": 1.1, "learning_rate": 4.0023940758899004e-05, "loss": 1.1091, "step": 8860 }, { "epoch": 1.1, "learning_rate": 3.999516346546618e-05, "loss": 1.1731, "step": 8870 }, { "epoch": 1.1, "learning_rate": 3.996635510480046e-05, "loss": 1.0999, "step": 8880 }, { "epoch": 1.1, "learning_rate": 3.993751573658749e-05, "loss": 1.1424, "step": 8890 }, { "epoch": 1.1, "learning_rate": 3.990864542057716e-05, "loss": 1.117, "step": 8900 }, { "epoch": 1.1, "learning_rate": 3.987974421658348e-05, "loss": 1.115, "step": 8910 }, { "epoch": 1.1, "learning_rate": 3.9850812184484454e-05, "loss": 1.1463, "step": 8920 }, { "epoch": 1.1, "learning_rate": 3.982184938422196e-05, "loss": 1.0894, "step": 8930 }, { "epoch": 1.11, "learning_rate": 3.97928558758016e-05, "loss": 1.1138, "step": 8940 }, { "epoch": 1.11, "learning_rate": 3.976383171929263e-05, "loss": 1.0911, "step": 8950 }, { "epoch": 1.11, "learning_rate": 3.973477697482778e-05, "loss": 1.1567, "step": 8960 }, { "epoch": 1.11, "learning_rate": 3.970569170260317e-05, "loss": 1.1079, "step": 8970 }, { "epoch": 1.11, "learning_rate": 3.967657596287814e-05, "loss": 1.098, "step": 8980 }, { "epoch": 1.11, "learning_rate": 3.964742981597518e-05, "loss": 1.1392, "step": 8990 }, { "epoch": 1.11, "learning_rate": 3.961825332227978e-05, "loss": 1.1326, "step": 9000 }, { "epoch": 1.11, "eval_loss": 1.0326589345932007, "eval_runtime": 5.7632, "eval_samples_per_second": 89.881, "eval_steps_per_second": 11.279, "step": 9000 }, { "epoch": 1.11, "learning_rate": 3.958904654224028e-05, "loss": 1.1302, "step": 9010 }, { "epoch": 1.12, "learning_rate": 3.955980953636779e-05, "loss": 1.1232, "step": 9020 }, { "epoch": 1.12, "learning_rate": 3.953054236523601e-05, "loss": 1.1298, "step": 9030 }, { "epoch": 1.12, "learning_rate": 3.9501245089481194e-05, "loss": 1.1508, "step": 9040 }, { "epoch": 1.12, "learning_rate": 3.94719177698019e-05, "loss": 1.1424, "step": 9050 }, { "epoch": 1.12, "learning_rate": 3.944256046695899e-05, "loss": 1.0716, "step": 9060 }, { "epoch": 1.12, "learning_rate": 3.94131732417754e-05, "loss": 1.1394, "step": 9070 }, { "epoch": 1.12, "learning_rate": 3.93837561551361e-05, "loss": 1.1057, "step": 9080 }, { "epoch": 1.12, "learning_rate": 3.935430926798788e-05, "loss": 1.1101, "step": 9090 }, { "epoch": 1.13, "learning_rate": 3.932483264133932e-05, "loss": 1.1229, "step": 9100 }, { "epoch": 1.13, "learning_rate": 3.929532633626058e-05, "loss": 1.1618, "step": 9110 }, { "epoch": 1.13, "learning_rate": 3.926579041388333e-05, "loss": 1.1355, "step": 9120 }, { "epoch": 1.13, "learning_rate": 3.923622493540059e-05, "loss": 1.0898, "step": 9130 }, { "epoch": 1.13, "learning_rate": 3.920662996206661e-05, "loss": 1.1455, "step": 9140 }, { "epoch": 1.13, "learning_rate": 3.917700555519677e-05, "loss": 1.156, "step": 9150 }, { "epoch": 1.13, "learning_rate": 3.914735177616741e-05, "loss": 1.119, "step": 9160 }, { "epoch": 1.13, "learning_rate": 3.911766868641573e-05, "loss": 1.1109, "step": 9170 }, { "epoch": 1.14, "learning_rate": 3.908795634743965e-05, "loss": 1.1524, "step": 9180 }, { "epoch": 1.14, "learning_rate": 3.90582148207977e-05, "loss": 1.1376, "step": 9190 }, { "epoch": 1.14, "learning_rate": 3.902844416810888e-05, "loss": 1.1177, "step": 9200 }, { "epoch": 1.14, "learning_rate": 3.8998644451052536e-05, "loss": 1.1328, "step": 9210 }, { "epoch": 1.14, "learning_rate": 3.896881573136821e-05, "loss": 1.1206, "step": 9220 }, { "epoch": 1.14, "learning_rate": 3.8938958070855566e-05, "loss": 1.1143, "step": 9230 }, { "epoch": 1.14, "learning_rate": 3.8909071531374184e-05, "loss": 1.0996, "step": 9240 }, { "epoch": 1.14, "learning_rate": 3.8879156174843536e-05, "loss": 1.1494, "step": 9250 }, { "epoch": 1.15, "learning_rate": 3.8849212063242726e-05, "loss": 1.1447, "step": 9260 }, { "epoch": 1.15, "learning_rate": 3.88192392586105e-05, "loss": 1.1533, "step": 9270 }, { "epoch": 1.15, "learning_rate": 3.878923782304499e-05, "loss": 1.0953, "step": 9280 }, { "epoch": 1.15, "learning_rate": 3.8759207818703706e-05, "loss": 1.1402, "step": 9290 }, { "epoch": 1.15, "learning_rate": 3.872914930780331e-05, "loss": 1.1039, "step": 9300 }, { "epoch": 1.15, "learning_rate": 3.8699062352619524e-05, "loss": 1.1293, "step": 9310 }, { "epoch": 1.15, "learning_rate": 3.8668947015487015e-05, "loss": 1.1523, "step": 9320 }, { "epoch": 1.15, "learning_rate": 3.863880335879925e-05, "loss": 1.1277, "step": 9330 }, { "epoch": 1.16, "learning_rate": 3.860863144500836e-05, "loss": 1.1138, "step": 9340 }, { "epoch": 1.16, "learning_rate": 3.857843133662503e-05, "loss": 1.1411, "step": 9350 }, { "epoch": 1.16, "learning_rate": 3.854820309621836e-05, "loss": 1.0831, "step": 9360 }, { "epoch": 1.16, "learning_rate": 3.851794678641572e-05, "loss": 1.0936, "step": 9370 }, { "epoch": 1.16, "learning_rate": 3.848766246990264e-05, "loss": 1.1398, "step": 9380 }, { "epoch": 1.16, "learning_rate": 3.845735020942268e-05, "loss": 1.0537, "step": 9390 }, { "epoch": 1.16, "learning_rate": 3.842701006777731e-05, "loss": 1.1364, "step": 9400 }, { "epoch": 1.16, "learning_rate": 3.839664210782573e-05, "loss": 1.1252, "step": 9410 }, { "epoch": 1.17, "learning_rate": 3.8366246392484796e-05, "loss": 1.0967, "step": 9420 }, { "epoch": 1.17, "learning_rate": 3.8335822984728866e-05, "loss": 1.077, "step": 9430 }, { "epoch": 1.17, "learning_rate": 3.830537194758967e-05, "loss": 1.1082, "step": 9440 }, { "epoch": 1.17, "learning_rate": 3.827489334415618e-05, "loss": 1.1591, "step": 9450 }, { "epoch": 1.17, "learning_rate": 3.824438723757447e-05, "loss": 1.1019, "step": 9460 }, { "epoch": 1.17, "learning_rate": 3.8213853691047644e-05, "loss": 1.1382, "step": 9470 }, { "epoch": 1.17, "learning_rate": 3.818329276783558e-05, "loss": 1.1209, "step": 9480 }, { "epoch": 1.17, "learning_rate": 3.815270453125493e-05, "loss": 1.1131, "step": 9490 }, { "epoch": 1.18, "learning_rate": 3.8122089044678916e-05, "loss": 1.1313, "step": 9500 }, { "epoch": 1.18, "eval_loss": 1.0298551321029663, "eval_runtime": 5.7612, "eval_samples_per_second": 89.911, "eval_steps_per_second": 11.282, "step": 9500 }, { "epoch": 1.18, "learning_rate": 3.809144637153722e-05, "loss": 1.1617, "step": 9510 }, { "epoch": 1.18, "learning_rate": 3.8060776575315865e-05, "loss": 1.101, "step": 9520 }, { "epoch": 1.18, "learning_rate": 3.8030079719557026e-05, "loss": 1.1377, "step": 9530 }, { "epoch": 1.18, "learning_rate": 3.7999355867858985e-05, "loss": 1.1324, "step": 9540 }, { "epoch": 1.18, "learning_rate": 3.796860508387593e-05, "loss": 1.1266, "step": 9550 }, { "epoch": 1.18, "learning_rate": 3.7937827431317866e-05, "loss": 1.0918, "step": 9560 }, { "epoch": 1.18, "learning_rate": 3.790702297395044e-05, "loss": 1.0917, "step": 9570 }, { "epoch": 1.19, "learning_rate": 3.787619177559485e-05, "loss": 1.1262, "step": 9580 }, { "epoch": 1.19, "learning_rate": 3.784533390012769e-05, "loss": 1.0977, "step": 9590 }, { "epoch": 1.19, "learning_rate": 3.7814449411480836e-05, "loss": 1.1104, "step": 9600 }, { "epoch": 1.19, "learning_rate": 3.7783538373641296e-05, "loss": 1.0976, "step": 9610 }, { "epoch": 1.19, "learning_rate": 3.775260085065107e-05, "loss": 1.1409, "step": 9620 }, { "epoch": 1.19, "learning_rate": 3.772163690660704e-05, "loss": 1.1272, "step": 9630 }, { "epoch": 1.19, "learning_rate": 3.7690646605660826e-05, "loss": 1.1082, "step": 9640 }, { "epoch": 1.19, "learning_rate": 3.7659630012018676e-05, "loss": 1.0926, "step": 9650 }, { "epoch": 1.19, "learning_rate": 3.7628587189941276e-05, "loss": 1.1474, "step": 9660 }, { "epoch": 1.2, "learning_rate": 3.759751820374367e-05, "loss": 1.0877, "step": 9670 }, { "epoch": 1.2, "learning_rate": 3.756642311779511e-05, "loss": 1.1289, "step": 9680 }, { "epoch": 1.2, "learning_rate": 3.753530199651892e-05, "loss": 1.101, "step": 9690 }, { "epoch": 1.2, "learning_rate": 3.750415490439237e-05, "loss": 1.0849, "step": 9700 }, { "epoch": 1.2, "learning_rate": 3.7472981905946535e-05, "loss": 1.087, "step": 9710 }, { "epoch": 1.2, "learning_rate": 3.744178306576614e-05, "loss": 1.1201, "step": 9720 }, { "epoch": 1.2, "learning_rate": 3.741055844848949e-05, "loss": 1.1066, "step": 9730 }, { "epoch": 1.2, "learning_rate": 3.737930811880827e-05, "loss": 1.0818, "step": 9740 }, { "epoch": 1.21, "learning_rate": 3.7348032141467414e-05, "loss": 1.1116, "step": 9750 }, { "epoch": 1.21, "learning_rate": 3.7316730581265054e-05, "loss": 1.1193, "step": 9760 }, { "epoch": 1.21, "learning_rate": 3.728540350305227e-05, "loss": 1.0784, "step": 9770 }, { "epoch": 1.21, "learning_rate": 3.725405097173306e-05, "loss": 1.1277, "step": 9780 }, { "epoch": 1.21, "learning_rate": 3.722267305226409e-05, "loss": 1.1043, "step": 9790 }, { "epoch": 1.21, "learning_rate": 3.71912698096547e-05, "loss": 1.1234, "step": 9800 }, { "epoch": 1.21, "learning_rate": 3.7159841308966626e-05, "loss": 1.1158, "step": 9810 }, { "epoch": 1.21, "learning_rate": 3.7128387615313986e-05, "loss": 1.1216, "step": 9820 }, { "epoch": 1.22, "learning_rate": 3.709690879386306e-05, "loss": 1.0826, "step": 9830 }, { "epoch": 1.22, "learning_rate": 3.7065404909832224e-05, "loss": 1.1404, "step": 9840 }, { "epoch": 1.22, "learning_rate": 3.7033876028491734e-05, "loss": 1.1411, "step": 9850 }, { "epoch": 1.22, "learning_rate": 3.700232221516367e-05, "loss": 1.0914, "step": 9860 }, { "epoch": 1.22, "learning_rate": 3.6970743535221755e-05, "loss": 1.0979, "step": 9870 }, { "epoch": 1.22, "learning_rate": 3.693914005409123e-05, "loss": 1.1405, "step": 9880 }, { "epoch": 1.22, "learning_rate": 3.690751183724872e-05, "loss": 1.0844, "step": 9890 }, { "epoch": 1.22, "learning_rate": 3.6875858950222095e-05, "loss": 1.0991, "step": 9900 }, { "epoch": 1.23, "learning_rate": 3.684418145859036e-05, "loss": 1.0847, "step": 9910 }, { "epoch": 1.23, "learning_rate": 3.681247942798345e-05, "loss": 1.1008, "step": 9920 }, { "epoch": 1.23, "learning_rate": 3.6780752924082175e-05, "loss": 1.1674, "step": 9930 }, { "epoch": 1.23, "learning_rate": 3.6749002012618055e-05, "loss": 1.0449, "step": 9940 }, { "epoch": 1.23, "learning_rate": 3.671722675937316e-05, "loss": 1.1063, "step": 9950 }, { "epoch": 1.23, "learning_rate": 3.6685427230179984e-05, "loss": 1.1387, "step": 9960 }, { "epoch": 1.23, "learning_rate": 3.6653603490921335e-05, "loss": 1.1268, "step": 9970 }, { "epoch": 1.23, "learning_rate": 3.662175560753018e-05, "loss": 1.1255, "step": 9980 }, { "epoch": 1.24, "learning_rate": 3.6589883645989495e-05, "loss": 1.1349, "step": 9990 }, { "epoch": 1.24, "learning_rate": 3.655798767233215e-05, "loss": 1.1007, "step": 10000 }, { "epoch": 1.24, "eval_loss": 1.0268759727478027, "eval_runtime": 5.7616, "eval_samples_per_second": 89.906, "eval_steps_per_second": 11.282, "step": 10000 }, { "epoch": 1.24, "learning_rate": 3.652606775264077e-05, "loss": 1.1228, "step": 10010 }, { "epoch": 1.24, "learning_rate": 3.649412395304757e-05, "loss": 1.1163, "step": 10020 }, { "epoch": 1.24, "learning_rate": 3.646215633973425e-05, "loss": 1.1201, "step": 10030 }, { "epoch": 1.24, "learning_rate": 3.643016497893187e-05, "loss": 1.1287, "step": 10040 }, { "epoch": 1.24, "learning_rate": 3.639814993692065e-05, "loss": 1.115, "step": 10050 }, { "epoch": 1.24, "learning_rate": 3.6366111280029927e-05, "loss": 1.1101, "step": 10060 }, { "epoch": 1.25, "learning_rate": 3.6334049074637896e-05, "loss": 1.1096, "step": 10070 }, { "epoch": 1.25, "learning_rate": 3.63019633871716e-05, "loss": 1.0797, "step": 10080 }, { "epoch": 1.25, "learning_rate": 3.6269854284106686e-05, "loss": 1.08, "step": 10090 }, { "epoch": 1.25, "learning_rate": 3.623772183196735e-05, "loss": 1.1361, "step": 10100 }, { "epoch": 1.25, "learning_rate": 3.620556609732616e-05, "loss": 1.1033, "step": 10110 }, { "epoch": 1.25, "learning_rate": 3.617338714680389e-05, "loss": 1.1069, "step": 10120 }, { "epoch": 1.25, "learning_rate": 3.614118504706943e-05, "loss": 1.1182, "step": 10130 }, { "epoch": 1.25, "learning_rate": 3.610895986483965e-05, "loss": 1.1513, "step": 10140 }, { "epoch": 1.26, "learning_rate": 3.6076711666879234e-05, "loss": 1.1092, "step": 10150 }, { "epoch": 1.26, "learning_rate": 3.604444052000052e-05, "loss": 1.1428, "step": 10160 }, { "epoch": 1.26, "learning_rate": 3.601214649106343e-05, "loss": 1.1113, "step": 10170 }, { "epoch": 1.26, "learning_rate": 3.597982964697527e-05, "loss": 1.1404, "step": 10180 }, { "epoch": 1.26, "learning_rate": 3.594749005469063e-05, "loss": 1.1082, "step": 10190 }, { "epoch": 1.26, "learning_rate": 3.5915127781211215e-05, "loss": 1.1014, "step": 10200 }, { "epoch": 1.26, "learning_rate": 3.5882742893585745e-05, "loss": 1.1115, "step": 10210 }, { "epoch": 1.26, "learning_rate": 3.585033545890976e-05, "loss": 1.1311, "step": 10220 }, { "epoch": 1.27, "learning_rate": 3.581790554432554e-05, "loss": 1.0668, "step": 10230 }, { "epoch": 1.27, "learning_rate": 3.5785453217021926e-05, "loss": 1.1143, "step": 10240 }, { "epoch": 1.27, "learning_rate": 3.575297854423421e-05, "loss": 1.1524, "step": 10250 }, { "epoch": 1.27, "learning_rate": 3.572048159324394e-05, "loss": 1.1086, "step": 10260 }, { "epoch": 1.27, "learning_rate": 3.5687962431378875e-05, "loss": 1.0636, "step": 10270 }, { "epoch": 1.27, "learning_rate": 3.565542112601275e-05, "loss": 1.1111, "step": 10280 }, { "epoch": 1.27, "learning_rate": 3.56228577445652e-05, "loss": 1.1441, "step": 10290 }, { "epoch": 1.27, "learning_rate": 3.559027235450158e-05, "loss": 1.1377, "step": 10300 }, { "epoch": 1.28, "learning_rate": 3.555766502333285e-05, "loss": 1.1123, "step": 10310 }, { "epoch": 1.28, "learning_rate": 3.552503581861544e-05, "loss": 1.1103, "step": 10320 }, { "epoch": 1.28, "learning_rate": 3.549238480795107e-05, "loss": 1.1019, "step": 10330 }, { "epoch": 1.28, "learning_rate": 3.545971205898668e-05, "loss": 1.153, "step": 10340 }, { "epoch": 1.28, "learning_rate": 3.5427017639414206e-05, "loss": 1.1124, "step": 10350 }, { "epoch": 1.28, "learning_rate": 3.53943016169705e-05, "loss": 1.0971, "step": 10360 }, { "epoch": 1.28, "learning_rate": 3.536156405943717e-05, "loss": 1.0987, "step": 10370 }, { "epoch": 1.28, "learning_rate": 3.532880503464045e-05, "loss": 1.104, "step": 10380 }, { "epoch": 1.29, "learning_rate": 3.529602461045102e-05, "loss": 1.1113, "step": 10390 }, { "epoch": 1.29, "learning_rate": 3.526322285478394e-05, "loss": 1.0833, "step": 10400 }, { "epoch": 1.29, "learning_rate": 3.523039983559843e-05, "loss": 1.0938, "step": 10410 }, { "epoch": 1.29, "learning_rate": 3.519755562089778e-05, "loss": 1.0916, "step": 10420 }, { "epoch": 1.29, "learning_rate": 3.516469027872917e-05, "loss": 1.1295, "step": 10430 }, { "epoch": 1.29, "learning_rate": 3.51318038771836e-05, "loss": 1.1001, "step": 10440 }, { "epoch": 1.29, "learning_rate": 3.509889648439565e-05, "loss": 1.1012, "step": 10450 }, { "epoch": 1.29, "learning_rate": 3.506596816854343e-05, "loss": 1.0968, "step": 10460 }, { "epoch": 1.3, "learning_rate": 3.503301899784837e-05, "loss": 1.147, "step": 10470 }, { "epoch": 1.3, "learning_rate": 3.500004904057511e-05, "loss": 1.1021, "step": 10480 }, { "epoch": 1.3, "learning_rate": 3.496705836503138e-05, "loss": 1.1351, "step": 10490 }, { "epoch": 1.3, "learning_rate": 3.4934047039567806e-05, "loss": 1.0983, "step": 10500 }, { "epoch": 1.3, "eval_loss": 1.0228146314620972, "eval_runtime": 5.7633, "eval_samples_per_second": 89.88, "eval_steps_per_second": 11.278, "step": 10500 }, { "epoch": 1.3, "learning_rate": 3.4901015132577825e-05, "loss": 1.0657, "step": 10510 }, { "epoch": 1.3, "learning_rate": 3.4867962712497486e-05, "loss": 1.111, "step": 10520 }, { "epoch": 1.3, "learning_rate": 3.483488984780536e-05, "loss": 1.0828, "step": 10530 }, { "epoch": 1.3, "learning_rate": 3.480179660702235e-05, "loss": 1.1044, "step": 10540 }, { "epoch": 1.31, "learning_rate": 3.476868305871162e-05, "loss": 1.0812, "step": 10550 }, { "epoch": 1.31, "learning_rate": 3.473554927147835e-05, "loss": 1.1466, "step": 10560 }, { "epoch": 1.31, "learning_rate": 3.470239531396969e-05, "loss": 1.0785, "step": 10570 }, { "epoch": 1.31, "learning_rate": 3.4669221254874586e-05, "loss": 1.1019, "step": 10580 }, { "epoch": 1.31, "learning_rate": 3.463602716292358e-05, "loss": 1.1121, "step": 10590 }, { "epoch": 1.31, "learning_rate": 3.4602813106888784e-05, "loss": 1.1051, "step": 10600 }, { "epoch": 1.31, "learning_rate": 3.456957915558362e-05, "loss": 1.1127, "step": 10610 }, { "epoch": 1.31, "learning_rate": 3.4536325377862765e-05, "loss": 1.151, "step": 10620 }, { "epoch": 1.31, "learning_rate": 3.450305184262194e-05, "loss": 1.1116, "step": 10630 }, { "epoch": 1.32, "learning_rate": 3.446975861879783e-05, "loss": 1.1014, "step": 10640 }, { "epoch": 1.32, "learning_rate": 3.44364457753679e-05, "loss": 1.1474, "step": 10650 }, { "epoch": 1.32, "learning_rate": 3.440311338135025e-05, "loss": 1.1454, "step": 10660 }, { "epoch": 1.32, "learning_rate": 3.43697615058035e-05, "loss": 1.1614, "step": 10670 }, { "epoch": 1.32, "learning_rate": 3.433639021782664e-05, "loss": 1.1296, "step": 10680 }, { "epoch": 1.32, "learning_rate": 3.4302999586558855e-05, "loss": 1.132, "step": 10690 }, { "epoch": 1.32, "learning_rate": 3.426958968117943e-05, "loss": 1.1189, "step": 10700 }, { "epoch": 1.32, "learning_rate": 3.423616057090755e-05, "loss": 1.0983, "step": 10710 }, { "epoch": 1.33, "learning_rate": 3.420271232500224e-05, "loss": 1.1026, "step": 10720 }, { "epoch": 1.33, "learning_rate": 3.416924501276212e-05, "loss": 1.1262, "step": 10730 }, { "epoch": 1.33, "learning_rate": 3.4135758703525337e-05, "loss": 1.1233, "step": 10740 }, { "epoch": 1.33, "learning_rate": 3.41022534666694e-05, "loss": 1.0908, "step": 10750 }, { "epoch": 1.33, "learning_rate": 3.406872937161101e-05, "loss": 1.0839, "step": 10760 }, { "epoch": 1.33, "learning_rate": 3.403518648780597e-05, "loss": 1.1074, "step": 10770 }, { "epoch": 1.33, "learning_rate": 3.4001624884748975e-05, "loss": 1.0915, "step": 10780 }, { "epoch": 1.33, "learning_rate": 3.396804463197353e-05, "loss": 1.1377, "step": 10790 }, { "epoch": 1.34, "learning_rate": 3.393444579905177e-05, "loss": 1.0913, "step": 10800 }, { "epoch": 1.34, "learning_rate": 3.390082845559434e-05, "loss": 1.1673, "step": 10810 }, { "epoch": 1.34, "learning_rate": 3.386719267125018e-05, "loss": 1.12, "step": 10820 }, { "epoch": 1.34, "learning_rate": 3.383353851570652e-05, "loss": 1.0588, "step": 10830 }, { "epoch": 1.34, "learning_rate": 3.379986605868858e-05, "loss": 1.0715, "step": 10840 }, { "epoch": 1.34, "learning_rate": 3.3766175369959536e-05, "loss": 1.0724, "step": 10850 }, { "epoch": 1.34, "learning_rate": 3.373246651932032e-05, "loss": 1.0968, "step": 10860 }, { "epoch": 1.34, "learning_rate": 3.36987395766095e-05, "loss": 1.092, "step": 10870 }, { "epoch": 1.35, "learning_rate": 3.366499461170314e-05, "loss": 1.1112, "step": 10880 }, { "epoch": 1.35, "learning_rate": 3.363123169451461e-05, "loss": 1.0916, "step": 10890 }, { "epoch": 1.35, "learning_rate": 3.359745089499448e-05, "loss": 1.1086, "step": 10900 }, { "epoch": 1.35, "learning_rate": 3.356365228313041e-05, "loss": 1.0841, "step": 10910 }, { "epoch": 1.35, "learning_rate": 3.352983592894692e-05, "loss": 1.0953, "step": 10920 }, { "epoch": 1.35, "learning_rate": 3.349600190250531e-05, "loss": 1.0618, "step": 10930 }, { "epoch": 1.35, "learning_rate": 3.346215027390349e-05, "loss": 1.065, "step": 10940 }, { "epoch": 1.35, "learning_rate": 3.342828111327582e-05, "loss": 1.1423, "step": 10950 }, { "epoch": 1.36, "learning_rate": 3.3394394490793004e-05, "loss": 1.0813, "step": 10960 }, { "epoch": 1.36, "learning_rate": 3.336049047666193e-05, "loss": 1.1, "step": 10970 }, { "epoch": 1.36, "learning_rate": 3.3326569141125497e-05, "loss": 1.1031, "step": 10980 }, { "epoch": 1.36, "learning_rate": 3.3292630554462505e-05, "loss": 1.0862, "step": 10990 }, { "epoch": 1.36, "learning_rate": 3.3258674786987496e-05, "loss": 1.0946, "step": 11000 }, { "epoch": 1.36, "eval_loss": 1.0201221704483032, "eval_runtime": 5.7649, "eval_samples_per_second": 89.855, "eval_steps_per_second": 11.275, "step": 11000 }, { "epoch": 1.36, "learning_rate": 3.322470190905059e-05, "loss": 1.0983, "step": 11010 }, { "epoch": 1.36, "learning_rate": 3.319071199103737e-05, "loss": 1.1614, "step": 11020 }, { "epoch": 1.36, "learning_rate": 3.315670510336874e-05, "loss": 1.1385, "step": 11030 }, { "epoch": 1.37, "learning_rate": 3.312268131650072e-05, "loss": 1.0788, "step": 11040 }, { "epoch": 1.37, "learning_rate": 3.308864070092438e-05, "loss": 1.1204, "step": 11050 }, { "epoch": 1.37, "learning_rate": 3.305458332716565e-05, "loss": 1.1076, "step": 11060 }, { "epoch": 1.37, "learning_rate": 3.3020509265785154e-05, "loss": 1.08, "step": 11070 }, { "epoch": 1.37, "learning_rate": 3.2986418587378124e-05, "loss": 1.118, "step": 11080 }, { "epoch": 1.37, "learning_rate": 3.2952311362574184e-05, "loss": 1.1275, "step": 11090 }, { "epoch": 1.37, "learning_rate": 3.2918187662037276e-05, "loss": 1.1198, "step": 11100 }, { "epoch": 1.37, "learning_rate": 3.288404755646545e-05, "loss": 1.1093, "step": 11110 }, { "epoch": 1.38, "learning_rate": 3.284989111659076e-05, "loss": 1.116, "step": 11120 }, { "epoch": 1.38, "learning_rate": 3.281571841317909e-05, "loss": 1.1058, "step": 11130 }, { "epoch": 1.38, "learning_rate": 3.2781529517030026e-05, "loss": 1.1132, "step": 11140 }, { "epoch": 1.38, "learning_rate": 3.27473244989767e-05, "loss": 1.1322, "step": 11150 }, { "epoch": 1.38, "learning_rate": 3.271310342988564e-05, "loss": 1.0679, "step": 11160 }, { "epoch": 1.38, "learning_rate": 3.267886638065664e-05, "loss": 1.1059, "step": 11170 }, { "epoch": 1.38, "learning_rate": 3.26446134222226e-05, "loss": 1.1099, "step": 11180 }, { "epoch": 1.38, "learning_rate": 3.261034462554937e-05, "loss": 1.1102, "step": 11190 }, { "epoch": 1.39, "learning_rate": 3.257606006163563e-05, "loss": 1.1212, "step": 11200 }, { "epoch": 1.39, "learning_rate": 3.2541759801512714e-05, "loss": 1.0798, "step": 11210 }, { "epoch": 1.39, "learning_rate": 3.250744391624448e-05, "loss": 1.0891, "step": 11220 }, { "epoch": 1.39, "learning_rate": 3.247311247692717e-05, "loss": 1.0806, "step": 11230 }, { "epoch": 1.39, "learning_rate": 3.243876555468922e-05, "loss": 1.0772, "step": 11240 }, { "epoch": 1.39, "learning_rate": 3.2404403220691174e-05, "loss": 1.1413, "step": 11250 }, { "epoch": 1.39, "learning_rate": 3.2370025546125494e-05, "loss": 1.1231, "step": 11260 }, { "epoch": 1.39, "learning_rate": 3.2335632602216434e-05, "loss": 1.0717, "step": 11270 }, { "epoch": 1.4, "learning_rate": 3.230122446021986e-05, "loss": 1.0537, "step": 11280 }, { "epoch": 1.4, "learning_rate": 3.2266801191423155e-05, "loss": 1.1031, "step": 11290 }, { "epoch": 1.4, "learning_rate": 3.223236286714503e-05, "loss": 1.0522, "step": 11300 }, { "epoch": 1.4, "learning_rate": 3.2197909558735373e-05, "loss": 1.1081, "step": 11310 }, { "epoch": 1.4, "learning_rate": 3.216344133757514e-05, "loss": 1.129, "step": 11320 }, { "epoch": 1.4, "learning_rate": 3.212895827507617e-05, "loss": 1.0907, "step": 11330 }, { "epoch": 1.4, "learning_rate": 3.209446044268106e-05, "loss": 1.0669, "step": 11340 }, { "epoch": 1.4, "learning_rate": 3.2059947911863e-05, "loss": 1.1227, "step": 11350 }, { "epoch": 1.41, "learning_rate": 3.202542075412563e-05, "loss": 1.1408, "step": 11360 }, { "epoch": 1.41, "learning_rate": 3.199087904100291e-05, "loss": 1.0806, "step": 11370 }, { "epoch": 1.41, "learning_rate": 3.195632284405893e-05, "loss": 1.101, "step": 11380 }, { "epoch": 1.41, "learning_rate": 3.1921752234887805e-05, "loss": 1.1398, "step": 11390 }, { "epoch": 1.41, "learning_rate": 3.1887167285113504e-05, "loss": 1.1293, "step": 11400 }, { "epoch": 1.41, "learning_rate": 3.1852568066389724e-05, "loss": 1.0837, "step": 11410 }, { "epoch": 1.41, "learning_rate": 3.1817954650399704e-05, "loss": 1.0671, "step": 11420 }, { "epoch": 1.41, "learning_rate": 3.17833271088561e-05, "loss": 1.1242, "step": 11430 }, { "epoch": 1.42, "learning_rate": 3.1748685513500845e-05, "loss": 1.0606, "step": 11440 }, { "epoch": 1.42, "learning_rate": 3.171402993610496e-05, "loss": 1.0741, "step": 11450 }, { "epoch": 1.42, "learning_rate": 3.167936044846849e-05, "loss": 1.0893, "step": 11460 }, { "epoch": 1.42, "learning_rate": 3.164467712242023e-05, "loss": 1.1121, "step": 11470 }, { "epoch": 1.42, "learning_rate": 3.160998002981771e-05, "loss": 1.0779, "step": 11480 }, { "epoch": 1.42, "learning_rate": 3.1575269242546927e-05, "loss": 1.0568, "step": 11490 }, { "epoch": 1.42, "learning_rate": 3.15405448325223e-05, "loss": 1.127, "step": 11500 }, { "epoch": 1.42, "eval_loss": 1.0143646001815796, "eval_runtime": 5.7627, "eval_samples_per_second": 89.888, "eval_steps_per_second": 11.279, "step": 11500 }, { "epoch": 1.42, "learning_rate": 3.1505806871686426e-05, "loss": 1.1001, "step": 11510 }, { "epoch": 1.43, "learning_rate": 3.147105543201002e-05, "loss": 1.1072, "step": 11520 }, { "epoch": 1.43, "learning_rate": 3.1436290585491686e-05, "loss": 1.1027, "step": 11530 }, { "epoch": 1.43, "learning_rate": 3.140151240415782e-05, "loss": 1.0892, "step": 11540 }, { "epoch": 1.43, "learning_rate": 3.136672096006246e-05, "loss": 1.1202, "step": 11550 }, { "epoch": 1.43, "learning_rate": 3.1331916325287094e-05, "loss": 1.0918, "step": 11560 }, { "epoch": 1.43, "learning_rate": 3.1297098571940556e-05, "loss": 1.0614, "step": 11570 }, { "epoch": 1.43, "learning_rate": 3.126226777215886e-05, "loss": 1.1036, "step": 11580 }, { "epoch": 1.43, "learning_rate": 3.122742399810503e-05, "loss": 1.0841, "step": 11590 }, { "epoch": 1.43, "learning_rate": 3.1192567321969e-05, "loss": 1.0612, "step": 11600 }, { "epoch": 1.44, "learning_rate": 3.1157697815967424e-05, "loss": 1.1428, "step": 11610 }, { "epoch": 1.44, "learning_rate": 3.112281555234352e-05, "loss": 1.0921, "step": 11620 }, { "epoch": 1.44, "learning_rate": 3.108792060336695e-05, "loss": 1.0796, "step": 11630 }, { "epoch": 1.44, "learning_rate": 3.105301304133368e-05, "loss": 1.0947, "step": 11640 }, { "epoch": 1.44, "learning_rate": 3.1018092938565765e-05, "loss": 1.1185, "step": 11650 }, { "epoch": 1.44, "learning_rate": 3.098316036741127e-05, "loss": 1.0856, "step": 11660 }, { "epoch": 1.44, "learning_rate": 3.0948215400244085e-05, "loss": 1.1219, "step": 11670 }, { "epoch": 1.44, "learning_rate": 3.091325810946378e-05, "loss": 1.0856, "step": 11680 }, { "epoch": 1.45, "learning_rate": 3.087828856749547e-05, "loss": 1.0639, "step": 11690 }, { "epoch": 1.45, "learning_rate": 3.084330684678964e-05, "loss": 1.0643, "step": 11700 }, { "epoch": 1.45, "learning_rate": 3.0808313019822e-05, "loss": 1.1239, "step": 11710 }, { "epoch": 1.45, "learning_rate": 3.0773307159093365e-05, "loss": 1.0759, "step": 11720 }, { "epoch": 1.45, "learning_rate": 3.0738289337129454e-05, "loss": 1.0969, "step": 11730 }, { "epoch": 1.45, "learning_rate": 3.0703259626480797e-05, "loss": 1.1094, "step": 11740 }, { "epoch": 1.45, "learning_rate": 3.066821809972253e-05, "loss": 1.0912, "step": 11750 }, { "epoch": 1.45, "learning_rate": 3.063316482945429e-05, "loss": 1.1278, "step": 11760 }, { "epoch": 1.46, "learning_rate": 3.059809988830002e-05, "loss": 1.1022, "step": 11770 }, { "epoch": 1.46, "learning_rate": 3.056302334890786e-05, "loss": 1.1197, "step": 11780 }, { "epoch": 1.46, "learning_rate": 3.0527935283949986e-05, "loss": 1.1254, "step": 11790 }, { "epoch": 1.46, "learning_rate": 3.0492835766122442e-05, "loss": 1.1088, "step": 11800 }, { "epoch": 1.46, "learning_rate": 3.0457724868145e-05, "loss": 1.0983, "step": 11810 }, { "epoch": 1.46, "learning_rate": 3.0422602662761002e-05, "loss": 1.0614, "step": 11820 }, { "epoch": 1.46, "learning_rate": 3.0387469222737236e-05, "loss": 1.116, "step": 11830 }, { "epoch": 1.46, "learning_rate": 3.035232462086376e-05, "loss": 1.0658, "step": 11840 }, { "epoch": 1.47, "learning_rate": 3.0317168929953743e-05, "loss": 1.0905, "step": 11850 }, { "epoch": 1.47, "learning_rate": 3.028200222284334e-05, "loss": 1.0912, "step": 11860 }, { "epoch": 1.47, "learning_rate": 3.0246824572391542e-05, "loss": 1.0641, "step": 11870 }, { "epoch": 1.47, "learning_rate": 3.0211636051479985e-05, "loss": 1.1061, "step": 11880 }, { "epoch": 1.47, "learning_rate": 3.0176436733012846e-05, "loss": 1.1396, "step": 11890 }, { "epoch": 1.47, "learning_rate": 3.0141226689916667e-05, "loss": 1.0759, "step": 11900 }, { "epoch": 1.47, "learning_rate": 3.0106005995140212e-05, "loss": 1.1406, "step": 11910 }, { "epoch": 1.47, "learning_rate": 3.0070774721654306e-05, "loss": 1.1354, "step": 11920 }, { "epoch": 1.48, "learning_rate": 3.0035532942451705e-05, "loss": 1.1265, "step": 11930 }, { "epoch": 1.48, "learning_rate": 3.0000280730546913e-05, "loss": 1.0837, "step": 11940 }, { "epoch": 1.48, "learning_rate": 2.9965018158976064e-05, "loss": 1.1294, "step": 11950 }, { "epoch": 1.48, "learning_rate": 2.9929745300796747e-05, "loss": 1.0733, "step": 11960 }, { "epoch": 1.48, "learning_rate": 2.9894462229087866e-05, "loss": 1.1176, "step": 11970 }, { "epoch": 1.48, "learning_rate": 2.9859169016949484e-05, "loss": 1.1409, "step": 11980 }, { "epoch": 1.48, "learning_rate": 2.982386573750267e-05, "loss": 1.1176, "step": 11990 }, { "epoch": 1.48, "learning_rate": 2.9788552463889364e-05, "loss": 1.1087, "step": 12000 }, { "epoch": 1.48, "eval_loss": 1.014037847518921, "eval_runtime": 5.7696, "eval_samples_per_second": 89.781, "eval_steps_per_second": 11.266, "step": 12000 }, { "epoch": 1.49, "learning_rate": 2.97532292692722e-05, "loss": 1.1168, "step": 12010 }, { "epoch": 1.49, "learning_rate": 2.9717896226834364e-05, "loss": 1.0966, "step": 12020 }, { "epoch": 1.49, "learning_rate": 2.9682553409779447e-05, "loss": 1.1, "step": 12030 }, { "epoch": 1.49, "learning_rate": 2.9647200891331306e-05, "loss": 1.0777, "step": 12040 }, { "epoch": 1.49, "learning_rate": 2.961183874473388e-05, "loss": 1.0779, "step": 12050 }, { "epoch": 1.49, "learning_rate": 2.9576467043251054e-05, "loss": 1.1108, "step": 12060 }, { "epoch": 1.49, "learning_rate": 2.9541085860166528e-05, "loss": 1.1019, "step": 12070 }, { "epoch": 1.49, "learning_rate": 2.950569526878363e-05, "loss": 1.1045, "step": 12080 }, { "epoch": 1.5, "learning_rate": 2.9470295342425186e-05, "loss": 1.1158, "step": 12090 }, { "epoch": 1.5, "learning_rate": 2.943488615443336e-05, "loss": 1.1094, "step": 12100 }, { "epoch": 1.5, "learning_rate": 2.9399467778169502e-05, "loss": 1.1158, "step": 12110 }, { "epoch": 1.5, "learning_rate": 2.9364040287014012e-05, "loss": 1.0872, "step": 12120 }, { "epoch": 1.5, "learning_rate": 2.932860375436615e-05, "loss": 1.0977, "step": 12130 }, { "epoch": 1.5, "learning_rate": 2.9293158253643936e-05, "loss": 1.0815, "step": 12140 }, { "epoch": 1.5, "learning_rate": 2.9257703858283958e-05, "loss": 1.087, "step": 12150 }, { "epoch": 1.5, "learning_rate": 2.9222240641741216e-05, "loss": 1.1078, "step": 12160 }, { "epoch": 1.51, "learning_rate": 2.918676867748902e-05, "loss": 1.0413, "step": 12170 }, { "epoch": 1.51, "learning_rate": 2.9151288039018766e-05, "loss": 1.1274, "step": 12180 }, { "epoch": 1.51, "learning_rate": 2.911579879983985e-05, "loss": 1.0624, "step": 12190 }, { "epoch": 1.51, "learning_rate": 2.9080301033479475e-05, "loss": 1.0967, "step": 12200 }, { "epoch": 1.51, "learning_rate": 2.9044794813482518e-05, "loss": 1.0794, "step": 12210 }, { "epoch": 1.51, "learning_rate": 2.9009280213411356e-05, "loss": 1.0939, "step": 12220 }, { "epoch": 1.51, "learning_rate": 2.8973757306845738e-05, "loss": 1.1248, "step": 12230 }, { "epoch": 1.51, "learning_rate": 2.8938226167382636e-05, "loss": 1.0969, "step": 12240 }, { "epoch": 1.52, "learning_rate": 2.8902686868636047e-05, "loss": 1.1171, "step": 12250 }, { "epoch": 1.52, "learning_rate": 2.8867139484236898e-05, "loss": 1.0909, "step": 12260 }, { "epoch": 1.52, "learning_rate": 2.8831584087832864e-05, "loss": 1.0625, "step": 12270 }, { "epoch": 1.52, "learning_rate": 2.879602075308821e-05, "loss": 1.0956, "step": 12280 }, { "epoch": 1.52, "learning_rate": 2.8760449553683656e-05, "loss": 1.093, "step": 12290 }, { "epoch": 1.52, "learning_rate": 2.872487056331621e-05, "loss": 1.0674, "step": 12300 }, { "epoch": 1.52, "learning_rate": 2.868928385569903e-05, "loss": 1.0648, "step": 12310 }, { "epoch": 1.52, "learning_rate": 2.8653689504561253e-05, "loss": 1.1238, "step": 12320 }, { "epoch": 1.53, "learning_rate": 2.8618087583647857e-05, "loss": 1.12, "step": 12330 }, { "epoch": 1.53, "learning_rate": 2.8582478166719512e-05, "loss": 1.115, "step": 12340 }, { "epoch": 1.53, "learning_rate": 2.85468613275524e-05, "loss": 1.1158, "step": 12350 }, { "epoch": 1.53, "learning_rate": 2.851123713993809e-05, "loss": 1.1165, "step": 12360 }, { "epoch": 1.53, "learning_rate": 2.8475605677683383e-05, "loss": 1.0985, "step": 12370 }, { "epoch": 1.53, "learning_rate": 2.8439967014610134e-05, "loss": 1.0928, "step": 12380 }, { "epoch": 1.53, "learning_rate": 2.8404321224555135e-05, "loss": 1.0878, "step": 12390 }, { "epoch": 1.53, "learning_rate": 2.8368668381369934e-05, "loss": 1.0734, "step": 12400 }, { "epoch": 1.54, "learning_rate": 2.8333008558920694e-05, "loss": 1.1231, "step": 12410 }, { "epoch": 1.54, "learning_rate": 2.829734183108803e-05, "loss": 1.0822, "step": 12420 }, { "epoch": 1.54, "learning_rate": 2.8261668271766882e-05, "loss": 1.0886, "step": 12430 }, { "epoch": 1.54, "learning_rate": 2.8225987954866325e-05, "loss": 1.0877, "step": 12440 }, { "epoch": 1.54, "learning_rate": 2.8190300954309447e-05, "loss": 1.0806, "step": 12450 }, { "epoch": 1.54, "learning_rate": 2.8154607344033174e-05, "loss": 1.1316, "step": 12460 }, { "epoch": 1.54, "learning_rate": 2.8118907197988133e-05, "loss": 1.1096, "step": 12470 }, { "epoch": 1.54, "learning_rate": 2.8083200590138492e-05, "loss": 1.0957, "step": 12480 }, { "epoch": 1.55, "learning_rate": 2.8047487594461797e-05, "loss": 1.0703, "step": 12490 }, { "epoch": 1.55, "learning_rate": 2.801176828494884e-05, "loss": 1.0958, "step": 12500 }, { "epoch": 1.55, "eval_loss": 1.0098438262939453, "eval_runtime": 5.7652, "eval_samples_per_second": 89.849, "eval_steps_per_second": 11.275, "step": 12500 }, { "epoch": 1.55, "learning_rate": 2.7976042735603496e-05, "loss": 1.1187, "step": 12510 }, { "epoch": 1.55, "learning_rate": 2.794031102044255e-05, "loss": 1.0942, "step": 12520 }, { "epoch": 1.55, "learning_rate": 2.7904573213495576e-05, "loss": 1.1351, "step": 12530 }, { "epoch": 1.55, "learning_rate": 2.7868829388804775e-05, "loss": 1.0822, "step": 12540 }, { "epoch": 1.55, "learning_rate": 2.783307962042479e-05, "loss": 1.1083, "step": 12550 }, { "epoch": 1.55, "learning_rate": 2.7797323982422614e-05, "loss": 1.0942, "step": 12560 }, { "epoch": 1.55, "learning_rate": 2.776156254887736e-05, "loss": 1.0598, "step": 12570 }, { "epoch": 1.56, "learning_rate": 2.7725795393880195e-05, "loss": 1.1061, "step": 12580 }, { "epoch": 1.56, "learning_rate": 2.7690022591534093e-05, "loss": 1.0706, "step": 12590 }, { "epoch": 1.56, "learning_rate": 2.765424421595376e-05, "loss": 1.0994, "step": 12600 }, { "epoch": 1.56, "learning_rate": 2.7618460341265438e-05, "loss": 1.1187, "step": 12610 }, { "epoch": 1.56, "learning_rate": 2.758267104160676e-05, "loss": 1.0757, "step": 12620 }, { "epoch": 1.56, "learning_rate": 2.7546876391126608e-05, "loss": 1.1194, "step": 12630 }, { "epoch": 1.56, "learning_rate": 2.7511076463984936e-05, "loss": 1.1067, "step": 12640 }, { "epoch": 1.56, "learning_rate": 2.7475271334352644e-05, "loss": 1.0822, "step": 12650 }, { "epoch": 1.57, "learning_rate": 2.7439461076411395e-05, "loss": 1.0869, "step": 12660 }, { "epoch": 1.57, "learning_rate": 2.7403645764353498e-05, "loss": 1.111, "step": 12670 }, { "epoch": 1.57, "learning_rate": 2.7367825472381707e-05, "loss": 1.0966, "step": 12680 }, { "epoch": 1.57, "learning_rate": 2.7332000274709124e-05, "loss": 1.1024, "step": 12690 }, { "epoch": 1.57, "learning_rate": 2.7296170245558988e-05, "loss": 1.1165, "step": 12700 }, { "epoch": 1.57, "learning_rate": 2.726033545916456e-05, "loss": 1.0686, "step": 12710 }, { "epoch": 1.57, "learning_rate": 2.7224495989768956e-05, "loss": 1.0751, "step": 12720 }, { "epoch": 1.57, "learning_rate": 2.7188651911624995e-05, "loss": 1.0881, "step": 12730 }, { "epoch": 1.58, "learning_rate": 2.7152803298995045e-05, "loss": 1.076, "step": 12740 }, { "epoch": 1.58, "learning_rate": 2.7116950226150863e-05, "loss": 1.1037, "step": 12750 }, { "epoch": 1.58, "learning_rate": 2.7081092767373455e-05, "loss": 1.1052, "step": 12760 }, { "epoch": 1.58, "learning_rate": 2.704523099695291e-05, "loss": 1.0942, "step": 12770 }, { "epoch": 1.58, "learning_rate": 2.700936498918824e-05, "loss": 1.123, "step": 12780 }, { "epoch": 1.58, "learning_rate": 2.6973494818387257e-05, "loss": 1.1012, "step": 12790 }, { "epoch": 1.58, "learning_rate": 2.6937620558866378e-05, "loss": 1.0795, "step": 12800 }, { "epoch": 1.58, "learning_rate": 2.6901742284950508e-05, "loss": 1.1193, "step": 12810 }, { "epoch": 1.59, "learning_rate": 2.6865860070972854e-05, "loss": 1.1014, "step": 12820 }, { "epoch": 1.59, "learning_rate": 2.68299739912748e-05, "loss": 1.1011, "step": 12830 }, { "epoch": 1.59, "learning_rate": 2.679408412020572e-05, "loss": 1.1107, "step": 12840 }, { "epoch": 1.59, "learning_rate": 2.675819053212287e-05, "loss": 1.076, "step": 12850 }, { "epoch": 1.59, "learning_rate": 2.6722293301391183e-05, "loss": 1.103, "step": 12860 }, { "epoch": 1.59, "learning_rate": 2.6686392502383146e-05, "loss": 1.0729, "step": 12870 }, { "epoch": 1.59, "learning_rate": 2.6650488209478646e-05, "loss": 1.0967, "step": 12880 }, { "epoch": 1.59, "learning_rate": 2.6614580497064795e-05, "loss": 1.087, "step": 12890 }, { "epoch": 1.6, "learning_rate": 2.6578669439535814e-05, "loss": 1.0684, "step": 12900 }, { "epoch": 1.6, "learning_rate": 2.6542755111292827e-05, "loss": 1.0746, "step": 12910 }, { "epoch": 1.6, "learning_rate": 2.6506837586743744e-05, "loss": 1.1133, "step": 12920 }, { "epoch": 1.6, "learning_rate": 2.6470916940303104e-05, "loss": 1.1435, "step": 12930 }, { "epoch": 1.6, "learning_rate": 2.6434993246391904e-05, "loss": 1.1051, "step": 12940 }, { "epoch": 1.6, "learning_rate": 2.639906657943747e-05, "loss": 1.0866, "step": 12950 }, { "epoch": 1.6, "learning_rate": 2.636313701387326e-05, "loss": 1.1402, "step": 12960 }, { "epoch": 1.6, "learning_rate": 2.6327204624138774e-05, "loss": 1.088, "step": 12970 }, { "epoch": 1.61, "learning_rate": 2.6291269484679332e-05, "loss": 1.0926, "step": 12980 }, { "epoch": 1.61, "learning_rate": 2.6255331669945965e-05, "loss": 1.0405, "step": 12990 }, { "epoch": 1.61, "learning_rate": 2.6219391254395243e-05, "loss": 1.1188, "step": 13000 }, { "epoch": 1.61, "eval_loss": 1.006820797920227, "eval_runtime": 5.7658, "eval_samples_per_second": 89.841, "eval_steps_per_second": 11.273, "step": 13000 }, { "epoch": 1.61, "learning_rate": 2.6183448312489135e-05, "loss": 1.0651, "step": 13010 }, { "epoch": 1.61, "learning_rate": 2.6147502918694823e-05, "loss": 1.0819, "step": 13020 }, { "epoch": 1.61, "learning_rate": 2.6111555147484583e-05, "loss": 1.0919, "step": 13030 }, { "epoch": 1.61, "learning_rate": 2.607560507333562e-05, "loss": 1.0267, "step": 13040 }, { "epoch": 1.61, "learning_rate": 2.60396527707299e-05, "loss": 1.0814, "step": 13050 }, { "epoch": 1.62, "learning_rate": 2.6003698314154008e-05, "loss": 1.0672, "step": 13060 }, { "epoch": 1.62, "learning_rate": 2.5967741778099015e-05, "loss": 1.0989, "step": 13070 }, { "epoch": 1.62, "learning_rate": 2.5931783237060254e-05, "loss": 1.1135, "step": 13080 }, { "epoch": 1.62, "learning_rate": 2.589582276553724e-05, "loss": 1.1294, "step": 13090 }, { "epoch": 1.62, "learning_rate": 2.58598604380335e-05, "loss": 1.0852, "step": 13100 }, { "epoch": 1.62, "learning_rate": 2.5823896329056384e-05, "loss": 1.0922, "step": 13110 }, { "epoch": 1.62, "learning_rate": 2.5787930513116943e-05, "loss": 1.1142, "step": 13120 }, { "epoch": 1.62, "learning_rate": 2.5751963064729752e-05, "loss": 1.0643, "step": 13130 }, { "epoch": 1.63, "learning_rate": 2.571599405841279e-05, "loss": 1.0794, "step": 13140 }, { "epoch": 1.63, "learning_rate": 2.5680023568687246e-05, "loss": 1.0957, "step": 13150 }, { "epoch": 1.63, "learning_rate": 2.5644051670077383e-05, "loss": 1.0755, "step": 13160 }, { "epoch": 1.63, "learning_rate": 2.56080784371104e-05, "loss": 1.0985, "step": 13170 }, { "epoch": 1.63, "learning_rate": 2.557210394431624e-05, "loss": 1.103, "step": 13180 }, { "epoch": 1.63, "learning_rate": 2.5536128266227467e-05, "loss": 1.1028, "step": 13190 }, { "epoch": 1.63, "learning_rate": 2.55001514773791e-05, "loss": 1.097, "step": 13200 }, { "epoch": 1.63, "learning_rate": 2.5464173652308447e-05, "loss": 1.051, "step": 13210 }, { "epoch": 1.64, "learning_rate": 2.5428194865554995e-05, "loss": 1.0596, "step": 13220 }, { "epoch": 1.64, "learning_rate": 2.5392215191660173e-05, "loss": 1.0959, "step": 13230 }, { "epoch": 1.64, "learning_rate": 2.5356234705167308e-05, "loss": 1.1071, "step": 13240 }, { "epoch": 1.64, "learning_rate": 2.5320253480621365e-05, "loss": 1.0861, "step": 13250 }, { "epoch": 1.64, "learning_rate": 2.528427159256885e-05, "loss": 1.1266, "step": 13260 }, { "epoch": 1.64, "learning_rate": 2.5248289115557646e-05, "loss": 1.06, "step": 13270 }, { "epoch": 1.64, "learning_rate": 2.5212306124136858e-05, "loss": 1.0775, "step": 13280 }, { "epoch": 1.64, "learning_rate": 2.5176322692856662e-05, "loss": 1.0725, "step": 13290 }, { "epoch": 1.65, "learning_rate": 2.5140338896268134e-05, "loss": 1.0847, "step": 13300 }, { "epoch": 1.65, "learning_rate": 2.510435480892311e-05, "loss": 1.0709, "step": 13310 }, { "epoch": 1.65, "learning_rate": 2.5068370505374022e-05, "loss": 1.0855, "step": 13320 }, { "epoch": 1.65, "learning_rate": 2.5032386060173772e-05, "loss": 1.0729, "step": 13330 }, { "epoch": 1.65, "learning_rate": 2.499640154787554e-05, "loss": 1.1062, "step": 13340 }, { "epoch": 1.65, "learning_rate": 2.4960417043032634e-05, "loss": 1.0873, "step": 13350 }, { "epoch": 1.65, "learning_rate": 2.4924432620198377e-05, "loss": 1.078, "step": 13360 }, { "epoch": 1.65, "learning_rate": 2.4888448353925885e-05, "loss": 1.1389, "step": 13370 }, { "epoch": 1.66, "learning_rate": 2.4852464318767976e-05, "loss": 1.1065, "step": 13380 }, { "epoch": 1.66, "learning_rate": 2.481648058927699e-05, "loss": 1.1067, "step": 13390 }, { "epoch": 1.66, "learning_rate": 2.478049724000461e-05, "loss": 1.1283, "step": 13400 }, { "epoch": 1.66, "learning_rate": 2.474451434550177e-05, "loss": 1.0986, "step": 13410 }, { "epoch": 1.66, "learning_rate": 2.470853198031841e-05, "loss": 1.0822, "step": 13420 }, { "epoch": 1.66, "learning_rate": 2.4672550219003413e-05, "loss": 1.0844, "step": 13430 }, { "epoch": 1.66, "learning_rate": 2.4636569136104414e-05, "loss": 1.1288, "step": 13440 }, { "epoch": 1.66, "learning_rate": 2.460058880616761e-05, "loss": 1.0828, "step": 13450 }, { "epoch": 1.66, "learning_rate": 2.4564609303737666e-05, "loss": 1.1175, "step": 13460 }, { "epoch": 1.67, "learning_rate": 2.452863070335751e-05, "loss": 1.0762, "step": 13470 }, { "epoch": 1.67, "learning_rate": 2.4492653079568227e-05, "loss": 1.0889, "step": 13480 }, { "epoch": 1.67, "learning_rate": 2.4456676506908866e-05, "loss": 1.0886, "step": 13490 }, { "epoch": 1.67, "learning_rate": 2.442070105991629e-05, "loss": 1.1116, "step": 13500 }, { "epoch": 1.67, "eval_loss": 1.002138376235962, "eval_runtime": 5.7621, "eval_samples_per_second": 89.898, "eval_steps_per_second": 11.281, "step": 13500 }, { "epoch": 1.67, "learning_rate": 2.4384726813125038e-05, "loss": 1.0763, "step": 13510 }, { "epoch": 1.67, "learning_rate": 2.434875384106717e-05, "loss": 1.0851, "step": 13520 }, { "epoch": 1.67, "learning_rate": 2.4312782218272086e-05, "loss": 1.1085, "step": 13530 }, { "epoch": 1.67, "learning_rate": 2.4276812019266415e-05, "loss": 1.0665, "step": 13540 }, { "epoch": 1.68, "learning_rate": 2.4240843318573812e-05, "loss": 1.1093, "step": 13550 }, { "epoch": 1.68, "learning_rate": 2.420487619071484e-05, "loss": 1.0694, "step": 13560 }, { "epoch": 1.68, "learning_rate": 2.4168910710206808e-05, "loss": 1.0724, "step": 13570 }, { "epoch": 1.68, "learning_rate": 2.41329469515636e-05, "loss": 1.0751, "step": 13580 }, { "epoch": 1.68, "learning_rate": 2.409698498929553e-05, "loss": 1.0785, "step": 13590 }, { "epoch": 1.68, "learning_rate": 2.4061024897909223e-05, "loss": 1.0908, "step": 13600 }, { "epoch": 1.68, "learning_rate": 2.4025066751907378e-05, "loss": 1.0931, "step": 13610 }, { "epoch": 1.68, "learning_rate": 2.398911062578871e-05, "loss": 1.0761, "step": 13620 }, { "epoch": 1.69, "learning_rate": 2.3953156594047715e-05, "loss": 1.1235, "step": 13630 }, { "epoch": 1.69, "learning_rate": 2.391720473117456e-05, "loss": 1.0935, "step": 13640 }, { "epoch": 1.69, "learning_rate": 2.388125511165494e-05, "loss": 1.0916, "step": 13650 }, { "epoch": 1.69, "learning_rate": 2.384530780996986e-05, "loss": 1.1348, "step": 13660 }, { "epoch": 1.69, "learning_rate": 2.3809362900595573e-05, "loss": 1.0946, "step": 13670 }, { "epoch": 1.69, "learning_rate": 2.377342045800333e-05, "loss": 1.0831, "step": 13680 }, { "epoch": 1.69, "learning_rate": 2.3737480556659285e-05, "loss": 1.0529, "step": 13690 }, { "epoch": 1.69, "learning_rate": 2.370154327102436e-05, "loss": 1.1044, "step": 13700 }, { "epoch": 1.7, "learning_rate": 2.3665608675554005e-05, "loss": 1.0843, "step": 13710 }, { "epoch": 1.7, "learning_rate": 2.3629676844698127e-05, "loss": 1.0959, "step": 13720 }, { "epoch": 1.7, "learning_rate": 2.3593747852900913e-05, "loss": 1.0895, "step": 13730 }, { "epoch": 1.7, "learning_rate": 2.3557821774600633e-05, "loss": 1.1204, "step": 13740 }, { "epoch": 1.7, "learning_rate": 2.352189868422957e-05, "loss": 1.0996, "step": 13750 }, { "epoch": 1.7, "learning_rate": 2.3485978656213763e-05, "loss": 1.1129, "step": 13760 }, { "epoch": 1.7, "learning_rate": 2.3450061764972942e-05, "loss": 1.0746, "step": 13770 }, { "epoch": 1.7, "learning_rate": 2.341414808492034e-05, "loss": 1.0801, "step": 13780 }, { "epoch": 1.71, "learning_rate": 2.3378237690462512e-05, "loss": 1.0826, "step": 13790 }, { "epoch": 1.71, "learning_rate": 2.3342330655999222e-05, "loss": 1.084, "step": 13800 }, { "epoch": 1.71, "learning_rate": 2.3306427055923275e-05, "loss": 1.0969, "step": 13810 }, { "epoch": 1.71, "learning_rate": 2.327052696462035e-05, "loss": 1.032, "step": 13820 }, { "epoch": 1.71, "learning_rate": 2.3234630456468872e-05, "loss": 1.09, "step": 13830 }, { "epoch": 1.71, "learning_rate": 2.3198737605839824e-05, "loss": 1.0605, "step": 13840 }, { "epoch": 1.71, "learning_rate": 2.3162848487096616e-05, "loss": 1.064, "step": 13850 }, { "epoch": 1.71, "learning_rate": 2.3126963174594947e-05, "loss": 1.0752, "step": 13860 }, { "epoch": 1.72, "learning_rate": 2.3091081742682596e-05, "loss": 1.118, "step": 13870 }, { "epoch": 1.72, "learning_rate": 2.3055204265699338e-05, "loss": 1.0587, "step": 13880 }, { "epoch": 1.72, "learning_rate": 2.3019330817976716e-05, "loss": 1.0904, "step": 13890 }, { "epoch": 1.72, "learning_rate": 2.298346147383795e-05, "loss": 1.0617, "step": 13900 }, { "epoch": 1.72, "learning_rate": 2.2947596307597764e-05, "loss": 1.083, "step": 13910 }, { "epoch": 1.72, "learning_rate": 2.2911735393562206e-05, "loss": 1.0648, "step": 13920 }, { "epoch": 1.72, "learning_rate": 2.2875878806028518e-05, "loss": 1.0828, "step": 13930 }, { "epoch": 1.72, "learning_rate": 2.2840026619285005e-05, "loss": 1.0687, "step": 13940 }, { "epoch": 1.73, "learning_rate": 2.2804178907610803e-05, "loss": 1.0991, "step": 13950 }, { "epoch": 1.73, "learning_rate": 2.2768335745275836e-05, "loss": 1.0864, "step": 13960 }, { "epoch": 1.73, "learning_rate": 2.273249720654055e-05, "loss": 1.0826, "step": 13970 }, { "epoch": 1.73, "learning_rate": 2.2696663365655837e-05, "loss": 1.0779, "step": 13980 }, { "epoch": 1.73, "learning_rate": 2.2660834296862874e-05, "loss": 1.0897, "step": 13990 }, { "epoch": 1.73, "learning_rate": 2.2625010074392907e-05, "loss": 1.1008, "step": 14000 }, { "epoch": 1.73, "eval_loss": 1.0019787549972534, "eval_runtime": 5.7595, "eval_samples_per_second": 89.939, "eval_steps_per_second": 11.286, "step": 14000 }, { "epoch": 1.73, "learning_rate": 2.2589190772467183e-05, "loss": 1.0905, "step": 14010 }, { "epoch": 1.73, "learning_rate": 2.2553376465296727e-05, "loss": 1.0939, "step": 14020 }, { "epoch": 1.74, "learning_rate": 2.2517567227082217e-05, "loss": 1.0887, "step": 14030 }, { "epoch": 1.74, "learning_rate": 2.248176313201386e-05, "loss": 1.0822, "step": 14040 }, { "epoch": 1.74, "learning_rate": 2.244596425427117e-05, "loss": 1.0675, "step": 14050 }, { "epoch": 1.74, "learning_rate": 2.2410170668022863e-05, "loss": 1.0688, "step": 14060 }, { "epoch": 1.74, "learning_rate": 2.2374382447426707e-05, "loss": 1.06, "step": 14070 }, { "epoch": 1.74, "learning_rate": 2.2338599666629333e-05, "loss": 1.0736, "step": 14080 }, { "epoch": 1.74, "learning_rate": 2.230282239976612e-05, "loss": 1.1037, "step": 14090 }, { "epoch": 1.74, "learning_rate": 2.2267050720961003e-05, "loss": 1.0497, "step": 14100 }, { "epoch": 1.75, "learning_rate": 2.2231284704326345e-05, "loss": 1.1091, "step": 14110 }, { "epoch": 1.75, "learning_rate": 2.219552442396281e-05, "loss": 1.113, "step": 14120 }, { "epoch": 1.75, "learning_rate": 2.215976995395912e-05, "loss": 1.0675, "step": 14130 }, { "epoch": 1.75, "learning_rate": 2.212402136839201e-05, "loss": 1.1071, "step": 14140 }, { "epoch": 1.75, "learning_rate": 2.2088278741325997e-05, "loss": 1.0858, "step": 14150 }, { "epoch": 1.75, "learning_rate": 2.205254214681325e-05, "loss": 1.099, "step": 14160 }, { "epoch": 1.75, "learning_rate": 2.201681165889347e-05, "loss": 1.0855, "step": 14170 }, { "epoch": 1.75, "learning_rate": 2.1981087351593673e-05, "loss": 1.0687, "step": 14180 }, { "epoch": 1.76, "learning_rate": 2.194536929892807e-05, "loss": 1.0719, "step": 14190 }, { "epoch": 1.76, "learning_rate": 2.190965757489795e-05, "loss": 1.0843, "step": 14200 }, { "epoch": 1.76, "learning_rate": 2.1873952253491445e-05, "loss": 1.0915, "step": 14210 }, { "epoch": 1.76, "learning_rate": 2.1838253408683462e-05, "loss": 1.0946, "step": 14220 }, { "epoch": 1.76, "learning_rate": 2.1802561114435456e-05, "loss": 1.1188, "step": 14230 }, { "epoch": 1.76, "learning_rate": 2.176687544469532e-05, "loss": 1.116, "step": 14240 }, { "epoch": 1.76, "learning_rate": 2.173119647339725e-05, "loss": 1.0368, "step": 14250 }, { "epoch": 1.76, "learning_rate": 2.169552427446152e-05, "loss": 1.1216, "step": 14260 }, { "epoch": 1.77, "learning_rate": 2.1659858921794407e-05, "loss": 1.0735, "step": 14270 }, { "epoch": 1.77, "learning_rate": 2.162420048928798e-05, "loss": 1.1231, "step": 14280 }, { "epoch": 1.77, "learning_rate": 2.158854905081999e-05, "loss": 1.0756, "step": 14290 }, { "epoch": 1.77, "learning_rate": 2.1552904680253694e-05, "loss": 1.0849, "step": 14300 }, { "epoch": 1.77, "learning_rate": 2.1517267451437686e-05, "loss": 1.052, "step": 14310 }, { "epoch": 1.77, "learning_rate": 2.1481637438205782e-05, "loss": 1.0669, "step": 14320 }, { "epoch": 1.77, "learning_rate": 2.144601471437686e-05, "loss": 1.0848, "step": 14330 }, { "epoch": 1.77, "learning_rate": 2.141039935375466e-05, "loss": 1.0665, "step": 14340 }, { "epoch": 1.78, "learning_rate": 2.1374791430127708e-05, "loss": 1.1204, "step": 14350 }, { "epoch": 1.78, "learning_rate": 2.1339191017269087e-05, "loss": 1.0985, "step": 14360 }, { "epoch": 1.78, "learning_rate": 2.130359818893633e-05, "loss": 1.113, "step": 14370 }, { "epoch": 1.78, "learning_rate": 2.126801301887128e-05, "loss": 1.0699, "step": 14380 }, { "epoch": 1.78, "learning_rate": 2.1232435580799876e-05, "loss": 1.0659, "step": 14390 }, { "epoch": 1.78, "learning_rate": 2.1196865948432052e-05, "loss": 1.1243, "step": 14400 }, { "epoch": 1.78, "learning_rate": 2.116130419546159e-05, "loss": 1.0576, "step": 14410 }, { "epoch": 1.78, "learning_rate": 2.1125750395565915e-05, "loss": 1.0976, "step": 14420 }, { "epoch": 1.78, "learning_rate": 2.1090204622406008e-05, "loss": 1.0876, "step": 14430 }, { "epoch": 1.79, "learning_rate": 2.1054666949626178e-05, "loss": 1.0754, "step": 14440 }, { "epoch": 1.79, "learning_rate": 2.101913745085399e-05, "loss": 1.0733, "step": 14450 }, { "epoch": 1.79, "learning_rate": 2.0983616199700063e-05, "loss": 1.0759, "step": 14460 }, { "epoch": 1.79, "learning_rate": 2.094810326975791e-05, "loss": 1.0527, "step": 14470 }, { "epoch": 1.79, "learning_rate": 2.0912598734603843e-05, "loss": 1.0269, "step": 14480 }, { "epoch": 1.79, "learning_rate": 2.0877102667796733e-05, "loss": 1.1048, "step": 14490 }, { "epoch": 1.79, "learning_rate": 2.084161514287794e-05, "loss": 1.0893, "step": 14500 }, { "epoch": 1.79, "eval_loss": 1.0002995729446411, "eval_runtime": 5.7626, "eval_samples_per_second": 89.89, "eval_steps_per_second": 11.28, "step": 14500 }, { "epoch": 1.79, "learning_rate": 2.0806136233371122e-05, "loss": 1.0777, "step": 14510 }, { "epoch": 1.8, "learning_rate": 2.0770666012782077e-05, "loss": 1.0755, "step": 14520 }, { "epoch": 1.8, "learning_rate": 2.0735204554598602e-05, "loss": 1.0881, "step": 14530 }, { "epoch": 1.8, "learning_rate": 2.0699751932290368e-05, "loss": 1.0882, "step": 14540 }, { "epoch": 1.8, "learning_rate": 2.0664308219308685e-05, "loss": 1.0791, "step": 14550 }, { "epoch": 1.8, "learning_rate": 2.062887348908646e-05, "loss": 1.1286, "step": 14560 }, { "epoch": 1.8, "learning_rate": 2.059344781503796e-05, "loss": 1.0334, "step": 14570 }, { "epoch": 1.8, "learning_rate": 2.0558031270558678e-05, "loss": 1.1084, "step": 14580 }, { "epoch": 1.8, "learning_rate": 2.0522623929025237e-05, "loss": 1.0731, "step": 14590 }, { "epoch": 1.81, "learning_rate": 2.048722586379514e-05, "loss": 1.0651, "step": 14600 }, { "epoch": 1.81, "learning_rate": 2.0451837148206714e-05, "loss": 1.0947, "step": 14610 }, { "epoch": 1.81, "learning_rate": 2.0416457855578892e-05, "loss": 1.1124, "step": 14620 }, { "epoch": 1.81, "learning_rate": 2.038108805921108e-05, "loss": 1.0508, "step": 14630 }, { "epoch": 1.81, "learning_rate": 2.034572783238304e-05, "loss": 1.0964, "step": 14640 }, { "epoch": 1.81, "learning_rate": 2.0310377248354666e-05, "loss": 1.093, "step": 14650 }, { "epoch": 1.81, "learning_rate": 2.02750363803659e-05, "loss": 1.0838, "step": 14660 }, { "epoch": 1.81, "learning_rate": 2.0239705301636566e-05, "loss": 1.0922, "step": 14670 }, { "epoch": 1.82, "learning_rate": 2.0204384085366165e-05, "loss": 1.084, "step": 14680 }, { "epoch": 1.82, "learning_rate": 2.0169072804733812e-05, "loss": 1.0713, "step": 14690 }, { "epoch": 1.82, "learning_rate": 2.0133771532898e-05, "loss": 1.0902, "step": 14700 }, { "epoch": 1.82, "learning_rate": 2.0098480342996494e-05, "loss": 1.0772, "step": 14710 }, { "epoch": 1.82, "learning_rate": 2.0063199308146204e-05, "loss": 1.0848, "step": 14720 }, { "epoch": 1.82, "learning_rate": 2.0027928501442937e-05, "loss": 1.0837, "step": 14730 }, { "epoch": 1.82, "learning_rate": 1.999266799596138e-05, "loss": 1.0704, "step": 14740 }, { "epoch": 1.82, "learning_rate": 1.995741786475483e-05, "loss": 1.0567, "step": 14750 }, { "epoch": 1.83, "learning_rate": 1.9922178180855094e-05, "loss": 1.1145, "step": 14760 }, { "epoch": 1.83, "learning_rate": 1.9886949017272366e-05, "loss": 1.0776, "step": 14770 }, { "epoch": 1.83, "learning_rate": 1.9851730446995004e-05, "loss": 1.1325, "step": 14780 }, { "epoch": 1.83, "learning_rate": 1.9816522542989437e-05, "loss": 1.0749, "step": 14790 }, { "epoch": 1.83, "learning_rate": 1.978132537820001e-05, "loss": 1.1291, "step": 14800 }, { "epoch": 1.83, "learning_rate": 1.974613902554878e-05, "loss": 1.086, "step": 14810 }, { "epoch": 1.83, "learning_rate": 1.971096355793546e-05, "loss": 1.0986, "step": 14820 }, { "epoch": 1.83, "learning_rate": 1.9675799048237147e-05, "loss": 1.0714, "step": 14830 }, { "epoch": 1.84, "learning_rate": 1.9640645569308272e-05, "loss": 1.1406, "step": 14840 }, { "epoch": 1.84, "learning_rate": 1.9605503193980423e-05, "loss": 1.0807, "step": 14850 }, { "epoch": 1.84, "learning_rate": 1.9570371995062152e-05, "loss": 1.0688, "step": 14860 }, { "epoch": 1.84, "learning_rate": 1.9535252045338866e-05, "loss": 1.0679, "step": 14870 }, { "epoch": 1.84, "learning_rate": 1.9500143417572692e-05, "loss": 1.0699, "step": 14880 }, { "epoch": 1.84, "learning_rate": 1.9465046184502248e-05, "loss": 1.1133, "step": 14890 }, { "epoch": 1.84, "learning_rate": 1.9429960418842603e-05, "loss": 1.0837, "step": 14900 }, { "epoch": 1.84, "learning_rate": 1.9394886193285015e-05, "loss": 1.078, "step": 14910 }, { "epoch": 1.85, "learning_rate": 1.935982358049687e-05, "loss": 1.1155, "step": 14920 }, { "epoch": 1.85, "learning_rate": 1.9324772653121483e-05, "loss": 1.0921, "step": 14930 }, { "epoch": 1.85, "learning_rate": 1.9289733483777944e-05, "loss": 1.066, "step": 14940 }, { "epoch": 1.85, "learning_rate": 1.9254706145061016e-05, "loss": 1.0833, "step": 14950 }, { "epoch": 1.85, "learning_rate": 1.9219690709540914e-05, "loss": 1.0873, "step": 14960 }, { "epoch": 1.85, "learning_rate": 1.918468724976321e-05, "loss": 1.0585, "step": 14970 }, { "epoch": 1.85, "learning_rate": 1.9149695838248674e-05, "loss": 1.0517, "step": 14980 }, { "epoch": 1.85, "learning_rate": 1.911471654749309e-05, "loss": 1.0816, "step": 14990 }, { "epoch": 1.86, "learning_rate": 1.9079749449967148e-05, "loss": 1.0257, "step": 15000 }, { "epoch": 1.86, "eval_loss": 0.9986115097999573, "eval_runtime": 5.7655, "eval_samples_per_second": 89.845, "eval_steps_per_second": 11.274, "step": 15000 }, { "epoch": 1.86, "learning_rate": 1.9044794618116284e-05, "loss": 1.0465, "step": 15010 }, { "epoch": 1.86, "learning_rate": 1.9009852124360487e-05, "loss": 1.0867, "step": 15020 }, { "epoch": 1.86, "learning_rate": 1.8974922041094227e-05, "loss": 1.0526, "step": 15030 }, { "epoch": 1.86, "learning_rate": 1.894000444068622e-05, "loss": 1.0998, "step": 15040 }, { "epoch": 1.86, "learning_rate": 1.8905099395479348e-05, "loss": 1.0879, "step": 15050 }, { "epoch": 1.86, "learning_rate": 1.8870206977790485e-05, "loss": 1.0923, "step": 15060 }, { "epoch": 1.86, "learning_rate": 1.883532725991031e-05, "loss": 1.1194, "step": 15070 }, { "epoch": 1.87, "learning_rate": 1.8800460314103233e-05, "loss": 1.072, "step": 15080 }, { "epoch": 1.87, "learning_rate": 1.876560621260717e-05, "loss": 1.0629, "step": 15090 }, { "epoch": 1.87, "learning_rate": 1.8730765027633428e-05, "loss": 1.0837, "step": 15100 }, { "epoch": 1.87, "learning_rate": 1.869593683136659e-05, "loss": 1.091, "step": 15110 }, { "epoch": 1.87, "learning_rate": 1.8661121695964273e-05, "loss": 1.0649, "step": 15120 }, { "epoch": 1.87, "learning_rate": 1.8626319693557066e-05, "loss": 1.0886, "step": 15130 }, { "epoch": 1.87, "learning_rate": 1.8591530896248365e-05, "loss": 1.0789, "step": 15140 }, { "epoch": 1.87, "learning_rate": 1.8556755376114164e-05, "loss": 1.1161, "step": 15150 }, { "epoch": 1.88, "learning_rate": 1.8521993205203e-05, "loss": 1.0907, "step": 15160 }, { "epoch": 1.88, "learning_rate": 1.8487244455535696e-05, "loss": 1.0725, "step": 15170 }, { "epoch": 1.88, "learning_rate": 1.845250919910531e-05, "loss": 1.0891, "step": 15180 }, { "epoch": 1.88, "learning_rate": 1.8417787507876947e-05, "loss": 1.0671, "step": 15190 }, { "epoch": 1.88, "learning_rate": 1.8383079453787567e-05, "loss": 1.0897, "step": 15200 }, { "epoch": 1.88, "learning_rate": 1.8348385108745926e-05, "loss": 1.0891, "step": 15210 }, { "epoch": 1.88, "learning_rate": 1.8313704544632347e-05, "loss": 1.0926, "step": 15220 }, { "epoch": 1.88, "learning_rate": 1.82790378332986e-05, "loss": 1.0931, "step": 15230 }, { "epoch": 1.89, "learning_rate": 1.8244385046567774e-05, "loss": 1.0992, "step": 15240 }, { "epoch": 1.89, "learning_rate": 1.8209746256234092e-05, "loss": 1.0846, "step": 15250 }, { "epoch": 1.89, "learning_rate": 1.8175121534062776e-05, "loss": 1.0472, "step": 15260 }, { "epoch": 1.89, "learning_rate": 1.8140510951789934e-05, "loss": 1.0939, "step": 15270 }, { "epoch": 1.89, "learning_rate": 1.810591458112233e-05, "loss": 1.0917, "step": 15280 }, { "epoch": 1.89, "learning_rate": 1.807133249373733e-05, "loss": 1.1027, "step": 15290 }, { "epoch": 1.89, "learning_rate": 1.803676476128267e-05, "loss": 1.1106, "step": 15300 }, { "epoch": 1.89, "learning_rate": 1.8002211455376363e-05, "loss": 1.0574, "step": 15310 }, { "epoch": 1.9, "learning_rate": 1.796767264760655e-05, "loss": 1.0676, "step": 15320 }, { "epoch": 1.9, "learning_rate": 1.7933148409531286e-05, "loss": 1.0656, "step": 15330 }, { "epoch": 1.9, "learning_rate": 1.7898638812678496e-05, "loss": 1.0868, "step": 15340 }, { "epoch": 1.9, "learning_rate": 1.7864143928545745e-05, "loss": 1.0756, "step": 15350 }, { "epoch": 1.9, "learning_rate": 1.7829663828600096e-05, "loss": 1.0795, "step": 15360 }, { "epoch": 1.9, "learning_rate": 1.779519858427802e-05, "loss": 1.0768, "step": 15370 }, { "epoch": 1.9, "learning_rate": 1.776074826698519e-05, "loss": 1.0224, "step": 15380 }, { "epoch": 1.9, "learning_rate": 1.7726312948096344e-05, "loss": 1.0702, "step": 15390 }, { "epoch": 1.9, "learning_rate": 1.7691892698955177e-05, "loss": 1.0781, "step": 15400 }, { "epoch": 1.91, "learning_rate": 1.7657487590874117e-05, "loss": 1.0737, "step": 15410 }, { "epoch": 1.91, "learning_rate": 1.7623097695134267e-05, "loss": 1.1002, "step": 15420 }, { "epoch": 1.91, "learning_rate": 1.7588723082985196e-05, "loss": 1.0965, "step": 15430 }, { "epoch": 1.91, "learning_rate": 1.7554363825644787e-05, "loss": 1.1087, "step": 15440 }, { "epoch": 1.91, "learning_rate": 1.7520019994299153e-05, "loss": 1.0702, "step": 15450 }, { "epoch": 1.91, "learning_rate": 1.7485691660102405e-05, "loss": 1.0821, "step": 15460 }, { "epoch": 1.91, "learning_rate": 1.7451378894176565e-05, "loss": 1.0597, "step": 15470 }, { "epoch": 1.91, "learning_rate": 1.741708176761142e-05, "loss": 1.0824, "step": 15480 }, { "epoch": 1.92, "learning_rate": 1.7382800351464316e-05, "loss": 1.1146, "step": 15490 }, { "epoch": 1.92, "learning_rate": 1.7348534716760084e-05, "loss": 1.1132, "step": 15500 }, { "epoch": 1.92, "eval_loss": 0.9955187439918518, "eval_runtime": 5.7634, "eval_samples_per_second": 89.877, "eval_steps_per_second": 11.278, "step": 15500 }, { "epoch": 1.92, "learning_rate": 1.7314284934490838e-05, "loss": 1.0631, "step": 15510 }, { "epoch": 1.92, "learning_rate": 1.728005107561585e-05, "loss": 1.1108, "step": 15520 }, { "epoch": 1.92, "learning_rate": 1.724583321106142e-05, "loss": 1.0998, "step": 15530 }, { "epoch": 1.92, "learning_rate": 1.7211631411720687e-05, "loss": 1.0918, "step": 15540 }, { "epoch": 1.92, "learning_rate": 1.717744574845352e-05, "loss": 1.0374, "step": 15550 }, { "epoch": 1.92, "learning_rate": 1.714327629208636e-05, "loss": 1.0973, "step": 15560 }, { "epoch": 1.93, "learning_rate": 1.710912311341204e-05, "loss": 1.071, "step": 15570 }, { "epoch": 1.93, "learning_rate": 1.7074986283189728e-05, "loss": 1.0946, "step": 15580 }, { "epoch": 1.93, "learning_rate": 1.704086587214466e-05, "loss": 1.0998, "step": 15590 }, { "epoch": 1.93, "learning_rate": 1.7006761950968082e-05, "loss": 1.071, "step": 15600 }, { "epoch": 1.93, "learning_rate": 1.6972674590317088e-05, "loss": 1.0534, "step": 15610 }, { "epoch": 1.93, "learning_rate": 1.693860386081443e-05, "loss": 1.1007, "step": 15620 }, { "epoch": 1.93, "learning_rate": 1.6904549833048435e-05, "loss": 1.077, "step": 15630 }, { "epoch": 1.93, "learning_rate": 1.68705125775728e-05, "loss": 1.0493, "step": 15640 }, { "epoch": 1.94, "learning_rate": 1.6836492164906492e-05, "loss": 1.0791, "step": 15650 }, { "epoch": 1.94, "learning_rate": 1.6802488665533585e-05, "loss": 1.0917, "step": 15660 }, { "epoch": 1.94, "learning_rate": 1.6768502149903083e-05, "loss": 1.09, "step": 15670 }, { "epoch": 1.94, "learning_rate": 1.6734532688428838e-05, "loss": 1.1026, "step": 15680 }, { "epoch": 1.94, "learning_rate": 1.6700580351489365e-05, "loss": 1.0366, "step": 15690 }, { "epoch": 1.94, "learning_rate": 1.6666645209427657e-05, "loss": 1.1109, "step": 15700 }, { "epoch": 1.94, "learning_rate": 1.6632727332551147e-05, "loss": 1.0756, "step": 15710 }, { "epoch": 1.94, "learning_rate": 1.6598826791131438e-05, "loss": 1.0642, "step": 15720 }, { "epoch": 1.95, "learning_rate": 1.6564943655404258e-05, "loss": 1.0914, "step": 15730 }, { "epoch": 1.95, "learning_rate": 1.6531077995569267e-05, "loss": 1.0902, "step": 15740 }, { "epoch": 1.95, "learning_rate": 1.6497229881789892e-05, "loss": 1.0591, "step": 15750 }, { "epoch": 1.95, "learning_rate": 1.646339938419324e-05, "loss": 1.0667, "step": 15760 }, { "epoch": 1.95, "learning_rate": 1.642958657286991e-05, "loss": 1.1131, "step": 15770 }, { "epoch": 1.95, "learning_rate": 1.6395791517873842e-05, "loss": 1.086, "step": 15780 }, { "epoch": 1.95, "learning_rate": 1.6362014289222217e-05, "loss": 1.0942, "step": 15790 }, { "epoch": 1.95, "learning_rate": 1.6328254956895253e-05, "loss": 1.1188, "step": 15800 }, { "epoch": 1.96, "learning_rate": 1.6294513590836116e-05, "loss": 1.0417, "step": 15810 }, { "epoch": 1.96, "learning_rate": 1.6260790260950737e-05, "loss": 1.075, "step": 15820 }, { "epoch": 1.96, "learning_rate": 1.622708503710767e-05, "loss": 1.0743, "step": 15830 }, { "epoch": 1.96, "learning_rate": 1.6193397989137983e-05, "loss": 1.0884, "step": 15840 }, { "epoch": 1.96, "learning_rate": 1.615972918683506e-05, "loss": 1.0679, "step": 15850 }, { "epoch": 1.96, "learning_rate": 1.6126078699954486e-05, "loss": 1.0816, "step": 15860 }, { "epoch": 1.96, "learning_rate": 1.609244659821393e-05, "loss": 1.0676, "step": 15870 }, { "epoch": 1.96, "learning_rate": 1.605883295129293e-05, "loss": 1.0918, "step": 15880 }, { "epoch": 1.97, "learning_rate": 1.6025237828832816e-05, "loss": 1.109, "step": 15890 }, { "epoch": 1.97, "learning_rate": 1.5991661300436533e-05, "loss": 1.063, "step": 15900 }, { "epoch": 1.97, "learning_rate": 1.5958103435668486e-05, "loss": 1.0995, "step": 15910 }, { "epoch": 1.97, "learning_rate": 1.592456430405444e-05, "loss": 1.0594, "step": 15920 }, { "epoch": 1.97, "learning_rate": 1.589104397508133e-05, "loss": 1.1224, "step": 15930 }, { "epoch": 1.97, "learning_rate": 1.585754251819713e-05, "loss": 1.0781, "step": 15940 }, { "epoch": 1.97, "learning_rate": 1.5824060002810744e-05, "loss": 1.0535, "step": 15950 }, { "epoch": 1.97, "learning_rate": 1.579059649829179e-05, "loss": 1.0908, "step": 15960 }, { "epoch": 1.98, "learning_rate": 1.5757152073970545e-05, "loss": 1.0995, "step": 15970 }, { "epoch": 1.98, "learning_rate": 1.5723726799137714e-05, "loss": 1.0563, "step": 15980 }, { "epoch": 1.98, "learning_rate": 1.5690320743044345e-05, "loss": 1.0789, "step": 15990 }, { "epoch": 1.98, "learning_rate": 1.5656933974901688e-05, "loss": 1.048, "step": 16000 }, { "epoch": 1.98, "eval_loss": 0.9951100945472717, "eval_runtime": 5.7671, "eval_samples_per_second": 89.819, "eval_steps_per_second": 11.271, "step": 16000 }, { "epoch": 1.98, "learning_rate": 1.562356656388099e-05, "loss": 1.0634, "step": 16010 }, { "epoch": 1.98, "learning_rate": 1.559021857911344e-05, "loss": 1.0741, "step": 16020 }, { "epoch": 1.98, "learning_rate": 1.555689008968994e-05, "loss": 1.0496, "step": 16030 }, { "epoch": 1.98, "learning_rate": 1.5523581164661016e-05, "loss": 1.0502, "step": 16040 }, { "epoch": 1.99, "learning_rate": 1.5490291873036678e-05, "loss": 1.0485, "step": 16050 }, { "epoch": 1.99, "learning_rate": 1.545702228378622e-05, "loss": 1.0796, "step": 16060 }, { "epoch": 1.99, "learning_rate": 1.542377246583815e-05, "loss": 1.0784, "step": 16070 }, { "epoch": 1.99, "learning_rate": 1.5390542488080008e-05, "loss": 1.0884, "step": 16080 }, { "epoch": 1.99, "learning_rate": 1.535733241935821e-05, "loss": 1.0825, "step": 16090 }, { "epoch": 1.99, "learning_rate": 1.532414232847795e-05, "loss": 1.1553, "step": 16100 }, { "epoch": 1.99, "learning_rate": 1.529097228420302e-05, "loss": 1.0951, "step": 16110 }, { "epoch": 1.99, "learning_rate": 1.5257822355255657e-05, "loss": 1.0855, "step": 16120 }, { "epoch": 2.0, "learning_rate": 1.5224692610316471e-05, "loss": 1.0998, "step": 16130 }, { "epoch": 2.0, "learning_rate": 1.51915831180242e-05, "loss": 1.0826, "step": 16140 }, { "epoch": 2.0, "learning_rate": 1.5158493946975672e-05, "loss": 1.0591, "step": 16150 }, { "epoch": 2.0, "learning_rate": 1.5125425165725585e-05, "loss": 1.0561, "step": 16160 }, { "epoch": 2.0, "learning_rate": 1.5092376842786386e-05, "loss": 1.0665, "step": 16170 }, { "epoch": 2.0, "learning_rate": 1.5059349046628163e-05, "loss": 1.0533, "step": 16180 }, { "epoch": 2.0, "learning_rate": 1.5026341845678454e-05, "loss": 1.0734, "step": 16190 }, { "epoch": 2.0, "learning_rate": 1.4993355308322135e-05, "loss": 1.0379, "step": 16200 }, { "epoch": 2.01, "learning_rate": 1.4960389502901284e-05, "loss": 1.0452, "step": 16210 }, { "epoch": 2.01, "learning_rate": 1.492744449771499e-05, "loss": 1.0353, "step": 16220 }, { "epoch": 2.01, "learning_rate": 1.489452036101929e-05, "loss": 1.0286, "step": 16230 }, { "epoch": 2.01, "learning_rate": 1.4861617161026967e-05, "loss": 1.0727, "step": 16240 }, { "epoch": 2.01, "learning_rate": 1.482873496590741e-05, "loss": 1.0677, "step": 16250 }, { "epoch": 2.01, "learning_rate": 1.4795873843786524e-05, "loss": 1.0828, "step": 16260 }, { "epoch": 2.01, "learning_rate": 1.4763033862746519e-05, "loss": 1.1342, "step": 16270 }, { "epoch": 2.01, "learning_rate": 1.4730215090825838e-05, "loss": 1.0501, "step": 16280 }, { "epoch": 2.02, "learning_rate": 1.469741759601897e-05, "loss": 1.056, "step": 16290 }, { "epoch": 2.02, "learning_rate": 1.4664641446276295e-05, "loss": 1.0772, "step": 16300 }, { "epoch": 2.02, "learning_rate": 1.4631886709504019e-05, "loss": 1.0691, "step": 16310 }, { "epoch": 2.02, "learning_rate": 1.4599153453563943e-05, "loss": 1.0591, "step": 16320 }, { "epoch": 2.02, "learning_rate": 1.4566441746273376e-05, "loss": 1.0511, "step": 16330 }, { "epoch": 2.02, "learning_rate": 1.4533751655405015e-05, "loss": 1.053, "step": 16340 }, { "epoch": 2.02, "learning_rate": 1.450108324868671e-05, "loss": 1.0393, "step": 16350 }, { "epoch": 2.02, "learning_rate": 1.4468436593801416e-05, "loss": 1.0728, "step": 16360 }, { "epoch": 2.02, "learning_rate": 1.443581175838706e-05, "loss": 1.0796, "step": 16370 }, { "epoch": 2.03, "learning_rate": 1.4403208810036295e-05, "loss": 1.0782, "step": 16380 }, { "epoch": 2.03, "learning_rate": 1.4370627816296468e-05, "loss": 1.0577, "step": 16390 }, { "epoch": 2.03, "learning_rate": 1.4338068844669434e-05, "loss": 1.094, "step": 16400 }, { "epoch": 2.03, "learning_rate": 1.4305531962611406e-05, "loss": 1.072, "step": 16410 }, { "epoch": 2.03, "learning_rate": 1.4273017237532882e-05, "loss": 1.0528, "step": 16420 }, { "epoch": 2.03, "learning_rate": 1.4240524736798383e-05, "loss": 1.0699, "step": 16430 }, { "epoch": 2.03, "learning_rate": 1.4208054527726422e-05, "loss": 1.0727, "step": 16440 }, { "epoch": 2.03, "learning_rate": 1.4175606677589363e-05, "loss": 1.0395, "step": 16450 }, { "epoch": 2.04, "learning_rate": 1.4143181253613169e-05, "loss": 1.0805, "step": 16460 }, { "epoch": 2.04, "learning_rate": 1.41107783229774e-05, "loss": 1.0824, "step": 16470 }, { "epoch": 2.04, "learning_rate": 1.4078397952814993e-05, "loss": 1.0447, "step": 16480 }, { "epoch": 2.04, "learning_rate": 1.404604021021213e-05, "loss": 1.0873, "step": 16490 }, { "epoch": 2.04, "learning_rate": 1.4013705162208163e-05, "loss": 1.0603, "step": 16500 }, { "epoch": 2.04, "eval_loss": 0.9945240616798401, "eval_runtime": 5.7637, "eval_samples_per_second": 89.872, "eval_steps_per_second": 11.277, "step": 16500 }, { "epoch": 2.04, "learning_rate": 1.3981392875795363e-05, "loss": 1.0761, "step": 16510 }, { "epoch": 2.04, "learning_rate": 1.3949103417918873e-05, "loss": 1.0865, "step": 16520 }, { "epoch": 2.04, "learning_rate": 1.3916836855476545e-05, "loss": 1.0787, "step": 16530 }, { "epoch": 2.05, "learning_rate": 1.3884593255318778e-05, "loss": 1.0605, "step": 16540 }, { "epoch": 2.05, "learning_rate": 1.3852372684248429e-05, "loss": 1.0953, "step": 16550 }, { "epoch": 2.05, "learning_rate": 1.38201752090206e-05, "loss": 1.0849, "step": 16560 }, { "epoch": 2.05, "learning_rate": 1.378800089634256e-05, "loss": 1.0835, "step": 16570 }, { "epoch": 2.05, "learning_rate": 1.3755849812873622e-05, "loss": 1.084, "step": 16580 }, { "epoch": 2.05, "learning_rate": 1.3723722025224916e-05, "loss": 1.0312, "step": 16590 }, { "epoch": 2.05, "learning_rate": 1.3691617599959345e-05, "loss": 1.0514, "step": 16600 }, { "epoch": 2.05, "learning_rate": 1.36595366035914e-05, "loss": 1.0556, "step": 16610 }, { "epoch": 2.06, "learning_rate": 1.3627479102587019e-05, "loss": 1.0867, "step": 16620 }, { "epoch": 2.06, "learning_rate": 1.3595445163363502e-05, "loss": 1.0725, "step": 16630 }, { "epoch": 2.06, "learning_rate": 1.356343485228928e-05, "loss": 1.062, "step": 16640 }, { "epoch": 2.06, "learning_rate": 1.3531448235683867e-05, "loss": 1.071, "step": 16650 }, { "epoch": 2.06, "learning_rate": 1.3499485379817667e-05, "loss": 1.0432, "step": 16660 }, { "epoch": 2.06, "learning_rate": 1.3467546350911864e-05, "loss": 1.0427, "step": 16670 }, { "epoch": 2.06, "learning_rate": 1.3435631215138305e-05, "loss": 1.0421, "step": 16680 }, { "epoch": 2.06, "learning_rate": 1.3403740038619272e-05, "loss": 1.0783, "step": 16690 }, { "epoch": 2.07, "learning_rate": 1.337187288742745e-05, "loss": 0.9995, "step": 16700 }, { "epoch": 2.07, "learning_rate": 1.3340029827585766e-05, "loss": 1.0719, "step": 16710 }, { "epoch": 2.07, "learning_rate": 1.3308210925067182e-05, "loss": 1.0608, "step": 16720 }, { "epoch": 2.07, "learning_rate": 1.3276416245794646e-05, "loss": 1.0671, "step": 16730 }, { "epoch": 2.07, "learning_rate": 1.3244645855640914e-05, "loss": 1.041, "step": 16740 }, { "epoch": 2.07, "learning_rate": 1.32128998204284e-05, "loss": 1.0578, "step": 16750 }, { "epoch": 2.07, "learning_rate": 1.3181178205929115e-05, "loss": 1.0516, "step": 16760 }, { "epoch": 2.07, "learning_rate": 1.3149481077864401e-05, "loss": 1.0757, "step": 16770 }, { "epoch": 2.08, "learning_rate": 1.3117808501904902e-05, "loss": 1.0355, "step": 16780 }, { "epoch": 2.08, "learning_rate": 1.3086160543670429e-05, "loss": 1.0932, "step": 16790 }, { "epoch": 2.08, "learning_rate": 1.3054537268729711e-05, "loss": 1.0553, "step": 16800 }, { "epoch": 2.08, "learning_rate": 1.3022938742600432e-05, "loss": 1.0808, "step": 16810 }, { "epoch": 2.08, "learning_rate": 1.2991365030748914e-05, "loss": 1.0914, "step": 16820 }, { "epoch": 2.08, "learning_rate": 1.2959816198590108e-05, "loss": 1.0852, "step": 16830 }, { "epoch": 2.08, "learning_rate": 1.292829231148745e-05, "loss": 1.0732, "step": 16840 }, { "epoch": 2.08, "learning_rate": 1.2896793434752629e-05, "loss": 1.0669, "step": 16850 }, { "epoch": 2.09, "learning_rate": 1.2865319633645562e-05, "loss": 1.0665, "step": 16860 }, { "epoch": 2.09, "learning_rate": 1.2833870973374202e-05, "loss": 1.0414, "step": 16870 }, { "epoch": 2.09, "learning_rate": 1.2802447519094399e-05, "loss": 1.0664, "step": 16880 }, { "epoch": 2.09, "learning_rate": 1.2771049335909829e-05, "loss": 1.0634, "step": 16890 }, { "epoch": 2.09, "learning_rate": 1.2739676488871754e-05, "loss": 1.0998, "step": 16900 }, { "epoch": 2.09, "learning_rate": 1.2708329042978961e-05, "loss": 1.0705, "step": 16910 }, { "epoch": 2.09, "learning_rate": 1.2677007063177653e-05, "loss": 1.0861, "step": 16920 }, { "epoch": 2.09, "learning_rate": 1.26457106143612e-05, "loss": 1.0848, "step": 16930 }, { "epoch": 2.1, "learning_rate": 1.2614439761370128e-05, "loss": 1.0748, "step": 16940 }, { "epoch": 2.1, "learning_rate": 1.2583194568991921e-05, "loss": 1.0478, "step": 16950 }, { "epoch": 2.1, "learning_rate": 1.255197510196088e-05, "loss": 1.0528, "step": 16960 }, { "epoch": 2.1, "learning_rate": 1.2520781424958056e-05, "loss": 1.0654, "step": 16970 }, { "epoch": 2.1, "learning_rate": 1.2489613602611008e-05, "loss": 1.0721, "step": 16980 }, { "epoch": 2.1, "learning_rate": 1.2458471699493765e-05, "loss": 1.054, "step": 16990 }, { "epoch": 2.1, "learning_rate": 1.2427355780126647e-05, "loss": 1.0728, "step": 17000 }, { "epoch": 2.1, "eval_loss": 0.9933192133903503, "eval_runtime": 5.7602, "eval_samples_per_second": 89.928, "eval_steps_per_second": 11.284, "step": 17000 }, { "epoch": 2.1, "learning_rate": 1.2396265908976134e-05, "loss": 1.0699, "step": 17010 }, { "epoch": 2.11, "learning_rate": 1.2365202150454775e-05, "loss": 1.0771, "step": 17020 }, { "epoch": 2.11, "learning_rate": 1.2334164568920956e-05, "loss": 1.051, "step": 17030 }, { "epoch": 2.11, "learning_rate": 1.2303153228678866e-05, "loss": 1.0477, "step": 17040 }, { "epoch": 2.11, "learning_rate": 1.2272168193978353e-05, "loss": 1.0705, "step": 17050 }, { "epoch": 2.11, "learning_rate": 1.2241209529014705e-05, "loss": 1.0575, "step": 17060 }, { "epoch": 2.11, "learning_rate": 1.2210277297928616e-05, "loss": 1.0778, "step": 17070 }, { "epoch": 2.11, "learning_rate": 1.2179371564806006e-05, "loss": 1.0734, "step": 17080 }, { "epoch": 2.11, "learning_rate": 1.2148492393677885e-05, "loss": 1.0632, "step": 17090 }, { "epoch": 2.12, "learning_rate": 1.211763984852027e-05, "loss": 1.0603, "step": 17100 }, { "epoch": 2.12, "learning_rate": 1.2086813993253956e-05, "loss": 1.0633, "step": 17110 }, { "epoch": 2.12, "learning_rate": 1.2056014891744468e-05, "loss": 1.0286, "step": 17120 }, { "epoch": 2.12, "learning_rate": 1.202524260780194e-05, "loss": 1.0413, "step": 17130 }, { "epoch": 2.12, "learning_rate": 1.1994497205180864e-05, "loss": 1.0344, "step": 17140 }, { "epoch": 2.12, "learning_rate": 1.1963778747580126e-05, "loss": 1.074, "step": 17150 }, { "epoch": 2.12, "learning_rate": 1.1933087298642718e-05, "loss": 1.0629, "step": 17160 }, { "epoch": 2.12, "learning_rate": 1.19024229219557e-05, "loss": 1.0703, "step": 17170 }, { "epoch": 2.13, "learning_rate": 1.1871785681050075e-05, "loss": 1.0224, "step": 17180 }, { "epoch": 2.13, "learning_rate": 1.1841175639400565e-05, "loss": 1.0682, "step": 17190 }, { "epoch": 2.13, "learning_rate": 1.1810592860425582e-05, "loss": 1.0918, "step": 17200 }, { "epoch": 2.13, "learning_rate": 1.1780037407487046e-05, "loss": 1.0554, "step": 17210 }, { "epoch": 2.13, "learning_rate": 1.174950934389025e-05, "loss": 1.0611, "step": 17220 }, { "epoch": 2.13, "learning_rate": 1.171900873288378e-05, "loss": 1.0751, "step": 17230 }, { "epoch": 2.13, "learning_rate": 1.168853563765929e-05, "loss": 1.0518, "step": 17240 }, { "epoch": 2.13, "learning_rate": 1.1658090121351448e-05, "loss": 1.1095, "step": 17250 }, { "epoch": 2.14, "learning_rate": 1.1627672247037824e-05, "loss": 1.0296, "step": 17260 }, { "epoch": 2.14, "learning_rate": 1.1597282077738638e-05, "loss": 1.0758, "step": 17270 }, { "epoch": 2.14, "learning_rate": 1.1566919676416805e-05, "loss": 1.0651, "step": 17280 }, { "epoch": 2.14, "learning_rate": 1.1536585105977621e-05, "loss": 1.0812, "step": 17290 }, { "epoch": 2.14, "learning_rate": 1.150627842926877e-05, "loss": 1.0893, "step": 17300 }, { "epoch": 2.14, "learning_rate": 1.1475999709080158e-05, "loss": 1.0944, "step": 17310 }, { "epoch": 2.14, "learning_rate": 1.1445749008143722e-05, "loss": 1.0615, "step": 17320 }, { "epoch": 2.14, "learning_rate": 1.1415526389133387e-05, "loss": 1.0396, "step": 17330 }, { "epoch": 2.14, "learning_rate": 1.1385331914664887e-05, "loss": 1.0421, "step": 17340 }, { "epoch": 2.15, "learning_rate": 1.1355165647295631e-05, "loss": 1.0531, "step": 17350 }, { "epoch": 2.15, "learning_rate": 1.1325027649524633e-05, "loss": 1.0936, "step": 17360 }, { "epoch": 2.15, "learning_rate": 1.1294917983792272e-05, "loss": 1.0577, "step": 17370 }, { "epoch": 2.15, "learning_rate": 1.1264836712480267e-05, "loss": 1.0624, "step": 17380 }, { "epoch": 2.15, "learning_rate": 1.1234783897911528e-05, "loss": 1.0872, "step": 17390 }, { "epoch": 2.15, "learning_rate": 1.1204759602349951e-05, "loss": 1.0954, "step": 17400 }, { "epoch": 2.15, "learning_rate": 1.1174763888000411e-05, "loss": 1.0784, "step": 17410 }, { "epoch": 2.15, "learning_rate": 1.114479681700851e-05, "loss": 1.0936, "step": 17420 }, { "epoch": 2.16, "learning_rate": 1.1114858451460534e-05, "loss": 1.0726, "step": 17430 }, { "epoch": 2.16, "learning_rate": 1.1084948853383317e-05, "loss": 1.0388, "step": 17440 }, { "epoch": 2.16, "learning_rate": 1.1055068084744047e-05, "loss": 1.0738, "step": 17450 }, { "epoch": 2.16, "learning_rate": 1.1025216207450201e-05, "loss": 1.0499, "step": 17460 }, { "epoch": 2.16, "learning_rate": 1.0995393283349433e-05, "loss": 1.0618, "step": 17470 }, { "epoch": 2.16, "learning_rate": 1.096559937422934e-05, "loss": 1.0639, "step": 17480 }, { "epoch": 2.16, "learning_rate": 1.0935834541817489e-05, "loss": 1.0589, "step": 17490 }, { "epoch": 2.16, "learning_rate": 1.090609884778113e-05, "loss": 1.0812, "step": 17500 }, { "epoch": 2.16, "eval_loss": 0.9919288754463196, "eval_runtime": 5.7632, "eval_samples_per_second": 89.881, "eval_steps_per_second": 11.279, "step": 17500 }, { "epoch": 2.17, "learning_rate": 1.0876392353727177e-05, "loss": 1.0623, "step": 17510 }, { "epoch": 2.17, "learning_rate": 1.0846715121202075e-05, "loss": 1.0766, "step": 17520 }, { "epoch": 2.17, "learning_rate": 1.0817067211691584e-05, "loss": 1.091, "step": 17530 }, { "epoch": 2.17, "learning_rate": 1.0787448686620757e-05, "loss": 1.0658, "step": 17540 }, { "epoch": 2.17, "learning_rate": 1.0757859607353757e-05, "loss": 1.0578, "step": 17550 }, { "epoch": 2.17, "learning_rate": 1.072830003519372e-05, "loss": 1.0772, "step": 17560 }, { "epoch": 2.17, "learning_rate": 1.0698770031382702e-05, "loss": 1.079, "step": 17570 }, { "epoch": 2.17, "learning_rate": 1.0669269657101433e-05, "loss": 1.0738, "step": 17580 }, { "epoch": 2.18, "learning_rate": 1.0639798973469286e-05, "loss": 1.0909, "step": 17590 }, { "epoch": 2.18, "learning_rate": 1.0610358041544152e-05, "loss": 1.0681, "step": 17600 }, { "epoch": 2.18, "learning_rate": 1.0580946922322211e-05, "loss": 1.0665, "step": 17610 }, { "epoch": 2.18, "learning_rate": 1.0551565676737954e-05, "loss": 1.0916, "step": 17620 }, { "epoch": 2.18, "learning_rate": 1.0522214365663912e-05, "loss": 1.0602, "step": 17630 }, { "epoch": 2.18, "learning_rate": 1.0492893049910632e-05, "loss": 1.0712, "step": 17640 }, { "epoch": 2.18, "learning_rate": 1.0463601790226536e-05, "loss": 1.0618, "step": 17650 }, { "epoch": 2.18, "learning_rate": 1.0434340647297722e-05, "loss": 1.067, "step": 17660 }, { "epoch": 2.19, "learning_rate": 1.040510968174793e-05, "loss": 1.0851, "step": 17670 }, { "epoch": 2.19, "learning_rate": 1.0375908954138366e-05, "loss": 1.0737, "step": 17680 }, { "epoch": 2.19, "learning_rate": 1.034673852496759e-05, "loss": 1.0422, "step": 17690 }, { "epoch": 2.19, "learning_rate": 1.0317598454671403e-05, "loss": 1.0621, "step": 17700 }, { "epoch": 2.19, "learning_rate": 1.0288488803622682e-05, "loss": 1.0654, "step": 17710 }, { "epoch": 2.19, "learning_rate": 1.0259409632131281e-05, "loss": 1.0501, "step": 17720 }, { "epoch": 2.19, "learning_rate": 1.0230361000443955e-05, "loss": 1.0637, "step": 17730 }, { "epoch": 2.19, "learning_rate": 1.020134296874411e-05, "loss": 1.0703, "step": 17740 }, { "epoch": 2.2, "learning_rate": 1.0172355597151827e-05, "loss": 1.0653, "step": 17750 }, { "epoch": 2.2, "learning_rate": 1.0143398945723612e-05, "loss": 1.0522, "step": 17760 }, { "epoch": 2.2, "learning_rate": 1.0114473074452338e-05, "loss": 1.0574, "step": 17770 }, { "epoch": 2.2, "learning_rate": 1.0085578043267146e-05, "loss": 1.054, "step": 17780 }, { "epoch": 2.2, "learning_rate": 1.005671391203322e-05, "loss": 1.088, "step": 17790 }, { "epoch": 2.2, "learning_rate": 1.002788074055175e-05, "loss": 1.0259, "step": 17800 }, { "epoch": 2.2, "learning_rate": 9.99907858855982e-06, "loss": 1.081, "step": 17810 }, { "epoch": 2.2, "learning_rate": 9.970307515730182e-06, "loss": 1.0542, "step": 17820 }, { "epoch": 2.21, "learning_rate": 9.94156758167126e-06, "loss": 1.0599, "step": 17830 }, { "epoch": 2.21, "learning_rate": 9.912858845926908e-06, "loss": 1.0504, "step": 17840 }, { "epoch": 2.21, "learning_rate": 9.884181367976375e-06, "loss": 1.0625, "step": 17850 }, { "epoch": 2.21, "learning_rate": 9.855535207234164e-06, "loss": 1.0617, "step": 17860 }, { "epoch": 2.21, "learning_rate": 9.826920423049845e-06, "loss": 1.0646, "step": 17870 }, { "epoch": 2.21, "learning_rate": 9.798337074708044e-06, "loss": 1.0793, "step": 17880 }, { "epoch": 2.21, "learning_rate": 9.769785221428199e-06, "loss": 1.0962, "step": 17890 }, { "epoch": 2.21, "learning_rate": 9.741264922364521e-06, "loss": 1.0559, "step": 17900 }, { "epoch": 2.22, "learning_rate": 9.712776236605873e-06, "loss": 1.0964, "step": 17910 }, { "epoch": 2.22, "learning_rate": 9.68431922317557e-06, "loss": 1.0542, "step": 17920 }, { "epoch": 2.22, "learning_rate": 9.655893941031332e-06, "loss": 1.0214, "step": 17930 }, { "epoch": 2.22, "learning_rate": 9.627500449065166e-06, "loss": 1.0733, "step": 17940 }, { "epoch": 2.22, "learning_rate": 9.599138806103153e-06, "loss": 1.0449, "step": 17950 }, { "epoch": 2.22, "learning_rate": 9.570809070905456e-06, "loss": 1.0836, "step": 17960 }, { "epoch": 2.22, "learning_rate": 9.542511302166079e-06, "loss": 1.0487, "step": 17970 }, { "epoch": 2.22, "learning_rate": 9.51424555851281e-06, "loss": 1.0717, "step": 17980 }, { "epoch": 2.23, "learning_rate": 9.486011898507125e-06, "loss": 1.0881, "step": 17990 }, { "epoch": 2.23, "learning_rate": 9.45781038064396e-06, "loss": 1.0644, "step": 18000 }, { "epoch": 2.23, "eval_loss": 0.9902282357215881, "eval_runtime": 5.7605, "eval_samples_per_second": 89.922, "eval_steps_per_second": 11.284, "step": 18000 }, { "epoch": 2.23, "learning_rate": 9.429641063351735e-06, "loss": 1.1128, "step": 18010 }, { "epoch": 2.23, "learning_rate": 9.401504004992088e-06, "loss": 1.0436, "step": 18020 }, { "epoch": 2.23, "learning_rate": 9.373399263859848e-06, "loss": 1.0805, "step": 18030 }, { "epoch": 2.23, "learning_rate": 9.345326898182924e-06, "loss": 1.0437, "step": 18040 }, { "epoch": 2.23, "learning_rate": 9.317286966122091e-06, "loss": 1.0439, "step": 18050 }, { "epoch": 2.23, "learning_rate": 9.28927952577095e-06, "loss": 1.0882, "step": 18060 }, { "epoch": 2.24, "learning_rate": 9.261304635155817e-06, "loss": 1.0604, "step": 18070 }, { "epoch": 2.24, "learning_rate": 9.233362352235508e-06, "loss": 1.0871, "step": 18080 }, { "epoch": 2.24, "learning_rate": 9.205452734901354e-06, "loss": 1.0469, "step": 18090 }, { "epoch": 2.24, "learning_rate": 9.17757584097694e-06, "loss": 1.0601, "step": 18100 }, { "epoch": 2.24, "learning_rate": 9.149731728218089e-06, "loss": 1.0607, "step": 18110 }, { "epoch": 2.24, "learning_rate": 9.121920454312733e-06, "loss": 1.0605, "step": 18120 }, { "epoch": 2.24, "learning_rate": 9.094142076880705e-06, "loss": 1.0696, "step": 18130 }, { "epoch": 2.24, "learning_rate": 9.066396653473724e-06, "loss": 1.0658, "step": 18140 }, { "epoch": 2.25, "learning_rate": 9.038684241575248e-06, "loss": 1.0878, "step": 18150 }, { "epoch": 2.25, "learning_rate": 9.01100489860029e-06, "loss": 1.0583, "step": 18160 }, { "epoch": 2.25, "learning_rate": 8.983358681895407e-06, "loss": 1.0577, "step": 18170 }, { "epoch": 2.25, "learning_rate": 8.955745648738482e-06, "loss": 1.0637, "step": 18180 }, { "epoch": 2.25, "learning_rate": 8.928165856338652e-06, "loss": 1.0496, "step": 18190 }, { "epoch": 2.25, "learning_rate": 8.900619361836232e-06, "loss": 1.0465, "step": 18200 }, { "epoch": 2.25, "learning_rate": 8.873106222302477e-06, "loss": 1.03, "step": 18210 }, { "epoch": 2.25, "learning_rate": 8.845626494739608e-06, "loss": 1.0413, "step": 18220 }, { "epoch": 2.26, "learning_rate": 8.818180236080561e-06, "loss": 1.0235, "step": 18230 }, { "epoch": 2.26, "learning_rate": 8.790767503188962e-06, "loss": 1.0676, "step": 18240 }, { "epoch": 2.26, "learning_rate": 8.763388352858998e-06, "loss": 1.0355, "step": 18250 }, { "epoch": 2.26, "learning_rate": 8.736042841815228e-06, "loss": 1.0808, "step": 18260 }, { "epoch": 2.26, "learning_rate": 8.708731026712546e-06, "loss": 1.0702, "step": 18270 }, { "epoch": 2.26, "learning_rate": 8.681452964136055e-06, "loss": 1.0801, "step": 18280 }, { "epoch": 2.26, "learning_rate": 8.654208710600872e-06, "loss": 1.0613, "step": 18290 }, { "epoch": 2.26, "learning_rate": 8.626998322552139e-06, "loss": 1.0454, "step": 18300 }, { "epoch": 2.26, "learning_rate": 8.599821856364764e-06, "loss": 1.0556, "step": 18310 }, { "epoch": 2.27, "learning_rate": 8.57267936834342e-06, "loss": 1.0851, "step": 18320 }, { "epoch": 2.27, "learning_rate": 8.545570914722387e-06, "loss": 1.1057, "step": 18330 }, { "epoch": 2.27, "learning_rate": 8.51849655166539e-06, "loss": 1.0475, "step": 18340 }, { "epoch": 2.27, "learning_rate": 8.49145633526558e-06, "loss": 1.1336, "step": 18350 }, { "epoch": 2.27, "learning_rate": 8.464450321545314e-06, "loss": 1.0776, "step": 18360 }, { "epoch": 2.27, "learning_rate": 8.437478566456102e-06, "loss": 1.0983, "step": 18370 }, { "epoch": 2.27, "learning_rate": 8.410541125878512e-06, "loss": 1.0606, "step": 18380 }, { "epoch": 2.27, "learning_rate": 8.383638055621961e-06, "loss": 1.0602, "step": 18390 }, { "epoch": 2.28, "learning_rate": 8.356769411424683e-06, "loss": 1.0135, "step": 18400 }, { "epoch": 2.28, "learning_rate": 8.329935248953616e-06, "loss": 1.0526, "step": 18410 }, { "epoch": 2.28, "learning_rate": 8.303135623804195e-06, "loss": 1.0338, "step": 18420 }, { "epoch": 2.28, "learning_rate": 8.27637059150037e-06, "loss": 1.0835, "step": 18430 }, { "epoch": 2.28, "learning_rate": 8.249640207494367e-06, "loss": 1.0497, "step": 18440 }, { "epoch": 2.28, "learning_rate": 8.22294452716664e-06, "loss": 1.0445, "step": 18450 }, { "epoch": 2.28, "learning_rate": 8.196283605825777e-06, "loss": 1.1042, "step": 18460 }, { "epoch": 2.28, "learning_rate": 8.169657498708295e-06, "loss": 1.043, "step": 18470 }, { "epoch": 2.29, "learning_rate": 8.143066260978632e-06, "loss": 1.0481, "step": 18480 }, { "epoch": 2.29, "learning_rate": 8.11650994772897e-06, "loss": 1.1235, "step": 18490 }, { "epoch": 2.29, "learning_rate": 8.089988613979097e-06, "loss": 1.0619, "step": 18500 }, { "epoch": 2.29, "eval_loss": 0.9900073409080505, "eval_runtime": 5.7651, "eval_samples_per_second": 89.851, "eval_steps_per_second": 11.275, "step": 18500 }, { "epoch": 2.29, "learning_rate": 8.063502314676394e-06, "loss": 1.0631, "step": 18510 }, { "epoch": 2.29, "learning_rate": 8.0370511046956e-06, "loss": 1.07, "step": 18520 }, { "epoch": 2.29, "learning_rate": 8.010635038838781e-06, "loss": 1.0439, "step": 18530 }, { "epoch": 2.29, "learning_rate": 7.984254171835207e-06, "loss": 1.0427, "step": 18540 }, { "epoch": 2.29, "learning_rate": 7.957908558341177e-06, "loss": 1.089, "step": 18550 }, { "epoch": 2.3, "learning_rate": 7.931598252940003e-06, "loss": 1.0909, "step": 18560 }, { "epoch": 2.3, "learning_rate": 7.905323310141805e-06, "loss": 1.0531, "step": 18570 }, { "epoch": 2.3, "learning_rate": 7.879083784383445e-06, "loss": 1.093, "step": 18580 }, { "epoch": 2.3, "learning_rate": 7.852879730028446e-06, "loss": 1.1315, "step": 18590 }, { "epoch": 2.3, "learning_rate": 7.826711201366782e-06, "loss": 1.0864, "step": 18600 }, { "epoch": 2.3, "learning_rate": 7.800578252614853e-06, "loss": 1.0541, "step": 18610 }, { "epoch": 2.3, "learning_rate": 7.774480937915371e-06, "loss": 1.057, "step": 18620 }, { "epoch": 2.3, "learning_rate": 7.748419311337157e-06, "loss": 1.0301, "step": 18630 }, { "epoch": 2.31, "learning_rate": 7.722393426875166e-06, "loss": 1.0221, "step": 18640 }, { "epoch": 2.31, "learning_rate": 7.696403338450233e-06, "loss": 1.0777, "step": 18650 }, { "epoch": 2.31, "learning_rate": 7.670449099909072e-06, "loss": 1.1002, "step": 18660 }, { "epoch": 2.31, "learning_rate": 7.64453076502413e-06, "loss": 1.0384, "step": 18670 }, { "epoch": 2.31, "learning_rate": 7.618648387493418e-06, "loss": 1.0581, "step": 18680 }, { "epoch": 2.31, "learning_rate": 7.5928020209405205e-06, "loss": 1.0868, "step": 18690 }, { "epoch": 2.31, "learning_rate": 7.566991718914349e-06, "loss": 1.0309, "step": 18700 }, { "epoch": 2.31, "learning_rate": 7.541217534889122e-06, "loss": 1.0713, "step": 18710 }, { "epoch": 2.32, "learning_rate": 7.515479522264254e-06, "loss": 1.0928, "step": 18720 }, { "epoch": 2.32, "learning_rate": 7.489777734364173e-06, "loss": 1.0612, "step": 18730 }, { "epoch": 2.32, "learning_rate": 7.464112224438274e-06, "loss": 1.0831, "step": 18740 }, { "epoch": 2.32, "learning_rate": 7.438483045660818e-06, "loss": 1.0765, "step": 18750 }, { "epoch": 2.32, "learning_rate": 7.412890251130741e-06, "loss": 1.0827, "step": 18760 }, { "epoch": 2.32, "learning_rate": 7.387333893871657e-06, "loss": 1.0807, "step": 18770 }, { "epoch": 2.32, "learning_rate": 7.361814026831632e-06, "loss": 1.0931, "step": 18780 }, { "epoch": 2.32, "learning_rate": 7.3363307028831625e-06, "loss": 1.094, "step": 18790 }, { "epoch": 2.33, "learning_rate": 7.310883974823052e-06, "loss": 1.0258, "step": 18800 }, { "epoch": 2.33, "learning_rate": 7.285473895372224e-06, "loss": 1.0403, "step": 18810 }, { "epoch": 2.33, "learning_rate": 7.260100517175744e-06, "loss": 1.051, "step": 18820 }, { "epoch": 2.33, "learning_rate": 7.234763892802598e-06, "loss": 1.0774, "step": 18830 }, { "epoch": 2.33, "learning_rate": 7.2094640747456085e-06, "loss": 1.0448, "step": 18840 }, { "epoch": 2.33, "learning_rate": 7.184201115421402e-06, "loss": 1.0844, "step": 18850 }, { "epoch": 2.33, "learning_rate": 7.158975067170179e-06, "loss": 1.0424, "step": 18860 }, { "epoch": 2.33, "learning_rate": 7.133785982255689e-06, "loss": 1.0793, "step": 18870 }, { "epoch": 2.34, "learning_rate": 7.108633912865128e-06, "loss": 1.0719, "step": 18880 }, { "epoch": 2.34, "learning_rate": 7.083518911108952e-06, "loss": 1.045, "step": 18890 }, { "epoch": 2.34, "learning_rate": 7.058441029020871e-06, "loss": 1.0393, "step": 18900 }, { "epoch": 2.34, "learning_rate": 7.033400318557642e-06, "loss": 1.0579, "step": 18910 }, { "epoch": 2.34, "learning_rate": 7.0083968315990375e-06, "loss": 1.0854, "step": 18920 }, { "epoch": 2.34, "learning_rate": 6.98343061994772e-06, "loss": 1.0449, "step": 18930 }, { "epoch": 2.34, "learning_rate": 6.958501735329081e-06, "loss": 1.0826, "step": 18940 }, { "epoch": 2.34, "learning_rate": 6.933610229391227e-06, "loss": 1.0632, "step": 18950 }, { "epoch": 2.35, "learning_rate": 6.908756153704801e-06, "loss": 1.0435, "step": 18960 }, { "epoch": 2.35, "learning_rate": 6.883939559762873e-06, "loss": 1.0785, "step": 18970 }, { "epoch": 2.35, "learning_rate": 6.859160498980912e-06, "loss": 1.0417, "step": 18980 }, { "epoch": 2.35, "learning_rate": 6.834419022696573e-06, "loss": 1.0594, "step": 18990 }, { "epoch": 2.35, "learning_rate": 6.809715182169663e-06, "loss": 1.0554, "step": 19000 }, { "epoch": 2.35, "eval_loss": 0.9890897870063782, "eval_runtime": 5.7676, "eval_samples_per_second": 89.812, "eval_steps_per_second": 11.27, "step": 19000 }, { "epoch": 2.35, "learning_rate": 6.785049028582041e-06, "loss": 1.0474, "step": 19010 }, { "epoch": 2.35, "learning_rate": 6.760420613037424e-06, "loss": 1.0438, "step": 19020 }, { "epoch": 2.35, "learning_rate": 6.735829986561418e-06, "loss": 1.0598, "step": 19030 }, { "epoch": 2.36, "learning_rate": 6.711277200101271e-06, "loss": 1.0698, "step": 19040 }, { "epoch": 2.36, "learning_rate": 6.686762304525859e-06, "loss": 1.0565, "step": 19050 }, { "epoch": 2.36, "learning_rate": 6.662285350625583e-06, "loss": 1.0269, "step": 19060 }, { "epoch": 2.36, "learning_rate": 6.637846389112176e-06, "loss": 1.0512, "step": 19070 }, { "epoch": 2.36, "learning_rate": 6.613445470618718e-06, "loss": 1.0744, "step": 19080 }, { "epoch": 2.36, "learning_rate": 6.589082645699441e-06, "loss": 1.0649, "step": 19090 }, { "epoch": 2.36, "learning_rate": 6.564757964829632e-06, "loss": 1.0165, "step": 19100 }, { "epoch": 2.36, "learning_rate": 6.540471478405611e-06, "loss": 1.0079, "step": 19110 }, { "epoch": 2.37, "learning_rate": 6.516223236744503e-06, "loss": 1.0659, "step": 19120 }, { "epoch": 2.37, "learning_rate": 6.492013290084231e-06, "loss": 1.1184, "step": 19130 }, { "epoch": 2.37, "learning_rate": 6.4678416885833895e-06, "loss": 1.0566, "step": 19140 }, { "epoch": 2.37, "learning_rate": 6.443708482321081e-06, "loss": 1.0528, "step": 19150 }, { "epoch": 2.37, "learning_rate": 6.41961372129691e-06, "loss": 1.0871, "step": 19160 }, { "epoch": 2.37, "learning_rate": 6.395557455430814e-06, "loss": 1.0871, "step": 19170 }, { "epoch": 2.37, "learning_rate": 6.371539734562948e-06, "loss": 1.0643, "step": 19180 }, { "epoch": 2.37, "learning_rate": 6.347560608453659e-06, "loss": 1.0813, "step": 19190 }, { "epoch": 2.38, "learning_rate": 6.323620126783284e-06, "loss": 1.0611, "step": 19200 }, { "epoch": 2.38, "learning_rate": 6.299718339152117e-06, "loss": 1.0547, "step": 19210 }, { "epoch": 2.38, "learning_rate": 6.275855295080304e-06, "loss": 1.0533, "step": 19220 }, { "epoch": 2.38, "learning_rate": 6.252031044007681e-06, "loss": 1.0614, "step": 19230 }, { "epoch": 2.38, "learning_rate": 6.228245635293758e-06, "loss": 1.0493, "step": 19240 }, { "epoch": 2.38, "learning_rate": 6.204499118217524e-06, "loss": 1.08, "step": 19250 }, { "epoch": 2.38, "learning_rate": 6.180791541977418e-06, "loss": 1.0575, "step": 19260 }, { "epoch": 2.38, "learning_rate": 6.157122955691216e-06, "loss": 1.1035, "step": 19270 }, { "epoch": 2.38, "learning_rate": 6.1334934083958655e-06, "loss": 1.0642, "step": 19280 }, { "epoch": 2.39, "learning_rate": 6.109902949047483e-06, "loss": 1.0261, "step": 19290 }, { "epoch": 2.39, "learning_rate": 6.086351626521181e-06, "loss": 1.0732, "step": 19300 }, { "epoch": 2.39, "learning_rate": 6.062839489610967e-06, "loss": 1.068, "step": 19310 }, { "epoch": 2.39, "learning_rate": 6.03936658702971e-06, "loss": 1.0643, "step": 19320 }, { "epoch": 2.39, "learning_rate": 6.015932967408943e-06, "loss": 1.0663, "step": 19330 }, { "epoch": 2.39, "learning_rate": 5.9925386792988426e-06, "loss": 1.0062, "step": 19340 }, { "epoch": 2.39, "learning_rate": 5.969183771168102e-06, "loss": 1.0926, "step": 19350 }, { "epoch": 2.39, "learning_rate": 5.945868291403792e-06, "loss": 1.0744, "step": 19360 }, { "epoch": 2.4, "learning_rate": 5.922592288311343e-06, "loss": 1.0957, "step": 19370 }, { "epoch": 2.4, "learning_rate": 5.899355810114349e-06, "loss": 1.0783, "step": 19380 }, { "epoch": 2.4, "learning_rate": 5.87615890495454e-06, "loss": 1.0766, "step": 19390 }, { "epoch": 2.4, "learning_rate": 5.853001620891679e-06, "loss": 1.0694, "step": 19400 }, { "epoch": 2.4, "learning_rate": 5.829884005903385e-06, "loss": 1.0366, "step": 19410 }, { "epoch": 2.4, "learning_rate": 5.806806107885149e-06, "loss": 1.0696, "step": 19420 }, { "epoch": 2.4, "learning_rate": 5.783767974650145e-06, "loss": 1.0939, "step": 19430 }, { "epoch": 2.4, "learning_rate": 5.760769653929149e-06, "loss": 1.0562, "step": 19440 }, { "epoch": 2.41, "learning_rate": 5.737811193370499e-06, "loss": 1.051, "step": 19450 }, { "epoch": 2.41, "learning_rate": 5.7148926405398985e-06, "loss": 1.0486, "step": 19460 }, { "epoch": 2.41, "learning_rate": 5.692014042920391e-06, "loss": 1.1251, "step": 19470 }, { "epoch": 2.41, "learning_rate": 5.6691754479122615e-06, "loss": 1.0897, "step": 19480 }, { "epoch": 2.41, "learning_rate": 5.646376902832876e-06, "loss": 1.0399, "step": 19490 }, { "epoch": 2.41, "learning_rate": 5.6236184549166574e-06, "loss": 1.0714, "step": 19500 }, { "epoch": 2.41, "eval_loss": 0.9904816746711731, "eval_runtime": 5.7632, "eval_samples_per_second": 89.881, "eval_steps_per_second": 11.279, "step": 19500 }, { "epoch": 2.41, "learning_rate": 5.6009001513149474e-06, "loss": 1.0641, "step": 19510 }, { "epoch": 2.41, "learning_rate": 5.57822203909589e-06, "loss": 1.082, "step": 19520 }, { "epoch": 2.42, "learning_rate": 5.555584165244407e-06, "loss": 1.0902, "step": 19530 }, { "epoch": 2.42, "learning_rate": 5.532986576662003e-06, "loss": 1.0404, "step": 19540 }, { "epoch": 2.42, "learning_rate": 5.51042932016676e-06, "loss": 1.0836, "step": 19550 }, { "epoch": 2.42, "learning_rate": 5.487912442493187e-06, "loss": 1.0784, "step": 19560 }, { "epoch": 2.42, "learning_rate": 5.465435990292106e-06, "loss": 1.0678, "step": 19570 }, { "epoch": 2.42, "learning_rate": 5.443000010130645e-06, "loss": 1.0581, "step": 19580 }, { "epoch": 2.42, "learning_rate": 5.42060454849202e-06, "loss": 1.0626, "step": 19590 }, { "epoch": 2.42, "learning_rate": 5.398249651775527e-06, "loss": 1.0647, "step": 19600 }, { "epoch": 2.43, "learning_rate": 5.3759353662964414e-06, "loss": 1.0915, "step": 19610 }, { "epoch": 2.43, "learning_rate": 5.353661738285848e-06, "loss": 1.0359, "step": 19620 }, { "epoch": 2.43, "learning_rate": 5.331428813890649e-06, "loss": 1.0776, "step": 19630 }, { "epoch": 2.43, "learning_rate": 5.309236639173387e-06, "loss": 1.1055, "step": 19640 }, { "epoch": 2.43, "learning_rate": 5.2870852601121676e-06, "loss": 1.0404, "step": 19650 }, { "epoch": 2.43, "learning_rate": 5.264974722600618e-06, "loss": 1.051, "step": 19660 }, { "epoch": 2.43, "learning_rate": 5.242905072447704e-06, "loss": 1.0842, "step": 19670 }, { "epoch": 2.43, "learning_rate": 5.220876355377699e-06, "loss": 1.0574, "step": 19680 }, { "epoch": 2.44, "learning_rate": 5.19888861703009e-06, "loss": 1.0616, "step": 19690 }, { "epoch": 2.44, "learning_rate": 5.17694190295942e-06, "loss": 1.0672, "step": 19700 }, { "epoch": 2.44, "learning_rate": 5.155036258635287e-06, "loss": 1.0508, "step": 19710 }, { "epoch": 2.44, "learning_rate": 5.133171729442154e-06, "loss": 1.0613, "step": 19720 }, { "epoch": 2.44, "learning_rate": 5.111348360679327e-06, "loss": 1.0746, "step": 19730 }, { "epoch": 2.44, "learning_rate": 5.089566197560841e-06, "loss": 1.0487, "step": 19740 }, { "epoch": 2.44, "learning_rate": 5.06782528521533e-06, "loss": 1.0551, "step": 19750 }, { "epoch": 2.44, "learning_rate": 5.046125668686003e-06, "loss": 1.0872, "step": 19760 }, { "epoch": 2.45, "learning_rate": 5.024467392930488e-06, "loss": 1.0443, "step": 19770 }, { "epoch": 2.45, "learning_rate": 5.00285050282075e-06, "loss": 1.0377, "step": 19780 }, { "epoch": 2.45, "learning_rate": 4.98127504314305e-06, "loss": 1.0697, "step": 19790 }, { "epoch": 2.45, "learning_rate": 4.959741058597772e-06, "loss": 1.0474, "step": 19800 }, { "epoch": 2.45, "learning_rate": 4.938248593799388e-06, "loss": 1.0707, "step": 19810 }, { "epoch": 2.45, "learning_rate": 4.916797693276373e-06, "loss": 1.0609, "step": 19820 }, { "epoch": 2.45, "learning_rate": 4.895388401471035e-06, "loss": 1.0783, "step": 19830 }, { "epoch": 2.45, "learning_rate": 4.874020762739526e-06, "loss": 1.0768, "step": 19840 }, { "epoch": 2.46, "learning_rate": 4.85269482135168e-06, "loss": 1.0362, "step": 19850 }, { "epoch": 2.46, "learning_rate": 4.831410621490925e-06, "loss": 1.0707, "step": 19860 }, { "epoch": 2.46, "learning_rate": 4.8101682072542536e-06, "loss": 1.0705, "step": 19870 }, { "epoch": 2.46, "learning_rate": 4.788967622652027e-06, "loss": 1.0915, "step": 19880 }, { "epoch": 2.46, "learning_rate": 4.767808911607999e-06, "loss": 1.0849, "step": 19890 }, { "epoch": 2.46, "learning_rate": 4.746692117959142e-06, "loss": 1.0637, "step": 19900 }, { "epoch": 2.46, "learning_rate": 4.725617285455564e-06, "loss": 1.053, "step": 19910 }, { "epoch": 2.46, "learning_rate": 4.704584457760489e-06, "loss": 1.0425, "step": 19920 }, { "epoch": 2.47, "learning_rate": 4.683593678450063e-06, "loss": 1.0338, "step": 19930 }, { "epoch": 2.47, "learning_rate": 4.66264499101334e-06, "loss": 1.0477, "step": 19940 }, { "epoch": 2.47, "learning_rate": 4.641738438852178e-06, "loss": 1.0581, "step": 19950 }, { "epoch": 2.47, "learning_rate": 4.620874065281103e-06, "loss": 1.092, "step": 19960 }, { "epoch": 2.47, "learning_rate": 4.6000519135272955e-06, "loss": 1.0486, "step": 19970 }, { "epoch": 2.47, "learning_rate": 4.579272026730441e-06, "loss": 1.0855, "step": 19980 }, { "epoch": 2.47, "learning_rate": 4.558534447942639e-06, "loss": 1.0563, "step": 19990 }, { "epoch": 2.47, "learning_rate": 4.537839220128384e-06, "loss": 1.0611, "step": 20000 }, { "epoch": 2.47, "eval_loss": 0.9869465231895447, "eval_runtime": 5.7657, "eval_samples_per_second": 89.841, "eval_steps_per_second": 11.274, "step": 20000 }, { "epoch": 2.48, "learning_rate": 4.517186386164371e-06, "loss": 1.0839, "step": 20010 }, { "epoch": 2.48, "learning_rate": 4.4965759888395104e-06, "loss": 1.0517, "step": 20020 }, { "epoch": 2.48, "learning_rate": 4.476008070854768e-06, "loss": 1.0788, "step": 20030 }, { "epoch": 2.48, "learning_rate": 4.455482674823089e-06, "loss": 1.0623, "step": 20040 }, { "epoch": 2.48, "learning_rate": 4.434999843269361e-06, "loss": 1.091, "step": 20050 }, { "epoch": 2.48, "learning_rate": 4.414559618630237e-06, "loss": 1.0405, "step": 20060 }, { "epoch": 2.48, "learning_rate": 4.394162043254122e-06, "loss": 1.0906, "step": 20070 }, { "epoch": 2.48, "learning_rate": 4.373807159401075e-06, "loss": 1.0347, "step": 20080 }, { "epoch": 2.49, "learning_rate": 4.353495009242667e-06, "loss": 1.0644, "step": 20090 }, { "epoch": 2.49, "learning_rate": 4.333225634861968e-06, "loss": 1.0723, "step": 20100 }, { "epoch": 2.49, "learning_rate": 4.312999078253413e-06, "loss": 1.0962, "step": 20110 }, { "epoch": 2.49, "learning_rate": 4.292815381322701e-06, "loss": 1.024, "step": 20120 }, { "epoch": 2.49, "learning_rate": 4.272674585886785e-06, "loss": 1.067, "step": 20130 }, { "epoch": 2.49, "learning_rate": 4.25257673367368e-06, "loss": 1.1052, "step": 20140 }, { "epoch": 2.49, "learning_rate": 4.23252186632247e-06, "loss": 1.0812, "step": 20150 }, { "epoch": 2.49, "learning_rate": 4.212510025383173e-06, "loss": 1.0749, "step": 20160 }, { "epoch": 2.5, "learning_rate": 4.19254125231664e-06, "loss": 1.0159, "step": 20170 }, { "epoch": 2.5, "learning_rate": 4.172615588494527e-06, "loss": 1.0822, "step": 20180 }, { "epoch": 2.5, "learning_rate": 4.152733075199161e-06, "loss": 1.0417, "step": 20190 }, { "epoch": 2.5, "learning_rate": 4.132893753623457e-06, "loss": 1.0373, "step": 20200 }, { "epoch": 2.5, "learning_rate": 4.1130976648708715e-06, "loss": 1.0937, "step": 20210 }, { "epoch": 2.5, "learning_rate": 4.093344849955258e-06, "loss": 1.0591, "step": 20220 }, { "epoch": 2.5, "learning_rate": 4.07363534980085e-06, "loss": 1.0878, "step": 20230 }, { "epoch": 2.5, "learning_rate": 4.05396920524212e-06, "loss": 1.0599, "step": 20240 }, { "epoch": 2.5, "learning_rate": 4.034346457023699e-06, "loss": 1.1052, "step": 20250 }, { "epoch": 2.51, "learning_rate": 4.014767145800355e-06, "loss": 1.0553, "step": 20260 }, { "epoch": 2.51, "learning_rate": 3.995231312136818e-06, "loss": 1.0591, "step": 20270 }, { "epoch": 2.51, "learning_rate": 3.975738996507758e-06, "loss": 1.0797, "step": 20280 }, { "epoch": 2.51, "learning_rate": 3.9562902392976995e-06, "loss": 1.1089, "step": 20290 }, { "epoch": 2.51, "learning_rate": 3.936885080800884e-06, "loss": 1.0423, "step": 20300 }, { "epoch": 2.51, "learning_rate": 3.9175235612212615e-06, "loss": 1.0509, "step": 20310 }, { "epoch": 2.51, "learning_rate": 3.89820572067236e-06, "loss": 1.0735, "step": 20320 }, { "epoch": 2.51, "learning_rate": 3.878931599177188e-06, "loss": 1.048, "step": 20330 }, { "epoch": 2.52, "learning_rate": 3.859701236668217e-06, "loss": 1.0506, "step": 20340 }, { "epoch": 2.52, "learning_rate": 3.840514672987217e-06, "loss": 1.0547, "step": 20350 }, { "epoch": 2.52, "learning_rate": 3.82137194788525e-06, "loss": 1.0245, "step": 20360 }, { "epoch": 2.52, "learning_rate": 3.802273101022538e-06, "loss": 1.0391, "step": 20370 }, { "epoch": 2.52, "learning_rate": 3.783218171968378e-06, "loss": 1.0676, "step": 20380 }, { "epoch": 2.52, "learning_rate": 3.764207200201114e-06, "loss": 1.0483, "step": 20390 }, { "epoch": 2.52, "learning_rate": 3.7452402251079847e-06, "loss": 1.093, "step": 20400 }, { "epoch": 2.52, "learning_rate": 3.726317285985087e-06, "loss": 1.1126, "step": 20410 }, { "epoch": 2.53, "learning_rate": 3.7074384220373013e-06, "loss": 1.0411, "step": 20420 }, { "epoch": 2.53, "learning_rate": 3.6886036723781573e-06, "loss": 1.0643, "step": 20430 }, { "epoch": 2.53, "learning_rate": 3.669813076029824e-06, "loss": 1.0714, "step": 20440 }, { "epoch": 2.53, "learning_rate": 3.6510666719229758e-06, "loss": 1.0847, "step": 20450 }, { "epoch": 2.53, "learning_rate": 3.632364498896712e-06, "loss": 1.046, "step": 20460 }, { "epoch": 2.53, "learning_rate": 3.613706595698532e-06, "loss": 1.0629, "step": 20470 }, { "epoch": 2.53, "learning_rate": 3.595093000984173e-06, "loss": 1.0659, "step": 20480 }, { "epoch": 2.53, "learning_rate": 3.576523753317612e-06, "loss": 1.0641, "step": 20490 }, { "epoch": 2.54, "learning_rate": 3.557998891170933e-06, "loss": 1.0941, "step": 20500 }, { "epoch": 2.54, "eval_loss": 0.9862278699874878, "eval_runtime": 5.7611, "eval_samples_per_second": 89.913, "eval_steps_per_second": 11.283, "step": 20500 }, { "epoch": 2.54, "learning_rate": 3.5395184529242343e-06, "loss": 1.0178, "step": 20510 }, { "epoch": 2.54, "learning_rate": 3.5210824768656224e-06, "loss": 1.0527, "step": 20520 }, { "epoch": 2.54, "learning_rate": 3.5026910011910633e-06, "loss": 1.0735, "step": 20530 }, { "epoch": 2.54, "learning_rate": 3.484344064004308e-06, "loss": 1.0969, "step": 20540 }, { "epoch": 2.54, "learning_rate": 3.4660417033168718e-06, "loss": 1.0721, "step": 20550 }, { "epoch": 2.54, "learning_rate": 3.4477839570478785e-06, "loss": 1.0474, "step": 20560 }, { "epoch": 2.54, "learning_rate": 3.429570863024045e-06, "loss": 1.0736, "step": 20570 }, { "epoch": 2.55, "learning_rate": 3.4114024589795736e-06, "loss": 1.0749, "step": 20580 }, { "epoch": 2.55, "learning_rate": 3.3932787825560476e-06, "loss": 1.0426, "step": 20590 }, { "epoch": 2.55, "learning_rate": 3.3751998713024314e-06, "loss": 1.05, "step": 20600 }, { "epoch": 2.55, "learning_rate": 3.3571657626748963e-06, "loss": 1.0541, "step": 20610 }, { "epoch": 2.55, "learning_rate": 3.3391764940368305e-06, "loss": 1.0891, "step": 20620 }, { "epoch": 2.55, "learning_rate": 3.321232102658703e-06, "loss": 1.0593, "step": 20630 }, { "epoch": 2.55, "learning_rate": 3.303332625717989e-06, "loss": 1.0028, "step": 20640 }, { "epoch": 2.55, "learning_rate": 3.285478100299147e-06, "loss": 1.0737, "step": 20650 }, { "epoch": 2.56, "learning_rate": 3.2676685633934808e-06, "loss": 1.0911, "step": 20660 }, { "epoch": 2.56, "learning_rate": 3.2499040518990776e-06, "loss": 1.0235, "step": 20670 }, { "epoch": 2.56, "learning_rate": 3.232184602620772e-06, "loss": 1.0846, "step": 20680 }, { "epoch": 2.56, "learning_rate": 3.214510252269995e-06, "loss": 1.0727, "step": 20690 }, { "epoch": 2.56, "learning_rate": 3.19688103746478e-06, "loss": 1.0785, "step": 20700 }, { "epoch": 2.56, "learning_rate": 3.179296994729636e-06, "loss": 1.073, "step": 20710 }, { "epoch": 2.56, "learning_rate": 3.1617581604954594e-06, "loss": 1.035, "step": 20720 }, { "epoch": 2.56, "learning_rate": 3.1442645710995285e-06, "loss": 1.078, "step": 20730 }, { "epoch": 2.57, "learning_rate": 3.1268162627853315e-06, "loss": 1.0674, "step": 20740 }, { "epoch": 2.57, "learning_rate": 3.109413271702588e-06, "loss": 1.0569, "step": 20750 }, { "epoch": 2.57, "learning_rate": 3.0920556339071065e-06, "loss": 1.0716, "step": 20760 }, { "epoch": 2.57, "learning_rate": 3.0747433853607176e-06, "loss": 1.0994, "step": 20770 }, { "epoch": 2.57, "learning_rate": 3.0574765619312496e-06, "loss": 1.0685, "step": 20780 }, { "epoch": 2.57, "learning_rate": 3.0402551993923945e-06, "loss": 1.0485, "step": 20790 }, { "epoch": 2.57, "learning_rate": 3.0230793334236505e-06, "loss": 1.1089, "step": 20800 }, { "epoch": 2.57, "learning_rate": 3.0059489996102837e-06, "loss": 1.0608, "step": 20810 }, { "epoch": 2.58, "learning_rate": 2.9888642334431922e-06, "loss": 1.0617, "step": 20820 }, { "epoch": 2.58, "learning_rate": 2.9718250703188954e-06, "loss": 1.1054, "step": 20830 }, { "epoch": 2.58, "learning_rate": 2.9548315455394186e-06, "loss": 1.0773, "step": 20840 }, { "epoch": 2.58, "learning_rate": 2.9378836943122212e-06, "loss": 1.0721, "step": 20850 }, { "epoch": 2.58, "learning_rate": 2.9209815517501626e-06, "loss": 1.0407, "step": 20860 }, { "epoch": 2.58, "learning_rate": 2.9041251528713843e-06, "loss": 1.0765, "step": 20870 }, { "epoch": 2.58, "learning_rate": 2.8873145325992445e-06, "loss": 1.0721, "step": 20880 }, { "epoch": 2.58, "learning_rate": 2.870549725762289e-06, "loss": 1.0574, "step": 20890 }, { "epoch": 2.59, "learning_rate": 2.853830767094112e-06, "loss": 1.0529, "step": 20900 }, { "epoch": 2.59, "learning_rate": 2.837157691233344e-06, "loss": 1.0393, "step": 20910 }, { "epoch": 2.59, "learning_rate": 2.8205305327235505e-06, "loss": 1.0804, "step": 20920 }, { "epoch": 2.59, "learning_rate": 2.8039493260131387e-06, "loss": 1.1017, "step": 20930 }, { "epoch": 2.59, "learning_rate": 2.7874141054553577e-06, "loss": 1.0573, "step": 20940 }, { "epoch": 2.59, "learning_rate": 2.7709249053081337e-06, "loss": 1.0835, "step": 20950 }, { "epoch": 2.59, "learning_rate": 2.7544817597340846e-06, "loss": 1.0356, "step": 20960 }, { "epoch": 2.59, "learning_rate": 2.7380847028004003e-06, "loss": 1.0525, "step": 20970 }, { "epoch": 2.6, "learning_rate": 2.7217337684787623e-06, "loss": 1.0477, "step": 20980 }, { "epoch": 2.6, "learning_rate": 2.7054289906453255e-06, "loss": 1.0339, "step": 20990 }, { "epoch": 2.6, "learning_rate": 2.68917040308061e-06, "loss": 1.0462, "step": 21000 }, { "epoch": 2.6, "eval_loss": 0.9858787059783936, "eval_runtime": 5.7627, "eval_samples_per_second": 89.888, "eval_steps_per_second": 11.279, "step": 21000 }, { "epoch": 2.6, "learning_rate": 2.6729580394694142e-06, "loss": 1.0565, "step": 21010 }, { "epoch": 2.6, "learning_rate": 2.656791933400804e-06, "loss": 1.0544, "step": 21020 }, { "epoch": 2.6, "learning_rate": 2.640672118367976e-06, "loss": 1.1011, "step": 21030 }, { "epoch": 2.6, "learning_rate": 2.624598627768246e-06, "loss": 1.0566, "step": 21040 }, { "epoch": 2.6, "learning_rate": 2.6085714949029448e-06, "loss": 1.0445, "step": 21050 }, { "epoch": 2.61, "learning_rate": 2.592590752977342e-06, "loss": 1.0631, "step": 21060 }, { "epoch": 2.61, "learning_rate": 2.57665643510063e-06, "loss": 1.0176, "step": 21070 }, { "epoch": 2.61, "learning_rate": 2.560768574285774e-06, "loss": 1.0725, "step": 21080 }, { "epoch": 2.61, "learning_rate": 2.544927203449532e-06, "loss": 1.09, "step": 21090 }, { "epoch": 2.61, "learning_rate": 2.5291323554123198e-06, "loss": 1.0854, "step": 21100 }, { "epoch": 2.61, "learning_rate": 2.513384062898158e-06, "loss": 1.0571, "step": 21110 }, { "epoch": 2.61, "learning_rate": 2.4976823585346366e-06, "loss": 1.0754, "step": 21120 }, { "epoch": 2.61, "learning_rate": 2.4820272748528162e-06, "loss": 1.0497, "step": 21130 }, { "epoch": 2.62, "learning_rate": 2.4664188442871437e-06, "loss": 1.0454, "step": 21140 }, { "epoch": 2.62, "learning_rate": 2.450857099175455e-06, "loss": 1.0824, "step": 21150 }, { "epoch": 2.62, "learning_rate": 2.435342071758809e-06, "loss": 1.096, "step": 21160 }, { "epoch": 2.62, "learning_rate": 2.4198737941815237e-06, "loss": 1.039, "step": 21170 }, { "epoch": 2.62, "learning_rate": 2.4044522984910335e-06, "loss": 1.0577, "step": 21180 }, { "epoch": 2.62, "learning_rate": 2.3890776166378415e-06, "loss": 1.0258, "step": 21190 }, { "epoch": 2.62, "learning_rate": 2.373749780475487e-06, "loss": 1.0604, "step": 21200 }, { "epoch": 2.62, "learning_rate": 2.3584688217604317e-06, "loss": 1.0978, "step": 21210 }, { "epoch": 2.62, "learning_rate": 2.3432347721520282e-06, "loss": 1.1037, "step": 21220 }, { "epoch": 2.63, "learning_rate": 2.3280476632124403e-06, "loss": 1.0763, "step": 21230 }, { "epoch": 2.63, "learning_rate": 2.312907526406563e-06, "loss": 1.0386, "step": 21240 }, { "epoch": 2.63, "learning_rate": 2.2978143931020017e-06, "loss": 1.0697, "step": 21250 }, { "epoch": 2.63, "learning_rate": 2.2827682945689655e-06, "loss": 1.0423, "step": 21260 }, { "epoch": 2.63, "learning_rate": 2.2677692619802017e-06, "loss": 1.0806, "step": 21270 }, { "epoch": 2.63, "learning_rate": 2.2528173264109747e-06, "loss": 1.0404, "step": 21280 }, { "epoch": 2.63, "learning_rate": 2.2379125188389443e-06, "loss": 1.073, "step": 21290 }, { "epoch": 2.63, "learning_rate": 2.2230548701441505e-06, "loss": 1.0805, "step": 21300 }, { "epoch": 2.64, "learning_rate": 2.208244411108923e-06, "loss": 1.0579, "step": 21310 }, { "epoch": 2.64, "learning_rate": 2.1934811724178076e-06, "loss": 1.0522, "step": 21320 }, { "epoch": 2.64, "learning_rate": 2.178765184657544e-06, "loss": 1.0529, "step": 21330 }, { "epoch": 2.64, "learning_rate": 2.164096478316968e-06, "loss": 1.0804, "step": 21340 }, { "epoch": 2.64, "learning_rate": 2.1494750837869367e-06, "loss": 1.0487, "step": 21350 }, { "epoch": 2.64, "learning_rate": 2.1349010313603174e-06, "loss": 1.0244, "step": 21360 }, { "epoch": 2.64, "learning_rate": 2.120374351231866e-06, "loss": 1.1087, "step": 21370 }, { "epoch": 2.64, "learning_rate": 2.1058950734982123e-06, "loss": 1.0515, "step": 21380 }, { "epoch": 2.65, "learning_rate": 2.0914632281577694e-06, "loss": 1.0904, "step": 21390 }, { "epoch": 2.65, "learning_rate": 2.0770788451106663e-06, "loss": 1.0499, "step": 21400 }, { "epoch": 2.65, "learning_rate": 2.0627419541587286e-06, "loss": 1.0872, "step": 21410 }, { "epoch": 2.65, "learning_rate": 2.0484525850053505e-06, "loss": 1.0333, "step": 21420 }, { "epoch": 2.65, "learning_rate": 2.0342107672555013e-06, "loss": 1.057, "step": 21430 }, { "epoch": 2.65, "learning_rate": 2.0200165304156193e-06, "loss": 1.0222, "step": 21440 }, { "epoch": 2.65, "learning_rate": 2.0058699038935536e-06, "loss": 1.1043, "step": 21450 }, { "epoch": 2.65, "learning_rate": 1.9917709169985338e-06, "loss": 1.0657, "step": 21460 }, { "epoch": 2.66, "learning_rate": 1.97771959894108e-06, "loss": 1.0941, "step": 21470 }, { "epoch": 2.66, "learning_rate": 1.9637159788329363e-06, "loss": 1.0597, "step": 21480 }, { "epoch": 2.66, "learning_rate": 1.949760085687058e-06, "loss": 1.0796, "step": 21490 }, { "epoch": 2.66, "learning_rate": 1.935851948417483e-06, "loss": 1.0673, "step": 21500 }, { "epoch": 2.66, "eval_loss": 0.9846025109291077, "eval_runtime": 5.7645, "eval_samples_per_second": 89.861, "eval_steps_per_second": 11.276, "step": 21500 }, { "epoch": 2.66, "learning_rate": 1.92199159583934e-06, "loss": 1.0823, "step": 21510 }, { "epoch": 2.66, "learning_rate": 1.908179056668735e-06, "loss": 1.0519, "step": 21520 }, { "epoch": 2.66, "learning_rate": 1.8944143595227143e-06, "loss": 1.0765, "step": 21530 }, { "epoch": 2.66, "learning_rate": 1.8806975329192177e-06, "loss": 1.0861, "step": 21540 }, { "epoch": 2.67, "learning_rate": 1.8670286052769992e-06, "loss": 1.064, "step": 21550 }, { "epoch": 2.67, "learning_rate": 1.8534076049155663e-06, "loss": 1.0632, "step": 21560 }, { "epoch": 2.67, "learning_rate": 1.8398345600551481e-06, "loss": 1.0269, "step": 21570 }, { "epoch": 2.67, "learning_rate": 1.8263094988165918e-06, "loss": 1.077, "step": 21580 }, { "epoch": 2.67, "learning_rate": 1.8128324492213633e-06, "loss": 1.0659, "step": 21590 }, { "epoch": 2.67, "learning_rate": 1.799403439191441e-06, "loss": 1.0522, "step": 21600 }, { "epoch": 2.67, "learning_rate": 1.786022496549261e-06, "loss": 1.0669, "step": 21610 }, { "epoch": 2.67, "learning_rate": 1.7726896490177003e-06, "loss": 1.0454, "step": 21620 }, { "epoch": 2.68, "learning_rate": 1.7594049242199678e-06, "loss": 1.0545, "step": 21630 }, { "epoch": 2.68, "learning_rate": 1.7461683496795861e-06, "loss": 1.0455, "step": 21640 }, { "epoch": 2.68, "learning_rate": 1.7329799528203183e-06, "loss": 1.0398, "step": 21650 }, { "epoch": 2.68, "learning_rate": 1.7198397609660939e-06, "loss": 1.0348, "step": 21660 }, { "epoch": 2.68, "learning_rate": 1.7067478013409966e-06, "loss": 1.0592, "step": 21670 }, { "epoch": 2.68, "learning_rate": 1.6937041010691657e-06, "loss": 1.0702, "step": 21680 }, { "epoch": 2.68, "learning_rate": 1.6807086871747591e-06, "loss": 1.04, "step": 21690 }, { "epoch": 2.68, "learning_rate": 1.6677615865819008e-06, "loss": 1.0625, "step": 21700 }, { "epoch": 2.69, "learning_rate": 1.6548628261145977e-06, "loss": 1.0265, "step": 21710 }, { "epoch": 2.69, "learning_rate": 1.6420124324967311e-06, "loss": 1.0801, "step": 21720 }, { "epoch": 2.69, "learning_rate": 1.6292104323519658e-06, "loss": 1.116, "step": 21730 }, { "epoch": 2.69, "learning_rate": 1.6164568522036905e-06, "loss": 1.0548, "step": 21740 }, { "epoch": 2.69, "learning_rate": 1.6037517184749995e-06, "loss": 1.0789, "step": 21750 }, { "epoch": 2.69, "learning_rate": 1.5910950574885924e-06, "loss": 1.0744, "step": 21760 }, { "epoch": 2.69, "learning_rate": 1.578486895466763e-06, "loss": 1.0454, "step": 21770 }, { "epoch": 2.69, "learning_rate": 1.5659272585313161e-06, "loss": 1.056, "step": 21780 }, { "epoch": 2.7, "learning_rate": 1.5534161727035035e-06, "loss": 1.0321, "step": 21790 }, { "epoch": 2.7, "learning_rate": 1.5409536639040185e-06, "loss": 1.0956, "step": 21800 }, { "epoch": 2.7, "learning_rate": 1.528539757952896e-06, "loss": 1.0492, "step": 21810 }, { "epoch": 2.7, "learning_rate": 1.516174480569471e-06, "loss": 1.0647, "step": 21820 }, { "epoch": 2.7, "learning_rate": 1.5038578573723428e-06, "loss": 1.0394, "step": 21830 }, { "epoch": 2.7, "learning_rate": 1.4915899138792822e-06, "loss": 1.0763, "step": 21840 }, { "epoch": 2.7, "learning_rate": 1.4793706755072385e-06, "loss": 1.0714, "step": 21850 }, { "epoch": 2.7, "learning_rate": 1.467200167572233e-06, "loss": 1.0623, "step": 21860 }, { "epoch": 2.71, "learning_rate": 1.4550784152893183e-06, "loss": 1.0461, "step": 21870 }, { "epoch": 2.71, "learning_rate": 1.4430054437725577e-06, "loss": 1.0771, "step": 21880 }, { "epoch": 2.71, "learning_rate": 1.4309812780349346e-06, "loss": 1.0738, "step": 21890 }, { "epoch": 2.71, "learning_rate": 1.4190059429883162e-06, "loss": 1.0338, "step": 21900 }, { "epoch": 2.71, "learning_rate": 1.4070794634434116e-06, "loss": 1.0515, "step": 21910 }, { "epoch": 2.71, "learning_rate": 1.3952018641096852e-06, "loss": 1.0526, "step": 21920 }, { "epoch": 2.71, "learning_rate": 1.3833731695953644e-06, "loss": 1.0728, "step": 21930 }, { "epoch": 2.71, "learning_rate": 1.371593404407337e-06, "loss": 1.0515, "step": 21940 }, { "epoch": 2.72, "learning_rate": 1.3598625929511082e-06, "loss": 1.1174, "step": 21950 }, { "epoch": 2.72, "learning_rate": 1.3481807595307843e-06, "loss": 1.0738, "step": 21960 }, { "epoch": 2.72, "learning_rate": 1.336547928348972e-06, "loss": 1.0821, "step": 21970 }, { "epoch": 2.72, "learning_rate": 1.3249641235067777e-06, "loss": 1.0564, "step": 21980 }, { "epoch": 2.72, "learning_rate": 1.313429369003724e-06, "loss": 1.0567, "step": 21990 }, { "epoch": 2.72, "learning_rate": 1.3019436887377001e-06, "loss": 1.0639, "step": 22000 }, { "epoch": 2.72, "eval_loss": 0.9853225946426392, "eval_runtime": 5.7651, "eval_samples_per_second": 89.85, "eval_steps_per_second": 11.275, "step": 22000 }, { "epoch": 2.72, "learning_rate": 1.290507106504943e-06, "loss": 1.0533, "step": 22010 }, { "epoch": 2.72, "learning_rate": 1.2791196459999516e-06, "loss": 1.0626, "step": 22020 }, { "epoch": 2.73, "learning_rate": 1.2677813308154623e-06, "loss": 1.0391, "step": 22030 }, { "epoch": 2.73, "learning_rate": 1.256492184442387e-06, "loss": 1.0641, "step": 22040 }, { "epoch": 2.73, "learning_rate": 1.245252230269761e-06, "loss": 1.062, "step": 22050 }, { "epoch": 2.73, "learning_rate": 1.234061491584723e-06, "loss": 1.0546, "step": 22060 }, { "epoch": 2.73, "learning_rate": 1.2229199915724354e-06, "loss": 1.0887, "step": 22070 }, { "epoch": 2.73, "learning_rate": 1.2118277533160333e-06, "loss": 1.0865, "step": 22080 }, { "epoch": 2.73, "learning_rate": 1.2007847997966142e-06, "loss": 1.0676, "step": 22090 }, { "epoch": 2.73, "learning_rate": 1.1897911538931495e-06, "loss": 1.0555, "step": 22100 }, { "epoch": 2.73, "learning_rate": 1.1788468383824637e-06, "loss": 1.051, "step": 22110 }, { "epoch": 2.74, "learning_rate": 1.16795187593918e-06, "loss": 1.0389, "step": 22120 }, { "epoch": 2.74, "learning_rate": 1.157106289135651e-06, "loss": 1.0336, "step": 22130 }, { "epoch": 2.74, "learning_rate": 1.146310100441958e-06, "loss": 1.0335, "step": 22140 }, { "epoch": 2.74, "learning_rate": 1.1355633322258253e-06, "loss": 1.0916, "step": 22150 }, { "epoch": 2.74, "learning_rate": 1.1248660067525868e-06, "loss": 1.0762, "step": 22160 }, { "epoch": 2.74, "learning_rate": 1.1142181461851475e-06, "loss": 1.0352, "step": 22170 }, { "epoch": 2.74, "learning_rate": 1.103619772583911e-06, "loss": 1.0077, "step": 22180 }, { "epoch": 2.74, "learning_rate": 1.093070907906782e-06, "loss": 1.0364, "step": 22190 }, { "epoch": 2.75, "learning_rate": 1.0825715740090726e-06, "loss": 1.0451, "step": 22200 }, { "epoch": 2.75, "learning_rate": 1.0721217926434712e-06, "loss": 1.0545, "step": 22210 }, { "epoch": 2.75, "learning_rate": 1.061721585460021e-06, "loss": 1.0817, "step": 22220 }, { "epoch": 2.75, "learning_rate": 1.051370974006044e-06, "loss": 1.0696, "step": 22230 }, { "epoch": 2.75, "learning_rate": 1.0410699797261115e-06, "loss": 1.0869, "step": 22240 }, { "epoch": 2.75, "learning_rate": 1.0308186239619989e-06, "loss": 1.057, "step": 22250 }, { "epoch": 2.75, "learning_rate": 1.0206169279526335e-06, "loss": 1.1109, "step": 22260 }, { "epoch": 2.75, "learning_rate": 1.0104649128340664e-06, "loss": 1.0801, "step": 22270 }, { "epoch": 2.76, "learning_rate": 1.0003625996394122e-06, "loss": 1.053, "step": 22280 }, { "epoch": 2.76, "learning_rate": 9.903100092988144e-07, "loss": 1.0714, "step": 22290 }, { "epoch": 2.76, "learning_rate": 9.803071626394018e-07, "loss": 1.0692, "step": 22300 }, { "epoch": 2.76, "learning_rate": 9.7035408038523e-07, "loss": 1.074, "step": 22310 }, { "epoch": 2.76, "learning_rate": 9.604507831572767e-07, "loss": 1.0702, "step": 22320 }, { "epoch": 2.76, "learning_rate": 9.505972914733568e-07, "loss": 1.0599, "step": 22330 }, { "epoch": 2.76, "learning_rate": 9.407936257480932e-07, "loss": 1.08, "step": 22340 }, { "epoch": 2.76, "learning_rate": 9.310398062928938e-07, "loss": 1.0631, "step": 22350 }, { "epoch": 2.77, "learning_rate": 9.213358533158856e-07, "loss": 1.0564, "step": 22360 }, { "epoch": 2.77, "learning_rate": 9.116817869218808e-07, "loss": 1.0758, "step": 22370 }, { "epoch": 2.77, "learning_rate": 9.02077627112341e-07, "loss": 1.0887, "step": 22380 }, { "epoch": 2.77, "learning_rate": 8.925233937853161e-07, "loss": 1.0684, "step": 22390 }, { "epoch": 2.77, "learning_rate": 8.830191067354387e-07, "loss": 1.0976, "step": 22400 }, { "epoch": 2.77, "learning_rate": 8.735647856538492e-07, "loss": 1.06, "step": 22410 }, { "epoch": 2.77, "learning_rate": 8.641604501281625e-07, "loss": 1.0518, "step": 22420 }, { "epoch": 2.77, "learning_rate": 8.548061196424512e-07, "loss": 1.0146, "step": 22430 }, { "epoch": 2.78, "learning_rate": 8.455018135771625e-07, "loss": 1.0497, "step": 22440 }, { "epoch": 2.78, "learning_rate": 8.362475512091267e-07, "loss": 1.0652, "step": 22450 }, { "epoch": 2.78, "learning_rate": 8.270433517114761e-07, "loss": 1.0454, "step": 22460 }, { "epoch": 2.78, "learning_rate": 8.178892341536209e-07, "loss": 1.0681, "step": 22470 }, { "epoch": 2.78, "learning_rate": 8.087852175012262e-07, "loss": 1.0747, "step": 22480 }, { "epoch": 2.78, "learning_rate": 7.997313206161427e-07, "loss": 1.0458, "step": 22490 }, { "epoch": 2.78, "learning_rate": 7.907275622563854e-07, "loss": 1.0571, "step": 22500 }, { "epoch": 2.78, "eval_loss": 0.9861387610435486, "eval_runtime": 5.7641, "eval_samples_per_second": 89.866, "eval_steps_per_second": 11.277, "step": 22500 }, { "epoch": 2.78, "learning_rate": 7.817739610760988e-07, "loss": 1.0783, "step": 22510 }, { "epoch": 2.79, "learning_rate": 7.728705356254973e-07, "loss": 1.081, "step": 22520 }, { "epoch": 2.79, "learning_rate": 7.640173043508503e-07, "loss": 1.0371, "step": 22530 }, { "epoch": 2.79, "learning_rate": 7.552142855944383e-07, "loss": 1.0232, "step": 22540 }, { "epoch": 2.79, "learning_rate": 7.464614975944945e-07, "loss": 1.0483, "step": 22550 }, { "epoch": 2.79, "learning_rate": 7.37758958485199e-07, "loss": 1.0735, "step": 22560 }, { "epoch": 2.79, "learning_rate": 7.291066862966184e-07, "loss": 1.0674, "step": 22570 }, { "epoch": 2.79, "learning_rate": 7.205046989546771e-07, "loss": 1.0794, "step": 22580 }, { "epoch": 2.79, "learning_rate": 7.119530142811165e-07, "loss": 1.053, "step": 22590 }, { "epoch": 2.8, "learning_rate": 7.034516499934584e-07, "loss": 1.0833, "step": 22600 }, { "epoch": 2.8, "learning_rate": 6.950006237049777e-07, "loss": 1.0694, "step": 22610 }, { "epoch": 2.8, "learning_rate": 6.865999529246519e-07, "loss": 1.0504, "step": 22620 }, { "epoch": 2.8, "learning_rate": 6.78249655057131e-07, "loss": 1.0902, "step": 22630 }, { "epoch": 2.8, "learning_rate": 6.699497474027066e-07, "loss": 1.0629, "step": 22640 }, { "epoch": 2.8, "learning_rate": 6.617002471572626e-07, "loss": 1.0534, "step": 22650 }, { "epoch": 2.8, "learning_rate": 6.535011714122546e-07, "loss": 1.0808, "step": 22660 }, { "epoch": 2.8, "learning_rate": 6.453525371546725e-07, "loss": 1.0294, "step": 22670 }, { "epoch": 2.81, "learning_rate": 6.37254361266984e-07, "loss": 1.0225, "step": 22680 }, { "epoch": 2.81, "learning_rate": 6.292066605271319e-07, "loss": 1.0803, "step": 22690 }, { "epoch": 2.81, "learning_rate": 6.212094516084815e-07, "loss": 1.076, "step": 22700 }, { "epoch": 2.81, "learning_rate": 6.132627510797795e-07, "loss": 1.0371, "step": 22710 }, { "epoch": 2.81, "learning_rate": 6.053665754051391e-07, "loss": 1.0625, "step": 22720 }, { "epoch": 2.81, "learning_rate": 5.975209409439852e-07, "loss": 1.0362, "step": 22730 }, { "epoch": 2.81, "learning_rate": 5.897258639510406e-07, "loss": 1.0622, "step": 22740 }, { "epoch": 2.81, "learning_rate": 5.819813605762726e-07, "loss": 1.0352, "step": 22750 }, { "epoch": 2.82, "learning_rate": 5.742874468648796e-07, "loss": 1.0676, "step": 22760 }, { "epoch": 2.82, "learning_rate": 5.666441387572413e-07, "loss": 1.0404, "step": 22770 }, { "epoch": 2.82, "learning_rate": 5.590514520888879e-07, "loss": 1.053, "step": 22780 }, { "epoch": 2.82, "learning_rate": 5.515094025904777e-07, "loss": 1.0321, "step": 22790 }, { "epoch": 2.82, "learning_rate": 5.440180058877615e-07, "loss": 1.0924, "step": 22800 }, { "epoch": 2.82, "learning_rate": 5.36577277501532e-07, "loss": 1.1155, "step": 22810 }, { "epoch": 2.82, "learning_rate": 5.291872328476222e-07, "loss": 1.094, "step": 22820 }, { "epoch": 2.82, "learning_rate": 5.218478872368509e-07, "loss": 1.0211, "step": 22830 }, { "epoch": 2.83, "learning_rate": 5.145592558749968e-07, "loss": 1.0557, "step": 22840 }, { "epoch": 2.83, "learning_rate": 5.073213538627747e-07, "loss": 1.0563, "step": 22850 }, { "epoch": 2.83, "learning_rate": 5.001341961957811e-07, "loss": 1.0422, "step": 22860 }, { "epoch": 2.83, "learning_rate": 4.929977977644995e-07, "loss": 1.0748, "step": 22870 }, { "epoch": 2.83, "learning_rate": 4.859121733542415e-07, "loss": 1.0407, "step": 22880 }, { "epoch": 2.83, "learning_rate": 4.788773376451172e-07, "loss": 1.0422, "step": 22890 }, { "epoch": 2.83, "learning_rate": 4.7189330521202336e-07, "loss": 1.0907, "step": 22900 }, { "epoch": 2.83, "learning_rate": 4.64960090524591e-07, "loss": 1.0633, "step": 22910 }, { "epoch": 2.84, "learning_rate": 4.580777079471771e-07, "loss": 1.0597, "step": 22920 }, { "epoch": 2.84, "learning_rate": 4.5124617173881724e-07, "loss": 1.0579, "step": 22930 }, { "epoch": 2.84, "learning_rate": 4.44465496053198e-07, "loss": 1.0928, "step": 22940 }, { "epoch": 2.84, "learning_rate": 4.3773569493864295e-07, "loss": 1.0277, "step": 22950 }, { "epoch": 2.84, "learning_rate": 4.310567823380657e-07, "loss": 1.0687, "step": 22960 }, { "epoch": 2.84, "learning_rate": 4.2442877208895004e-07, "loss": 1.0341, "step": 22970 }, { "epoch": 2.84, "learning_rate": 4.1785167792332546e-07, "loss": 1.0519, "step": 22980 }, { "epoch": 2.84, "learning_rate": 4.1132551346771406e-07, "loss": 1.0239, "step": 22990 }, { "epoch": 2.85, "learning_rate": 4.0485029224314465e-07, "loss": 1.0654, "step": 23000 }, { "epoch": 2.85, "eval_loss": 0.9845592975616455, "eval_runtime": 5.7608, "eval_samples_per_second": 89.918, "eval_steps_per_second": 11.283, "step": 23000 }, { "epoch": 2.85, "learning_rate": 3.984260276650886e-07, "loss": 1.0744, "step": 23010 }, { "epoch": 2.85, "learning_rate": 3.920527330434409e-07, "loss": 1.0382, "step": 23020 }, { "epoch": 2.85, "learning_rate": 3.8573042158250307e-07, "loss": 1.0473, "step": 23030 }, { "epoch": 2.85, "learning_rate": 3.7945910638094716e-07, "loss": 1.0671, "step": 23040 }, { "epoch": 2.85, "learning_rate": 3.7323880043179105e-07, "loss": 1.0436, "step": 23050 }, { "epoch": 2.85, "learning_rate": 3.6706951662237033e-07, "loss": 1.0827, "step": 23060 }, { "epoch": 2.85, "learning_rate": 3.609512677343052e-07, "loss": 1.0541, "step": 23070 }, { "epoch": 2.85, "learning_rate": 3.548840664434949e-07, "loss": 1.0506, "step": 23080 }, { "epoch": 2.86, "learning_rate": 3.4886792532006497e-07, "loss": 1.0357, "step": 23090 }, { "epoch": 2.86, "learning_rate": 3.4290285682835875e-07, "loss": 1.0679, "step": 23100 }, { "epoch": 2.86, "learning_rate": 3.3698887332691e-07, "loss": 1.0591, "step": 23110 }, { "epoch": 2.86, "learning_rate": 3.311259870684008e-07, "loss": 1.0922, "step": 23120 }, { "epoch": 2.86, "learning_rate": 3.253142101996648e-07, "loss": 1.0554, "step": 23130 }, { "epoch": 2.86, "learning_rate": 3.1955355476163964e-07, "loss": 1.0365, "step": 23140 }, { "epoch": 2.86, "learning_rate": 3.138440326893449e-07, "loss": 1.0632, "step": 23150 }, { "epoch": 2.86, "learning_rate": 3.0818565581186564e-07, "loss": 1.0702, "step": 23160 }, { "epoch": 2.87, "learning_rate": 3.0257843585232705e-07, "loss": 1.0591, "step": 23170 }, { "epoch": 2.87, "learning_rate": 2.970223844278558e-07, "loss": 1.0756, "step": 23180 }, { "epoch": 2.87, "learning_rate": 2.915175130495773e-07, "loss": 1.0669, "step": 23190 }, { "epoch": 2.87, "learning_rate": 2.86063833122574e-07, "loss": 1.1059, "step": 23200 }, { "epoch": 2.87, "learning_rate": 2.8066135594587705e-07, "loss": 1.0326, "step": 23210 }, { "epoch": 2.87, "learning_rate": 2.753100927124247e-07, "loss": 1.0812, "step": 23220 }, { "epoch": 2.87, "learning_rate": 2.7001005450905394e-07, "loss": 1.0917, "step": 23230 }, { "epoch": 2.87, "learning_rate": 2.647612523164811e-07, "loss": 1.0918, "step": 23240 }, { "epoch": 2.88, "learning_rate": 2.5956369700925187e-07, "loss": 1.0586, "step": 23250 }, { "epoch": 2.88, "learning_rate": 2.544173993557608e-07, "loss": 1.0021, "step": 23260 }, { "epoch": 2.88, "learning_rate": 2.4932237001818737e-07, "loss": 1.0642, "step": 23270 }, { "epoch": 2.88, "learning_rate": 2.4427861955249877e-07, "loss": 1.11, "step": 23280 }, { "epoch": 2.88, "learning_rate": 2.3928615840842495e-07, "loss": 1.0417, "step": 23290 }, { "epoch": 2.88, "learning_rate": 2.343449969294309e-07, "loss": 1.0623, "step": 23300 }, { "epoch": 2.88, "learning_rate": 2.294551453526972e-07, "loss": 1.0661, "step": 23310 }, { "epoch": 2.88, "learning_rate": 2.2461661380910327e-07, "loss": 1.0312, "step": 23320 }, { "epoch": 2.89, "learning_rate": 2.1982941232319698e-07, "loss": 1.05, "step": 23330 }, { "epoch": 2.89, "learning_rate": 2.1509355081318905e-07, "loss": 1.0518, "step": 23340 }, { "epoch": 2.89, "learning_rate": 2.1040903909091136e-07, "loss": 1.0555, "step": 23350 }, { "epoch": 2.89, "learning_rate": 2.0577588686181705e-07, "loss": 1.0714, "step": 23360 }, { "epoch": 2.89, "learning_rate": 2.0119410372494707e-07, "loss": 1.06, "step": 23370 }, { "epoch": 2.89, "learning_rate": 1.9666369917291927e-07, "loss": 1.0408, "step": 23380 }, { "epoch": 2.89, "learning_rate": 1.9218468259190324e-07, "loss": 1.0791, "step": 23390 }, { "epoch": 2.89, "learning_rate": 1.8775706326160104e-07, "loss": 1.0628, "step": 23400 }, { "epoch": 2.9, "learning_rate": 1.833808503552248e-07, "loss": 1.0807, "step": 23410 }, { "epoch": 2.9, "learning_rate": 1.7905605293948857e-07, "loss": 1.0926, "step": 23420 }, { "epoch": 2.9, "learning_rate": 1.747826799745833e-07, "loss": 1.0784, "step": 23430 }, { "epoch": 2.9, "learning_rate": 1.705607403141546e-07, "loss": 1.0559, "step": 23440 }, { "epoch": 2.9, "learning_rate": 1.6639024270528603e-07, "loss": 1.0738, "step": 23450 }, { "epoch": 2.9, "learning_rate": 1.6227119578848537e-07, "loss": 1.0601, "step": 23460 }, { "epoch": 2.9, "learning_rate": 1.582036080976651e-07, "loss": 1.0578, "step": 23470 }, { "epoch": 2.9, "learning_rate": 1.5418748806012295e-07, "loss": 1.0649, "step": 23480 }, { "epoch": 2.91, "learning_rate": 1.5022284399651975e-07, "loss": 1.101, "step": 23490 }, { "epoch": 2.91, "learning_rate": 1.4630968412087386e-07, "loss": 1.0464, "step": 23500 }, { "epoch": 2.91, "eval_loss": 0.9841265678405762, "eval_runtime": 5.7616, "eval_samples_per_second": 89.905, "eval_steps_per_second": 11.282, "step": 23500 }, { "epoch": 2.91, "learning_rate": 1.4244801654053896e-07, "loss": 1.0603, "step": 23510 }, { "epoch": 2.91, "learning_rate": 1.3863784925618184e-07, "loss": 1.084, "step": 23520 }, { "epoch": 2.91, "learning_rate": 1.348791901617713e-07, "loss": 1.0628, "step": 23530 }, { "epoch": 2.91, "learning_rate": 1.3117204704455877e-07, "loss": 1.0322, "step": 23540 }, { "epoch": 2.91, "learning_rate": 1.275164275850671e-07, "loss": 1.0576, "step": 23550 }, { "epoch": 2.91, "learning_rate": 1.2391233935707126e-07, "loss": 1.0826, "step": 23560 }, { "epoch": 2.92, "learning_rate": 1.2035978982758155e-07, "loss": 1.0501, "step": 23570 }, { "epoch": 2.92, "learning_rate": 1.1685878635683267e-07, "loss": 1.0405, "step": 23580 }, { "epoch": 2.92, "learning_rate": 1.1340933619825577e-07, "loss": 1.1107, "step": 23590 }, { "epoch": 2.92, "learning_rate": 1.1001144649848693e-07, "loss": 1.0154, "step": 23600 }, { "epoch": 2.92, "learning_rate": 1.0666512429732823e-07, "loss": 1.0645, "step": 23610 }, { "epoch": 2.92, "learning_rate": 1.0337037652774496e-07, "loss": 1.0655, "step": 23620 }, { "epoch": 2.92, "learning_rate": 1.0012721001585734e-07, "loss": 1.072, "step": 23630 }, { "epoch": 2.92, "learning_rate": 9.693563148090723e-08, "loss": 1.0466, "step": 23640 }, { "epoch": 2.93, "learning_rate": 9.379564753526082e-08, "loss": 1.0613, "step": 23650 }, { "epoch": 2.93, "learning_rate": 9.070726468439206e-08, "loss": 1.0306, "step": 23660 }, { "epoch": 2.93, "learning_rate": 8.767048932686318e-08, "loss": 1.0568, "step": 23670 }, { "epoch": 2.93, "learning_rate": 8.468532775431914e-08, "loss": 1.0501, "step": 23680 }, { "epoch": 2.93, "learning_rate": 8.17517861514655e-08, "loss": 1.0954, "step": 23690 }, { "epoch": 2.93, "learning_rate": 7.886987059606277e-08, "loss": 1.0636, "step": 23700 }, { "epoch": 2.93, "learning_rate": 7.603958705891535e-08, "loss": 1.0391, "step": 23710 }, { "epoch": 2.93, "learning_rate": 7.326094140384932e-08, "loss": 1.0617, "step": 23720 }, { "epoch": 2.94, "learning_rate": 7.053393938771246e-08, "loss": 1.0403, "step": 23730 }, { "epoch": 2.94, "learning_rate": 6.785858666035205e-08, "loss": 1.079, "step": 23740 }, { "epoch": 2.94, "learning_rate": 6.523488876460648e-08, "loss": 1.0704, "step": 23750 }, { "epoch": 2.94, "learning_rate": 6.266285113630254e-08, "loss": 1.0731, "step": 23760 }, { "epoch": 2.94, "learning_rate": 6.014247910423044e-08, "loss": 1.0713, "step": 23770 }, { "epoch": 2.94, "learning_rate": 5.767377789013539e-08, "loss": 1.0578, "step": 23780 }, { "epoch": 2.94, "learning_rate": 5.525675260871776e-08, "loss": 1.0642, "step": 23790 }, { "epoch": 2.94, "learning_rate": 5.289140826760797e-08, "loss": 1.0856, "step": 23800 }, { "epoch": 2.95, "learning_rate": 5.057774976737206e-08, "loss": 1.0541, "step": 23810 }, { "epoch": 2.95, "learning_rate": 4.8315781901481225e-08, "loss": 1.0546, "step": 23820 }, { "epoch": 2.95, "learning_rate": 4.6105509356322854e-08, "loss": 1.041, "step": 23830 }, { "epoch": 2.95, "learning_rate": 4.394693671117278e-08, "loss": 1.0959, "step": 23840 }, { "epoch": 2.95, "learning_rate": 4.184006843820087e-08, "loss": 1.0648, "step": 23850 }, { "epoch": 2.95, "learning_rate": 3.9784908902454345e-08, "loss": 1.0374, "step": 23860 }, { "epoch": 2.95, "learning_rate": 3.778146236184388e-08, "loss": 1.0584, "step": 23870 }, { "epoch": 2.95, "learning_rate": 3.5829732967143646e-08, "loss": 1.0565, "step": 23880 }, { "epoch": 2.96, "learning_rate": 3.3929724761980197e-08, "loss": 1.0571, "step": 23890 }, { "epoch": 2.96, "learning_rate": 3.2081441682826895e-08, "loss": 1.0379, "step": 23900 }, { "epoch": 2.96, "learning_rate": 3.0284887558981735e-08, "loss": 1.0255, "step": 23910 }, { "epoch": 2.96, "learning_rate": 2.8540066112581198e-08, "loss": 1.0542, "step": 23920 }, { "epoch": 2.96, "learning_rate": 2.6846980958569745e-08, "loss": 1.054, "step": 23930 }, { "epoch": 2.96, "learning_rate": 2.5205635604713673e-08, "loss": 1.066, "step": 23940 }, { "epoch": 2.96, "learning_rate": 2.3616033451581698e-08, "loss": 1.0623, "step": 23950 }, { "epoch": 2.96, "learning_rate": 2.207817779253385e-08, "loss": 1.0531, "step": 23960 }, { "epoch": 2.97, "learning_rate": 2.0592071813727022e-08, "loss": 1.0666, "step": 23970 }, { "epoch": 2.97, "learning_rate": 1.9157718594103868e-08, "loss": 1.0586, "step": 23980 }, { "epoch": 2.97, "learning_rate": 1.7775121105378933e-08, "loss": 1.0567, "step": 23990 }, { "epoch": 2.97, "learning_rate": 1.6444282212041416e-08, "loss": 1.0682, "step": 24000 }, { "epoch": 2.97, "eval_loss": 0.9839575290679932, "eval_runtime": 5.7619, "eval_samples_per_second": 89.901, "eval_steps_per_second": 11.281, "step": 24000 } ], "max_steps": 24252, "num_train_epochs": 3, "total_flos": 3.86042215804416e+17, "trial_name": null, "trial_params": null }