{ "best_metric": 0.3574163317680359, "best_model_checkpoint": "OutModelPolicy\\checkpoint-132696", "epoch": 1.0, "global_step": 132696, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.9962395249291615e-05, "loss": 0.5169, "step": 500 }, { "epoch": 0.01, "learning_rate": 1.992479049858323e-05, "loss": 0.4445, "step": 1000 }, { "epoch": 0.01, "learning_rate": 1.9887110387652982e-05, "loss": 0.4321, "step": 1500 }, { "epoch": 0.02, "learning_rate": 1.9849505636944596e-05, "loss": 0.4343, "step": 2000 }, { "epoch": 0.02, "learning_rate": 1.981182552601435e-05, "loss": 0.4072, "step": 2500 }, { "epoch": 0.02, "learning_rate": 1.9774145415084102e-05, "loss": 0.4311, "step": 3000 }, { "epoch": 0.03, "learning_rate": 1.973646530415386e-05, "loss": 0.411, "step": 3500 }, { "epoch": 0.03, "learning_rate": 1.9698785193223612e-05, "loss": 0.4154, "step": 4000 }, { "epoch": 0.03, "learning_rate": 1.9661105082293365e-05, "loss": 0.3951, "step": 4500 }, { "epoch": 0.04, "learning_rate": 1.962350033158498e-05, "loss": 0.3974, "step": 5000 }, { "epoch": 0.04, "learning_rate": 1.9585820220654732e-05, "loss": 0.4036, "step": 5500 }, { "epoch": 0.05, "learning_rate": 1.9548140109724485e-05, "loss": 0.3984, "step": 6000 }, { "epoch": 0.05, "learning_rate": 1.9510459998794238e-05, "loss": 0.4028, "step": 6500 }, { "epoch": 0.05, "learning_rate": 1.947277988786399e-05, "loss": 0.3995, "step": 7000 }, { "epoch": 0.06, "learning_rate": 1.9435099776933744e-05, "loss": 0.4006, "step": 7500 }, { "epoch": 0.06, "learning_rate": 1.9397419666003497e-05, "loss": 0.404, "step": 8000 }, { "epoch": 0.06, "learning_rate": 1.935973955507325e-05, "loss": 0.384, "step": 8500 }, { "epoch": 0.07, "learning_rate": 1.9322210164586725e-05, "loss": 0.3871, "step": 9000 }, { "epoch": 0.07, "learning_rate": 1.928453005365648e-05, "loss": 0.3972, "step": 9500 }, { "epoch": 0.08, "learning_rate": 1.9246849942726234e-05, "loss": 0.4043, "step": 10000 }, { "epoch": 0.08, "learning_rate": 1.9209169831795988e-05, "loss": 0.3951, "step": 10500 }, { "epoch": 0.08, "learning_rate": 1.917148972086574e-05, "loss": 0.4002, "step": 11000 }, { "epoch": 0.09, "learning_rate": 1.9133884970157354e-05, "loss": 0.3953, "step": 11500 }, { "epoch": 0.09, "learning_rate": 1.9096204859227107e-05, "loss": 0.381, "step": 12000 }, { "epoch": 0.09, "learning_rate": 1.905852474829686e-05, "loss": 0.386, "step": 12500 }, { "epoch": 0.1, "learning_rate": 1.9020844637366614e-05, "loss": 0.3779, "step": 13000 }, { "epoch": 0.1, "learning_rate": 1.8983164526436367e-05, "loss": 0.3861, "step": 13500 }, { "epoch": 0.11, "learning_rate": 1.894548441550612e-05, "loss": 0.3733, "step": 14000 }, { "epoch": 0.11, "learning_rate": 1.8907879664797734e-05, "loss": 0.392, "step": 14500 }, { "epoch": 0.11, "learning_rate": 1.8870199553867487e-05, "loss": 0.368, "step": 15000 }, { "epoch": 0.12, "learning_rate": 1.8832519442937243e-05, "loss": 0.3786, "step": 15500 }, { "epoch": 0.12, "learning_rate": 1.8794914692228857e-05, "loss": 0.3836, "step": 16000 }, { "epoch": 0.12, "learning_rate": 1.875723458129861e-05, "loss": 0.3711, "step": 16500 }, { "epoch": 0.13, "learning_rate": 1.8719554470368363e-05, "loss": 0.3876, "step": 17000 }, { "epoch": 0.13, "learning_rate": 1.8681874359438113e-05, "loss": 0.385, "step": 17500 }, { "epoch": 0.14, "learning_rate": 1.864419424850787e-05, "loss": 0.3894, "step": 18000 }, { "epoch": 0.14, "learning_rate": 1.8606514137577623e-05, "loss": 0.3819, "step": 18500 }, { "epoch": 0.14, "learning_rate": 1.8568834026647376e-05, "loss": 0.3774, "step": 19000 }, { "epoch": 0.15, "learning_rate": 1.853115391571713e-05, "loss": 0.371, "step": 19500 }, { "epoch": 0.15, "learning_rate": 1.8493473804786882e-05, "loss": 0.3722, "step": 20000 }, { "epoch": 0.15, "learning_rate": 1.8455793693856635e-05, "loss": 0.3699, "step": 20500 }, { "epoch": 0.16, "learning_rate": 1.841818894314825e-05, "loss": 0.3763, "step": 21000 }, { "epoch": 0.16, "learning_rate": 1.8380508832218002e-05, "loss": 0.381, "step": 21500 }, { "epoch": 0.17, "learning_rate": 1.834282872128776e-05, "loss": 0.3757, "step": 22000 }, { "epoch": 0.17, "learning_rate": 1.830514861035751e-05, "loss": 0.3871, "step": 22500 }, { "epoch": 0.17, "learning_rate": 1.8267543859649125e-05, "loss": 0.3703, "step": 23000 }, { "epoch": 0.18, "learning_rate": 1.8229863748718875e-05, "loss": 0.3697, "step": 23500 }, { "epoch": 0.18, "learning_rate": 1.8192258998010492e-05, "loss": 0.3773, "step": 24000 }, { "epoch": 0.18, "learning_rate": 1.8154578887080245e-05, "loss": 0.3627, "step": 24500 }, { "epoch": 0.19, "learning_rate": 1.811689877615e-05, "loss": 0.3855, "step": 25000 }, { "epoch": 0.19, "learning_rate": 1.807921866521975e-05, "loss": 0.3775, "step": 25500 }, { "epoch": 0.2, "learning_rate": 1.8041538554289505e-05, "loss": 0.3787, "step": 26000 }, { "epoch": 0.2, "learning_rate": 1.8003858443359258e-05, "loss": 0.3793, "step": 26500 }, { "epoch": 0.2, "learning_rate": 1.796617833242901e-05, "loss": 0.376, "step": 27000 }, { "epoch": 0.21, "learning_rate": 1.7928498221498764e-05, "loss": 0.3898, "step": 27500 }, { "epoch": 0.21, "learning_rate": 1.789081811056852e-05, "loss": 0.373, "step": 28000 }, { "epoch": 0.21, "learning_rate": 1.7853137999638274e-05, "loss": 0.3855, "step": 28500 }, { "epoch": 0.22, "learning_rate": 1.7815533248929887e-05, "loss": 0.3739, "step": 29000 }, { "epoch": 0.22, "learning_rate": 1.7777853137999637e-05, "loss": 0.3711, "step": 29500 }, { "epoch": 0.23, "learning_rate": 1.7740173027069394e-05, "loss": 0.3787, "step": 30000 }, { "epoch": 0.23, "learning_rate": 1.7702492916139147e-05, "loss": 0.3729, "step": 30500 }, { "epoch": 0.23, "learning_rate": 1.76648128052089e-05, "loss": 0.3771, "step": 31000 }, { "epoch": 0.24, "learning_rate": 1.7627208054500514e-05, "loss": 0.3706, "step": 31500 }, { "epoch": 0.24, "learning_rate": 1.7589527943570267e-05, "loss": 0.3685, "step": 32000 }, { "epoch": 0.24, "learning_rate": 1.755184783264002e-05, "loss": 0.3774, "step": 32500 }, { "epoch": 0.25, "learning_rate": 1.7514167721709773e-05, "loss": 0.3599, "step": 33000 }, { "epoch": 0.25, "learning_rate": 1.7476487610779526e-05, "loss": 0.3626, "step": 33500 }, { "epoch": 0.26, "learning_rate": 1.7438807499849283e-05, "loss": 0.3735, "step": 34000 }, { "epoch": 0.26, "learning_rate": 1.7401202749140896e-05, "loss": 0.3605, "step": 34500 }, { "epoch": 0.26, "learning_rate": 1.736352263821065e-05, "loss": 0.3687, "step": 35000 }, { "epoch": 0.27, "learning_rate": 1.7325842527280403e-05, "loss": 0.3624, "step": 35500 }, { "epoch": 0.27, "learning_rate": 1.7288162416350156e-05, "loss": 0.3792, "step": 36000 }, { "epoch": 0.28, "learning_rate": 1.725048230541991e-05, "loss": 0.3629, "step": 36500 }, { "epoch": 0.28, "learning_rate": 1.7212802194489662e-05, "loss": 0.3734, "step": 37000 }, { "epoch": 0.28, "learning_rate": 1.7175122083559415e-05, "loss": 0.3696, "step": 37500 }, { "epoch": 0.29, "learning_rate": 1.7137441972629168e-05, "loss": 0.3728, "step": 38000 }, { "epoch": 0.29, "learning_rate": 1.7099837221920782e-05, "loss": 0.365, "step": 38500 }, { "epoch": 0.29, "learning_rate": 1.7062157110990535e-05, "loss": 0.3769, "step": 39000 }, { "epoch": 0.3, "learning_rate": 1.702455236028215e-05, "loss": 0.3594, "step": 39500 }, { "epoch": 0.3, "learning_rate": 1.6986872249351905e-05, "loss": 0.3669, "step": 40000 }, { "epoch": 0.31, "learning_rate": 1.694919213842166e-05, "loss": 0.3653, "step": 40500 }, { "epoch": 0.31, "learning_rate": 1.691151202749141e-05, "loss": 0.3586, "step": 41000 }, { "epoch": 0.31, "learning_rate": 1.6873831916561165e-05, "loss": 0.3568, "step": 41500 }, { "epoch": 0.32, "learning_rate": 1.6836151805630918e-05, "loss": 0.3669, "step": 42000 }, { "epoch": 0.32, "learning_rate": 1.679854705492253e-05, "loss": 0.376, "step": 42500 }, { "epoch": 0.32, "learning_rate": 1.6760866943992285e-05, "loss": 0.3683, "step": 43000 }, { "epoch": 0.33, "learning_rate": 1.6723186833062038e-05, "loss": 0.3652, "step": 43500 }, { "epoch": 0.33, "learning_rate": 1.668550672213179e-05, "loss": 0.354, "step": 44000 }, { "epoch": 0.34, "learning_rate": 1.6647826611201544e-05, "loss": 0.3674, "step": 44500 }, { "epoch": 0.34, "learning_rate": 1.6610146500271297e-05, "loss": 0.3621, "step": 45000 }, { "epoch": 0.34, "learning_rate": 1.657246638934105e-05, "loss": 0.3779, "step": 45500 }, { "epoch": 0.35, "learning_rate": 1.6534786278410807e-05, "loss": 0.3569, "step": 46000 }, { "epoch": 0.35, "learning_rate": 1.649710616748056e-05, "loss": 0.3749, "step": 46500 }, { "epoch": 0.35, "learning_rate": 1.6459501416772174e-05, "loss": 0.3741, "step": 47000 }, { "epoch": 0.36, "learning_rate": 1.6421821305841927e-05, "loss": 0.3466, "step": 47500 }, { "epoch": 0.36, "learning_rate": 1.638414119491168e-05, "loss": 0.3594, "step": 48000 }, { "epoch": 0.37, "learning_rate": 1.6346536444203294e-05, "loss": 0.3715, "step": 48500 }, { "epoch": 0.37, "learning_rate": 1.6308856333273047e-05, "loss": 0.3685, "step": 49000 }, { "epoch": 0.37, "learning_rate": 1.62711762223428e-05, "loss": 0.3563, "step": 49500 }, { "epoch": 0.38, "learning_rate": 1.6233496111412553e-05, "loss": 0.378, "step": 50000 }, { "epoch": 0.38, "learning_rate": 1.6195816000482306e-05, "loss": 0.3586, "step": 50500 }, { "epoch": 0.38, "learning_rate": 1.615813588955206e-05, "loss": 0.3687, "step": 51000 }, { "epoch": 0.39, "learning_rate": 1.6120455778621812e-05, "loss": 0.3671, "step": 51500 }, { "epoch": 0.39, "learning_rate": 1.608277566769157e-05, "loss": 0.3488, "step": 52000 }, { "epoch": 0.4, "learning_rate": 1.6045095556761322e-05, "loss": 0.3683, "step": 52500 }, { "epoch": 0.4, "learning_rate": 1.6007490806052936e-05, "loss": 0.3654, "step": 53000 }, { "epoch": 0.4, "learning_rate": 1.596981069512269e-05, "loss": 0.3666, "step": 53500 }, { "epoch": 0.41, "learning_rate": 1.593213058419244e-05, "loss": 0.3595, "step": 54000 }, { "epoch": 0.41, "learning_rate": 1.5894525833484056e-05, "loss": 0.3567, "step": 54500 }, { "epoch": 0.41, "learning_rate": 1.585684572255381e-05, "loss": 0.3636, "step": 55000 }, { "epoch": 0.42, "learning_rate": 1.5819165611623562e-05, "loss": 0.3538, "step": 55500 }, { "epoch": 0.42, "learning_rate": 1.5781485500693315e-05, "loss": 0.361, "step": 56000 }, { "epoch": 0.43, "learning_rate": 1.5743805389763068e-05, "loss": 0.3573, "step": 56500 }, { "epoch": 0.43, "learning_rate": 1.570612527883282e-05, "loss": 0.3674, "step": 57000 }, { "epoch": 0.43, "learning_rate": 1.5668445167902574e-05, "loss": 0.3655, "step": 57500 }, { "epoch": 0.44, "learning_rate": 1.5630765056972328e-05, "loss": 0.352, "step": 58000 }, { "epoch": 0.44, "learning_rate": 1.5593084946042084e-05, "loss": 0.3555, "step": 58500 }, { "epoch": 0.44, "learning_rate": 1.555555555555556e-05, "loss": 0.3697, "step": 59000 }, { "epoch": 0.45, "learning_rate": 1.551787544462531e-05, "loss": 0.3542, "step": 59500 }, { "epoch": 0.45, "learning_rate": 1.5480270693916922e-05, "loss": 0.3504, "step": 60000 }, { "epoch": 0.46, "learning_rate": 1.5442590582986678e-05, "loss": 0.366, "step": 60500 }, { "epoch": 0.46, "learning_rate": 1.540491047205643e-05, "loss": 0.3512, "step": 61000 }, { "epoch": 0.46, "learning_rate": 1.5367230361126185e-05, "loss": 0.3546, "step": 61500 }, { "epoch": 0.47, "learning_rate": 1.5329550250195938e-05, "loss": 0.3626, "step": 62000 }, { "epoch": 0.47, "learning_rate": 1.529187013926569e-05, "loss": 0.3569, "step": 62500 }, { "epoch": 0.47, "learning_rate": 1.5254190028335446e-05, "loss": 0.3741, "step": 63000 }, { "epoch": 0.48, "learning_rate": 1.5216509917405197e-05, "loss": 0.3747, "step": 63500 }, { "epoch": 0.48, "learning_rate": 1.517882980647495e-05, "loss": 0.3624, "step": 64000 }, { "epoch": 0.49, "learning_rate": 1.5141149695544705e-05, "loss": 0.3554, "step": 64500 }, { "epoch": 0.49, "learning_rate": 1.5103469584614458e-05, "loss": 0.3782, "step": 65000 }, { "epoch": 0.49, "learning_rate": 1.5065789473684211e-05, "loss": 0.3637, "step": 65500 }, { "epoch": 0.5, "learning_rate": 1.5028109362753964e-05, "loss": 0.3764, "step": 66000 }, { "epoch": 0.5, "learning_rate": 1.499042925182372e-05, "loss": 0.3706, "step": 66500 }, { "epoch": 0.5, "learning_rate": 1.4952749140893472e-05, "loss": 0.3583, "step": 67000 }, { "epoch": 0.51, "learning_rate": 1.4915069029963225e-05, "loss": 0.3526, "step": 67500 }, { "epoch": 0.51, "learning_rate": 1.487746427925484e-05, "loss": 0.3622, "step": 68000 }, { "epoch": 0.52, "learning_rate": 1.4839859528546455e-05, "loss": 0.365, "step": 68500 }, { "epoch": 0.52, "learning_rate": 1.4802179417616208e-05, "loss": 0.3603, "step": 69000 }, { "epoch": 0.52, "learning_rate": 1.4764499306685959e-05, "loss": 0.3603, "step": 69500 }, { "epoch": 0.53, "learning_rate": 1.4726819195755712e-05, "loss": 0.3588, "step": 70000 }, { "epoch": 0.53, "learning_rate": 1.4689139084825467e-05, "loss": 0.3682, "step": 70500 }, { "epoch": 0.54, "learning_rate": 1.465145897389522e-05, "loss": 0.3629, "step": 71000 }, { "epoch": 0.54, "learning_rate": 1.4613778862964973e-05, "loss": 0.3554, "step": 71500 }, { "epoch": 0.54, "learning_rate": 1.4576098752034726e-05, "loss": 0.3608, "step": 72000 }, { "epoch": 0.55, "learning_rate": 1.4538418641104481e-05, "loss": 0.3463, "step": 72500 }, { "epoch": 0.55, "learning_rate": 1.4500813890396095e-05, "loss": 0.3688, "step": 73000 }, { "epoch": 0.55, "learning_rate": 1.4463133779465848e-05, "loss": 0.3604, "step": 73500 }, { "epoch": 0.56, "learning_rate": 1.4425453668535601e-05, "loss": 0.3446, "step": 74000 }, { "epoch": 0.56, "learning_rate": 1.4387924278049074e-05, "loss": 0.3485, "step": 74500 }, { "epoch": 0.57, "learning_rate": 1.435024416711883e-05, "loss": 0.3667, "step": 75000 }, { "epoch": 0.57, "learning_rate": 1.4312564056188582e-05, "loss": 0.3521, "step": 75500 }, { "epoch": 0.57, "learning_rate": 1.4274883945258335e-05, "loss": 0.3608, "step": 76000 }, { "epoch": 0.58, "learning_rate": 1.423720383432809e-05, "loss": 0.3582, "step": 76500 }, { "epoch": 0.58, "learning_rate": 1.4199599083619703e-05, "loss": 0.3588, "step": 77000 }, { "epoch": 0.58, "learning_rate": 1.4161918972689457e-05, "loss": 0.3626, "step": 77500 }, { "epoch": 0.59, "learning_rate": 1.412423886175921e-05, "loss": 0.362, "step": 78000 }, { "epoch": 0.59, "learning_rate": 1.4086558750828963e-05, "loss": 0.3738, "step": 78500 }, { "epoch": 0.6, "learning_rate": 1.4048878639898718e-05, "loss": 0.3594, "step": 79000 }, { "epoch": 0.6, "learning_rate": 1.4011273889190331e-05, "loss": 0.3624, "step": 79500 }, { "epoch": 0.6, "learning_rate": 1.3973593778260084e-05, "loss": 0.3576, "step": 80000 }, { "epoch": 0.61, "learning_rate": 1.3935913667329838e-05, "loss": 0.3474, "step": 80500 }, { "epoch": 0.61, "learning_rate": 1.3898308916621453e-05, "loss": 0.3685, "step": 81000 }, { "epoch": 0.61, "learning_rate": 1.3860628805691206e-05, "loss": 0.3528, "step": 81500 }, { "epoch": 0.62, "learning_rate": 1.3822948694760958e-05, "loss": 0.3605, "step": 82000 }, { "epoch": 0.62, "learning_rate": 1.378526858383071e-05, "loss": 0.3482, "step": 82500 }, { "epoch": 0.63, "learning_rate": 1.3747588472900465e-05, "loss": 0.3589, "step": 83000 }, { "epoch": 0.63, "learning_rate": 1.3709908361970219e-05, "loss": 0.3451, "step": 83500 }, { "epoch": 0.63, "learning_rate": 1.3672228251039972e-05, "loss": 0.3596, "step": 84000 }, { "epoch": 0.64, "learning_rate": 1.3634548140109725e-05, "loss": 0.3493, "step": 84500 }, { "epoch": 0.64, "learning_rate": 1.359686802917948e-05, "loss": 0.3465, "step": 85000 }, { "epoch": 0.64, "learning_rate": 1.3559187918249233e-05, "loss": 0.3617, "step": 85500 }, { "epoch": 0.65, "learning_rate": 1.3521583167540847e-05, "loss": 0.3509, "step": 86000 }, { "epoch": 0.65, "learning_rate": 1.34839030566106e-05, "loss": 0.3592, "step": 86500 }, { "epoch": 0.66, "learning_rate": 1.3446222945680354e-05, "loss": 0.3452, "step": 87000 }, { "epoch": 0.66, "learning_rate": 1.3408542834750108e-05, "loss": 0.3492, "step": 87500 }, { "epoch": 0.66, "learning_rate": 1.3370862723819859e-05, "loss": 0.3665, "step": 88000 }, { "epoch": 0.67, "learning_rate": 1.3333182612889612e-05, "loss": 0.3579, "step": 88500 }, { "epoch": 0.67, "learning_rate": 1.3295502501959367e-05, "loss": 0.3589, "step": 89000 }, { "epoch": 0.67, "learning_rate": 1.325782239102912e-05, "loss": 0.3557, "step": 89500 }, { "epoch": 0.68, "learning_rate": 1.3220217640320734e-05, "loss": 0.3517, "step": 90000 }, { "epoch": 0.68, "learning_rate": 1.3182537529390487e-05, "loss": 0.3586, "step": 90500 }, { "epoch": 0.69, "learning_rate": 1.3144857418460242e-05, "loss": 0.3475, "step": 91000 }, { "epoch": 0.69, "learning_rate": 1.3107177307529995e-05, "loss": 0.3692, "step": 91500 }, { "epoch": 0.69, "learning_rate": 1.3069497196599748e-05, "loss": 0.3601, "step": 92000 }, { "epoch": 0.7, "learning_rate": 1.3031817085669501e-05, "loss": 0.3708, "step": 92500 }, { "epoch": 0.7, "learning_rate": 1.2994136974739256e-05, "loss": 0.3509, "step": 93000 }, { "epoch": 0.7, "learning_rate": 1.2956456863809009e-05, "loss": 0.3546, "step": 93500 }, { "epoch": 0.71, "learning_rate": 1.2918852113100621e-05, "loss": 0.3474, "step": 94000 }, { "epoch": 0.71, "learning_rate": 1.2881172002170374e-05, "loss": 0.3557, "step": 94500 }, { "epoch": 0.72, "learning_rate": 1.2843491891240129e-05, "loss": 0.3506, "step": 95000 }, { "epoch": 0.72, "learning_rate": 1.2805811780309882e-05, "loss": 0.3487, "step": 95500 }, { "epoch": 0.72, "learning_rate": 1.2768207029601496e-05, "loss": 0.3478, "step": 96000 }, { "epoch": 0.73, "learning_rate": 1.2730526918671249e-05, "loss": 0.3481, "step": 96500 }, { "epoch": 0.73, "learning_rate": 1.2692846807741004e-05, "loss": 0.3409, "step": 97000 }, { "epoch": 0.73, "learning_rate": 1.2655166696810757e-05, "loss": 0.3455, "step": 97500 }, { "epoch": 0.74, "learning_rate": 1.261748658588051e-05, "loss": 0.3508, "step": 98000 }, { "epoch": 0.74, "learning_rate": 1.2579881835172124e-05, "loss": 0.3674, "step": 98500 }, { "epoch": 0.75, "learning_rate": 1.254227708446374e-05, "loss": 0.3635, "step": 99000 }, { "epoch": 0.75, "learning_rate": 1.2504672333755353e-05, "loss": 0.3472, "step": 99500 }, { "epoch": 0.75, "learning_rate": 1.2466992222825104e-05, "loss": 0.3561, "step": 100000 }, { "epoch": 0.76, "learning_rate": 1.2429312111894857e-05, "loss": 0.3596, "step": 100500 }, { "epoch": 0.76, "learning_rate": 1.2391632000964612e-05, "loss": 0.35, "step": 101000 }, { "epoch": 0.76, "learning_rate": 1.2353951890034365e-05, "loss": 0.3555, "step": 101500 }, { "epoch": 0.77, "learning_rate": 1.2316271779104119e-05, "loss": 0.3564, "step": 102000 }, { "epoch": 0.77, "learning_rate": 1.2278591668173872e-05, "loss": 0.3566, "step": 102500 }, { "epoch": 0.78, "learning_rate": 1.2240986917465485e-05, "loss": 0.3648, "step": 103000 }, { "epoch": 0.78, "learning_rate": 1.220330680653524e-05, "loss": 0.3538, "step": 103500 }, { "epoch": 0.78, "learning_rate": 1.2165626695604993e-05, "loss": 0.3413, "step": 104000 }, { "epoch": 0.79, "learning_rate": 1.2127946584674746e-05, "loss": 0.3527, "step": 104500 }, { "epoch": 0.79, "learning_rate": 1.2090266473744501e-05, "loss": 0.3518, "step": 105000 }, { "epoch": 0.8, "learning_rate": 1.2052586362814254e-05, "loss": 0.3495, "step": 105500 }, { "epoch": 0.8, "learning_rate": 1.2014906251884006e-05, "loss": 0.3607, "step": 106000 }, { "epoch": 0.8, "learning_rate": 1.1977226140953759e-05, "loss": 0.3554, "step": 106500 }, { "epoch": 0.81, "learning_rate": 1.1939546030023514e-05, "loss": 0.353, "step": 107000 }, { "epoch": 0.81, "learning_rate": 1.1901941279315127e-05, "loss": 0.3661, "step": 107500 }, { "epoch": 0.81, "learning_rate": 1.186426116838488e-05, "loss": 0.3478, "step": 108000 }, { "epoch": 0.82, "learning_rate": 1.1826581057454634e-05, "loss": 0.3532, "step": 108500 }, { "epoch": 0.82, "learning_rate": 1.1788900946524387e-05, "loss": 0.3567, "step": 109000 }, { "epoch": 0.83, "learning_rate": 1.1751220835594142e-05, "loss": 0.3537, "step": 109500 }, { "epoch": 0.83, "learning_rate": 1.1713616084885755e-05, "loss": 0.3663, "step": 110000 }, { "epoch": 0.83, "learning_rate": 1.1675935973955508e-05, "loss": 0.3393, "step": 110500 }, { "epoch": 0.84, "learning_rate": 1.163825586302526e-05, "loss": 0.3505, "step": 111000 }, { "epoch": 0.84, "learning_rate": 1.1600575752095016e-05, "loss": 0.3554, "step": 111500 }, { "epoch": 0.84, "learning_rate": 1.1562971001386628e-05, "loss": 0.343, "step": 112000 }, { "epoch": 0.85, "learning_rate": 1.1525290890456382e-05, "loss": 0.3533, "step": 112500 }, { "epoch": 0.85, "learning_rate": 1.1487610779526135e-05, "loss": 0.3425, "step": 113000 }, { "epoch": 0.86, "learning_rate": 1.144993066859589e-05, "loss": 0.3474, "step": 113500 }, { "epoch": 0.86, "learning_rate": 1.1412250557665643e-05, "loss": 0.3601, "step": 114000 }, { "epoch": 0.86, "learning_rate": 1.1374645806957256e-05, "loss": 0.3535, "step": 114500 }, { "epoch": 0.87, "learning_rate": 1.133696569602701e-05, "loss": 0.3515, "step": 115000 }, { "epoch": 0.87, "learning_rate": 1.1299285585096764e-05, "loss": 0.352, "step": 115500 }, { "epoch": 0.87, "learning_rate": 1.1261605474166517e-05, "loss": 0.3637, "step": 116000 }, { "epoch": 0.88, "learning_rate": 1.122392536323627e-05, "loss": 0.364, "step": 116500 }, { "epoch": 0.88, "learning_rate": 1.1186245252306022e-05, "loss": 0.3594, "step": 117000 }, { "epoch": 0.89, "learning_rate": 1.1148640501597639e-05, "loss": 0.3464, "step": 117500 }, { "epoch": 0.89, "learning_rate": 1.111096039066739e-05, "loss": 0.3451, "step": 118000 }, { "epoch": 0.89, "learning_rate": 1.1073280279737144e-05, "loss": 0.353, "step": 118500 }, { "epoch": 0.9, "learning_rate": 1.1035600168806897e-05, "loss": 0.3545, "step": 119000 }, { "epoch": 0.9, "learning_rate": 1.0997920057876652e-05, "loss": 0.3555, "step": 119500 }, { "epoch": 0.9, "learning_rate": 1.0960239946946405e-05, "loss": 0.358, "step": 120000 }, { "epoch": 0.91, "learning_rate": 1.0922559836016158e-05, "loss": 0.3431, "step": 120500 }, { "epoch": 0.91, "learning_rate": 1.0884955085307772e-05, "loss": 0.3587, "step": 121000 }, { "epoch": 0.92, "learning_rate": 1.0847274974377526e-05, "loss": 0.3434, "step": 121500 }, { "epoch": 0.92, "learning_rate": 1.080959486344728e-05, "loss": 0.3399, "step": 122000 }, { "epoch": 0.92, "learning_rate": 1.0771914752517033e-05, "loss": 0.3474, "step": 122500 }, { "epoch": 0.93, "learning_rate": 1.0734234641586784e-05, "loss": 0.3417, "step": 123000 }, { "epoch": 0.93, "learning_rate": 1.069655453065654e-05, "loss": 0.3477, "step": 123500 }, { "epoch": 0.93, "learning_rate": 1.0658874419726292e-05, "loss": 0.3439, "step": 124000 }, { "epoch": 0.94, "learning_rate": 1.0621194308796045e-05, "loss": 0.3451, "step": 124500 }, { "epoch": 0.94, "learning_rate": 1.0583514197865798e-05, "loss": 0.3551, "step": 125000 }, { "epoch": 0.95, "learning_rate": 1.0545834086935553e-05, "loss": 0.3472, "step": 125500 }, { "epoch": 0.95, "learning_rate": 1.0508153976005306e-05, "loss": 0.3472, "step": 126000 }, { "epoch": 0.95, "learning_rate": 1.047047386507506e-05, "loss": 0.3583, "step": 126500 }, { "epoch": 0.96, "learning_rate": 1.0432869114366673e-05, "loss": 0.3615, "step": 127000 }, { "epoch": 0.96, "learning_rate": 1.0395189003436428e-05, "loss": 0.3433, "step": 127500 }, { "epoch": 0.96, "learning_rate": 1.0357508892506181e-05, "loss": 0.3423, "step": 128000 }, { "epoch": 0.97, "learning_rate": 1.0319828781575934e-05, "loss": 0.3462, "step": 128500 }, { "epoch": 0.97, "learning_rate": 1.0282148670645686e-05, "loss": 0.3546, "step": 129000 }, { "epoch": 0.98, "learning_rate": 1.0244543919937303e-05, "loss": 0.3412, "step": 129500 }, { "epoch": 0.98, "learning_rate": 1.0206939169228916e-05, "loss": 0.3372, "step": 130000 }, { "epoch": 0.98, "learning_rate": 1.0169259058298668e-05, "loss": 0.3558, "step": 130500 }, { "epoch": 0.99, "learning_rate": 1.0131578947368421e-05, "loss": 0.3409, "step": 131000 }, { "epoch": 0.99, "learning_rate": 1.0093898836438176e-05, "loss": 0.3529, "step": 131500 }, { "epoch": 0.99, "learning_rate": 1.0056218725507929e-05, "loss": 0.3508, "step": 132000 }, { "epoch": 1.0, "learning_rate": 1.0018538614577682e-05, "loss": 0.3572, "step": 132500 }, { "epoch": 1.0, "eval_accuracy": 0.842885983877665, "eval_loss": 0.3574163317680359, "eval_runtime": 225.821, "eval_samples_per_second": 309.276, "eval_steps_per_second": 30.932, "step": 132696 } ], "max_steps": 265392, "num_train_epochs": 2, "total_flos": 3.308206075463448e+16, "trial_name": null, "trial_params": null }