{ "best_metric": 0.7994814174589455, "best_model_checkpoint": "../../checkpoints/baseline/default-baseline-uncleaned/lm_model/finetune/qnli/checkpoint-6000", "epoch": 10.0, "global_step": 6870, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.29, "eval_accuracy": 0.7541557550430298, "eval_f1": 0.7606473594548551, "eval_loss": 0.513882577419281, "eval_runtime": 2.5883, "eval_samples_per_second": 883.22, "eval_steps_per_second": 110.499, "step": 200 }, { "epoch": 0.58, "eval_accuracy": 0.7655293345451355, "eval_f1": 0.7691645133505599, "eval_loss": 0.49110308289527893, "eval_runtime": 2.5616, "eval_samples_per_second": 892.424, "eval_steps_per_second": 111.651, "step": 400 }, { "epoch": 0.73, "learning_rate": 4.636098981077147e-05, "loss": 0.5823, "step": 500 }, { "epoch": 0.87, "eval_accuracy": 0.7729659080505371, "eval_f1": 0.7746417716022579, "eval_loss": 0.4785449504852295, "eval_runtime": 2.5604, "eval_samples_per_second": 892.818, "eval_steps_per_second": 111.7, "step": 600 }, { "epoch": 1.16, "eval_accuracy": 0.787401556968689, "eval_f1": 0.7940677966101695, "eval_loss": 0.4633069336414337, "eval_runtime": 2.5555, "eval_samples_per_second": 894.547, "eval_steps_per_second": 111.916, "step": 800 }, { "epoch": 1.46, "learning_rate": 4.272197962154294e-05, "loss": 0.5036, "step": 1000 }, { "epoch": 1.46, "eval_accuracy": 0.7725284099578857, "eval_f1": 0.7653429602888085, "eval_loss": 0.48655858635902405, "eval_runtime": 2.5624, "eval_samples_per_second": 892.133, "eval_steps_per_second": 111.614, "step": 1000 }, { "epoch": 1.75, "eval_accuracy": 0.7904636859893799, "eval_f1": 0.7993297025555091, "eval_loss": 0.45168524980545044, "eval_runtime": 2.5579, "eval_samples_per_second": 893.715, "eval_steps_per_second": 111.812, "step": 1200 }, { "epoch": 2.04, "eval_accuracy": 0.7909011244773865, "eval_f1": 0.7941429801894918, "eval_loss": 0.45709288120269775, "eval_runtime": 2.5535, "eval_samples_per_second": 895.253, "eval_steps_per_second": 112.005, "step": 1400 }, { "epoch": 2.18, "learning_rate": 3.9082969432314415e-05, "loss": 0.482, "step": 1500 }, { "epoch": 2.33, "eval_accuracy": 0.7917760014533997, "eval_f1": 0.797274275979557, "eval_loss": 0.45153066515922546, "eval_runtime": 2.5517, "eval_samples_per_second": 895.864, "eval_steps_per_second": 112.081, "step": 1600 }, { "epoch": 2.62, "eval_accuracy": 0.7970253825187683, "eval_f1": 0.8028887000849616, "eval_loss": 0.45009395480155945, "eval_runtime": 2.5577, "eval_samples_per_second": 893.772, "eval_steps_per_second": 111.819, "step": 1800 }, { "epoch": 2.91, "learning_rate": 3.544395924308588e-05, "loss": 0.461, "step": 2000 }, { "epoch": 2.91, "eval_accuracy": 0.7913385629653931, "eval_f1": 0.7899603698811096, "eval_loss": 0.4560691714286804, "eval_runtime": 2.5537, "eval_samples_per_second": 895.184, "eval_steps_per_second": 111.996, "step": 2000 }, { "epoch": 3.2, "eval_accuracy": 0.8018372654914856, "eval_f1": 0.8119551681195518, "eval_loss": 0.4423236548900604, "eval_runtime": 2.5844, "eval_samples_per_second": 884.554, "eval_steps_per_second": 110.666, "step": 2200 }, { "epoch": 3.49, "eval_accuracy": 0.7939632534980774, "eval_f1": 0.8006771053745239, "eval_loss": 0.4459246098995209, "eval_runtime": 2.5753, "eval_samples_per_second": 887.668, "eval_steps_per_second": 111.056, "step": 2400 }, { "epoch": 3.64, "learning_rate": 3.1804949053857355e-05, "loss": 0.4398, "step": 2500 }, { "epoch": 3.78, "eval_accuracy": 0.8027121424674988, "eval_f1": 0.8112180828798661, "eval_loss": 0.4345141649246216, "eval_runtime": 2.5691, "eval_samples_per_second": 889.815, "eval_steps_per_second": 111.324, "step": 2600 }, { "epoch": 4.08, "eval_accuracy": 0.8009623885154724, "eval_f1": 0.8151158065826899, "eval_loss": 0.4380125403404236, "eval_runtime": 2.563, "eval_samples_per_second": 891.909, "eval_steps_per_second": 111.586, "step": 2800 }, { "epoch": 4.37, "learning_rate": 2.816593886462882e-05, "loss": 0.4272, "step": 3000 }, { "epoch": 4.37, "eval_accuracy": 0.7913385629653931, "eval_f1": 0.7934170636639237, "eval_loss": 0.45032593607902527, "eval_runtime": 2.5682, "eval_samples_per_second": 890.103, "eval_steps_per_second": 111.36, "step": 3000 }, { "epoch": 4.66, "eval_accuracy": 0.8005249500274658, "eval_f1": 0.8059574468085107, "eval_loss": 0.4483606219291687, "eval_runtime": 2.5703, "eval_samples_per_second": 889.378, "eval_steps_per_second": 111.269, "step": 3200 }, { "epoch": 4.95, "eval_accuracy": 0.7948381304740906, "eval_f1": 0.7934830471158081, "eval_loss": 0.456741064786911, "eval_runtime": 2.5765, "eval_samples_per_second": 887.256, "eval_steps_per_second": 111.004, "step": 3400 }, { "epoch": 5.09, "learning_rate": 2.452692867540029e-05, "loss": 0.4153, "step": 3500 }, { "epoch": 5.24, "eval_accuracy": 0.7970253825187683, "eval_f1": 0.8008583690987126, "eval_loss": 0.4510791301727295, "eval_runtime": 2.5665, "eval_samples_per_second": 890.719, "eval_steps_per_second": 111.437, "step": 3600 }, { "epoch": 5.53, "eval_accuracy": 0.7957130074501038, "eval_f1": 0.797044763146458, "eval_loss": 0.44894999265670776, "eval_runtime": 2.5695, "eval_samples_per_second": 889.652, "eval_steps_per_second": 111.304, "step": 3800 }, { "epoch": 5.82, "learning_rate": 2.088791848617176e-05, "loss": 0.4055, "step": 4000 }, { "epoch": 5.82, "eval_accuracy": 0.7939632534980774, "eval_f1": 0.7954841511072515, "eval_loss": 0.45642799139022827, "eval_runtime": 2.5702, "eval_samples_per_second": 889.408, "eval_steps_per_second": 111.273, "step": 4000 }, { "epoch": 6.11, "eval_accuracy": 0.7992125749588013, "eval_f1": 0.8040973111395647, "eval_loss": 0.4486392140388489, "eval_runtime": 2.5739, "eval_samples_per_second": 888.144, "eval_steps_per_second": 111.115, "step": 4200 }, { "epoch": 6.4, "eval_accuracy": 0.8018372654914856, "eval_f1": 0.8074798130046749, "eval_loss": 0.44145750999450684, "eval_runtime": 2.5725, "eval_samples_per_second": 888.634, "eval_steps_per_second": 111.176, "step": 4400 }, { "epoch": 6.55, "learning_rate": 1.7248908296943234e-05, "loss": 0.3918, "step": 4500 }, { "epoch": 6.7, "eval_accuracy": 0.7970253825187683, "eval_f1": 0.7980852915578764, "eval_loss": 0.4575858414173126, "eval_runtime": 2.5712, "eval_samples_per_second": 889.092, "eval_steps_per_second": 111.234, "step": 4600 }, { "epoch": 6.99, "eval_accuracy": 0.8018372654914856, "eval_f1": 0.8084566596194505, "eval_loss": 0.4492335319519043, "eval_runtime": 2.5716, "eval_samples_per_second": 888.937, "eval_steps_per_second": 111.214, "step": 4800 }, { "epoch": 7.28, "learning_rate": 1.3609898107714703e-05, "loss": 0.3772, "step": 5000 }, { "epoch": 7.28, "eval_accuracy": 0.7961505055427551, "eval_f1": 0.7963286713286714, "eval_loss": 0.4673013389110565, "eval_runtime": 2.5753, "eval_samples_per_second": 887.674, "eval_steps_per_second": 111.056, "step": 5000 }, { "epoch": 7.57, "eval_accuracy": 0.7996500730514526, "eval_f1": 0.7994746059544658, "eval_loss": 0.4637942910194397, "eval_runtime": 2.5678, "eval_samples_per_second": 890.24, "eval_steps_per_second": 111.377, "step": 5200 }, { "epoch": 7.86, "eval_accuracy": 0.7974628210067749, "eval_f1": 0.8013728013728013, "eval_loss": 0.45550382137298584, "eval_runtime": 2.5738, "eval_samples_per_second": 888.174, "eval_steps_per_second": 111.119, "step": 5400 }, { "epoch": 8.01, "learning_rate": 9.970887918486172e-06, "loss": 0.377, "step": 5500 }, { "epoch": 8.15, "eval_accuracy": 0.7961505055427551, "eval_f1": 0.7991379310344828, "eval_loss": 0.4626655578613281, "eval_runtime": 2.5693, "eval_samples_per_second": 889.72, "eval_steps_per_second": 111.312, "step": 5600 }, { "epoch": 8.44, "eval_accuracy": 0.7970253825187683, "eval_f1": 0.8017094017094017, "eval_loss": 0.4536679685115814, "eval_runtime": 2.5725, "eval_samples_per_second": 888.631, "eval_steps_per_second": 111.176, "step": 5800 }, { "epoch": 8.73, "learning_rate": 6.3318777292576415e-06, "loss": 0.3707, "step": 6000 }, { "epoch": 8.73, "eval_accuracy": 0.7970253825187683, "eval_f1": 0.7994814174589455, "eval_loss": 0.45931074023246765, "eval_runtime": 2.5683, "eval_samples_per_second": 890.099, "eval_steps_per_second": 111.36, "step": 6000 }, { "epoch": 9.02, "eval_accuracy": 0.7965879440307617, "eval_f1": 0.7975620374401393, "eval_loss": 0.46558013558387756, "eval_runtime": 2.5731, "eval_samples_per_second": 888.418, "eval_steps_per_second": 111.149, "step": 6200 }, { "epoch": 9.32, "eval_accuracy": 0.7965879440307617, "eval_f1": 0.8, "eval_loss": 0.4601030945777893, "eval_runtime": 2.5774, "eval_samples_per_second": 886.945, "eval_steps_per_second": 110.965, "step": 6400 }, { "epoch": 9.46, "learning_rate": 2.692867540029112e-06, "loss": 0.366, "step": 6500 }, { "epoch": 9.61, "eval_accuracy": 0.7961505055427551, "eval_f1": 0.7989646246764452, "eval_loss": 0.45923250913619995, "eval_runtime": 2.5827, "eval_samples_per_second": 885.108, "eval_steps_per_second": 110.735, "step": 6600 }, { "epoch": 9.9, "eval_accuracy": 0.7974628210067749, "eval_f1": 0.8015430775825118, "eval_loss": 0.4570430517196655, "eval_runtime": 2.5678, "eval_samples_per_second": 890.266, "eval_steps_per_second": 111.381, "step": 6800 }, { "epoch": 10.0, "step": 6870, "total_flos": 1.022590196992512e+16, "train_loss": 0.4268792453638157, "train_runtime": 1110.0444, "train_samples_per_second": 395.633, "train_steps_per_second": 6.189 } ], "max_steps": 6870, "num_train_epochs": 10, "total_flos": 1.022590196992512e+16, "trial_name": null, "trial_params": null }