|
{ |
|
"best_metric": 0.7994814174589455, |
|
"best_model_checkpoint": "../../checkpoints/baseline/default-baseline-uncleaned/lm_model/finetune/qnli/checkpoint-6000", |
|
"epoch": 10.0, |
|
"global_step": 6870, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.29, |
|
"eval_accuracy": 0.7541557550430298, |
|
"eval_f1": 0.7606473594548551, |
|
"eval_loss": 0.513882577419281, |
|
"eval_runtime": 2.5883, |
|
"eval_samples_per_second": 883.22, |
|
"eval_steps_per_second": 110.499, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 0.7655293345451355, |
|
"eval_f1": 0.7691645133505599, |
|
"eval_loss": 0.49110308289527893, |
|
"eval_runtime": 2.5616, |
|
"eval_samples_per_second": 892.424, |
|
"eval_steps_per_second": 111.651, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.636098981077147e-05, |
|
"loss": 0.5823, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_accuracy": 0.7729659080505371, |
|
"eval_f1": 0.7746417716022579, |
|
"eval_loss": 0.4785449504852295, |
|
"eval_runtime": 2.5604, |
|
"eval_samples_per_second": 892.818, |
|
"eval_steps_per_second": 111.7, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_accuracy": 0.787401556968689, |
|
"eval_f1": 0.7940677966101695, |
|
"eval_loss": 0.4633069336414337, |
|
"eval_runtime": 2.5555, |
|
"eval_samples_per_second": 894.547, |
|
"eval_steps_per_second": 111.916, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 4.272197962154294e-05, |
|
"loss": 0.5036, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_accuracy": 0.7725284099578857, |
|
"eval_f1": 0.7653429602888085, |
|
"eval_loss": 0.48655858635902405, |
|
"eval_runtime": 2.5624, |
|
"eval_samples_per_second": 892.133, |
|
"eval_steps_per_second": 111.614, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_accuracy": 0.7904636859893799, |
|
"eval_f1": 0.7993297025555091, |
|
"eval_loss": 0.45168524980545044, |
|
"eval_runtime": 2.5579, |
|
"eval_samples_per_second": 893.715, |
|
"eval_steps_per_second": 111.812, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_accuracy": 0.7909011244773865, |
|
"eval_f1": 0.7941429801894918, |
|
"eval_loss": 0.45709288120269775, |
|
"eval_runtime": 2.5535, |
|
"eval_samples_per_second": 895.253, |
|
"eval_steps_per_second": 112.005, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 3.9082969432314415e-05, |
|
"loss": 0.482, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_accuracy": 0.7917760014533997, |
|
"eval_f1": 0.797274275979557, |
|
"eval_loss": 0.45153066515922546, |
|
"eval_runtime": 2.5517, |
|
"eval_samples_per_second": 895.864, |
|
"eval_steps_per_second": 112.081, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_accuracy": 0.7970253825187683, |
|
"eval_f1": 0.8028887000849616, |
|
"eval_loss": 0.45009395480155945, |
|
"eval_runtime": 2.5577, |
|
"eval_samples_per_second": 893.772, |
|
"eval_steps_per_second": 111.819, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 3.544395924308588e-05, |
|
"loss": 0.461, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"eval_accuracy": 0.7913385629653931, |
|
"eval_f1": 0.7899603698811096, |
|
"eval_loss": 0.4560691714286804, |
|
"eval_runtime": 2.5537, |
|
"eval_samples_per_second": 895.184, |
|
"eval_steps_per_second": 111.996, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_accuracy": 0.8018372654914856, |
|
"eval_f1": 0.8119551681195518, |
|
"eval_loss": 0.4423236548900604, |
|
"eval_runtime": 2.5844, |
|
"eval_samples_per_second": 884.554, |
|
"eval_steps_per_second": 110.666, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"eval_accuracy": 0.7939632534980774, |
|
"eval_f1": 0.8006771053745239, |
|
"eval_loss": 0.4459246098995209, |
|
"eval_runtime": 2.5753, |
|
"eval_samples_per_second": 887.668, |
|
"eval_steps_per_second": 111.056, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 3.1804949053857355e-05, |
|
"loss": 0.4398, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"eval_accuracy": 0.8027121424674988, |
|
"eval_f1": 0.8112180828798661, |
|
"eval_loss": 0.4345141649246216, |
|
"eval_runtime": 2.5691, |
|
"eval_samples_per_second": 889.815, |
|
"eval_steps_per_second": 111.324, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"eval_accuracy": 0.8009623885154724, |
|
"eval_f1": 0.8151158065826899, |
|
"eval_loss": 0.4380125403404236, |
|
"eval_runtime": 2.563, |
|
"eval_samples_per_second": 891.909, |
|
"eval_steps_per_second": 111.586, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 2.816593886462882e-05, |
|
"loss": 0.4272, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"eval_accuracy": 0.7913385629653931, |
|
"eval_f1": 0.7934170636639237, |
|
"eval_loss": 0.45032593607902527, |
|
"eval_runtime": 2.5682, |
|
"eval_samples_per_second": 890.103, |
|
"eval_steps_per_second": 111.36, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"eval_accuracy": 0.8005249500274658, |
|
"eval_f1": 0.8059574468085107, |
|
"eval_loss": 0.4483606219291687, |
|
"eval_runtime": 2.5703, |
|
"eval_samples_per_second": 889.378, |
|
"eval_steps_per_second": 111.269, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"eval_accuracy": 0.7948381304740906, |
|
"eval_f1": 0.7934830471158081, |
|
"eval_loss": 0.456741064786911, |
|
"eval_runtime": 2.5765, |
|
"eval_samples_per_second": 887.256, |
|
"eval_steps_per_second": 111.004, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 2.452692867540029e-05, |
|
"loss": 0.4153, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"eval_accuracy": 0.7970253825187683, |
|
"eval_f1": 0.8008583690987126, |
|
"eval_loss": 0.4510791301727295, |
|
"eval_runtime": 2.5665, |
|
"eval_samples_per_second": 890.719, |
|
"eval_steps_per_second": 111.437, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"eval_accuracy": 0.7957130074501038, |
|
"eval_f1": 0.797044763146458, |
|
"eval_loss": 0.44894999265670776, |
|
"eval_runtime": 2.5695, |
|
"eval_samples_per_second": 889.652, |
|
"eval_steps_per_second": 111.304, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"learning_rate": 2.088791848617176e-05, |
|
"loss": 0.4055, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"eval_accuracy": 0.7939632534980774, |
|
"eval_f1": 0.7954841511072515, |
|
"eval_loss": 0.45642799139022827, |
|
"eval_runtime": 2.5702, |
|
"eval_samples_per_second": 889.408, |
|
"eval_steps_per_second": 111.273, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"eval_accuracy": 0.7992125749588013, |
|
"eval_f1": 0.8040973111395647, |
|
"eval_loss": 0.4486392140388489, |
|
"eval_runtime": 2.5739, |
|
"eval_samples_per_second": 888.144, |
|
"eval_steps_per_second": 111.115, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"eval_accuracy": 0.8018372654914856, |
|
"eval_f1": 0.8074798130046749, |
|
"eval_loss": 0.44145750999450684, |
|
"eval_runtime": 2.5725, |
|
"eval_samples_per_second": 888.634, |
|
"eval_steps_per_second": 111.176, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 1.7248908296943234e-05, |
|
"loss": 0.3918, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"eval_accuracy": 0.7970253825187683, |
|
"eval_f1": 0.7980852915578764, |
|
"eval_loss": 0.4575858414173126, |
|
"eval_runtime": 2.5712, |
|
"eval_samples_per_second": 889.092, |
|
"eval_steps_per_second": 111.234, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"eval_accuracy": 0.8018372654914856, |
|
"eval_f1": 0.8084566596194505, |
|
"eval_loss": 0.4492335319519043, |
|
"eval_runtime": 2.5716, |
|
"eval_samples_per_second": 888.937, |
|
"eval_steps_per_second": 111.214, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 1.3609898107714703e-05, |
|
"loss": 0.3772, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"eval_accuracy": 0.7961505055427551, |
|
"eval_f1": 0.7963286713286714, |
|
"eval_loss": 0.4673013389110565, |
|
"eval_runtime": 2.5753, |
|
"eval_samples_per_second": 887.674, |
|
"eval_steps_per_second": 111.056, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"eval_accuracy": 0.7996500730514526, |
|
"eval_f1": 0.7994746059544658, |
|
"eval_loss": 0.4637942910194397, |
|
"eval_runtime": 2.5678, |
|
"eval_samples_per_second": 890.24, |
|
"eval_steps_per_second": 111.377, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 7.86, |
|
"eval_accuracy": 0.7974628210067749, |
|
"eval_f1": 0.8013728013728013, |
|
"eval_loss": 0.45550382137298584, |
|
"eval_runtime": 2.5738, |
|
"eval_samples_per_second": 888.174, |
|
"eval_steps_per_second": 111.119, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 9.970887918486172e-06, |
|
"loss": 0.377, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"eval_accuracy": 0.7961505055427551, |
|
"eval_f1": 0.7991379310344828, |
|
"eval_loss": 0.4626655578613281, |
|
"eval_runtime": 2.5693, |
|
"eval_samples_per_second": 889.72, |
|
"eval_steps_per_second": 111.312, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 8.44, |
|
"eval_accuracy": 0.7970253825187683, |
|
"eval_f1": 0.8017094017094017, |
|
"eval_loss": 0.4536679685115814, |
|
"eval_runtime": 2.5725, |
|
"eval_samples_per_second": 888.631, |
|
"eval_steps_per_second": 111.176, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"learning_rate": 6.3318777292576415e-06, |
|
"loss": 0.3707, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"eval_accuracy": 0.7970253825187683, |
|
"eval_f1": 0.7994814174589455, |
|
"eval_loss": 0.45931074023246765, |
|
"eval_runtime": 2.5683, |
|
"eval_samples_per_second": 890.099, |
|
"eval_steps_per_second": 111.36, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"eval_accuracy": 0.7965879440307617, |
|
"eval_f1": 0.7975620374401393, |
|
"eval_loss": 0.46558013558387756, |
|
"eval_runtime": 2.5731, |
|
"eval_samples_per_second": 888.418, |
|
"eval_steps_per_second": 111.149, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"eval_accuracy": 0.7965879440307617, |
|
"eval_f1": 0.8, |
|
"eval_loss": 0.4601030945777893, |
|
"eval_runtime": 2.5774, |
|
"eval_samples_per_second": 886.945, |
|
"eval_steps_per_second": 110.965, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 9.46, |
|
"learning_rate": 2.692867540029112e-06, |
|
"loss": 0.366, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 9.61, |
|
"eval_accuracy": 0.7961505055427551, |
|
"eval_f1": 0.7989646246764452, |
|
"eval_loss": 0.45923250913619995, |
|
"eval_runtime": 2.5827, |
|
"eval_samples_per_second": 885.108, |
|
"eval_steps_per_second": 110.735, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"eval_accuracy": 0.7974628210067749, |
|
"eval_f1": 0.8015430775825118, |
|
"eval_loss": 0.4570430517196655, |
|
"eval_runtime": 2.5678, |
|
"eval_samples_per_second": 890.266, |
|
"eval_steps_per_second": 111.381, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 6870, |
|
"total_flos": 1.022590196992512e+16, |
|
"train_loss": 0.4268792453638157, |
|
"train_runtime": 1110.0444, |
|
"train_samples_per_second": 395.633, |
|
"train_steps_per_second": 6.189 |
|
} |
|
], |
|
"max_steps": 6870, |
|
"num_train_epochs": 10, |
|
"total_flos": 1.022590196992512e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|