|
{ |
|
"best_metric": 0.8023715415019763, |
|
"best_model_checkpoint": "./toxicity_c_202201181030/checkpoint-3000", |
|
"epoch": 1.2414895617829602, |
|
"global_step": 3300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.94e-05, |
|
"loss": 0.2336, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy": 0.9445713034502174, |
|
"eval_f1": 0.6696495152870993, |
|
"eval_loss": 0.1531878262758255, |
|
"eval_precision": 0.8655421686746988, |
|
"eval_recall": 0.5460626330191548, |
|
"eval_runtime": 209.3715, |
|
"eval_samples_per_second": 152.69, |
|
"eval_steps_per_second": 19.09, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.88e-05, |
|
"loss": 0.1549, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.9456973943507773, |
|
"eval_f1": 0.6756352765321375, |
|
"eval_loss": 0.1629406213760376, |
|
"eval_precision": 0.8763936015511391, |
|
"eval_recall": 0.5497111584068106, |
|
"eval_runtime": 209.809, |
|
"eval_samples_per_second": 152.372, |
|
"eval_steps_per_second": 19.051, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.8199999999999998e-05, |
|
"loss": 0.158, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_accuracy": 0.949920235227877, |
|
"eval_f1": 0.7275820997107366, |
|
"eval_loss": 0.13407668471336365, |
|
"eval_precision": 0.8261205564142194, |
|
"eval_recall": 0.6500456065673457, |
|
"eval_runtime": 210.4551, |
|
"eval_samples_per_second": 151.904, |
|
"eval_steps_per_second": 18.992, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.7600000000000003e-05, |
|
"loss": 0.1321, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_accuracy": 0.9515468109731302, |
|
"eval_f1": 0.7355301348813386, |
|
"eval_loss": 0.1435689479112625, |
|
"eval_precision": 0.8387850467289719, |
|
"eval_recall": 0.6549103070842202, |
|
"eval_runtime": 209.6366, |
|
"eval_samples_per_second": 152.497, |
|
"eval_steps_per_second": 19.066, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.7000000000000002e-05, |
|
"loss": 0.1408, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.9473865307016172, |
|
"eval_f1": 0.7431276725717777, |
|
"eval_loss": 0.13682132959365845, |
|
"eval_precision": 0.7465480208652961, |
|
"eval_recall": 0.739738522347218, |
|
"eval_runtime": 210.8965, |
|
"eval_samples_per_second": 151.586, |
|
"eval_steps_per_second": 18.952, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.64e-05, |
|
"loss": 0.1385, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_accuracy": 0.9507960837060903, |
|
"eval_f1": 0.7527118377613583, |
|
"eval_loss": 0.12854768335819244, |
|
"eval_precision": 0.779296875, |
|
"eval_recall": 0.7278808148373366, |
|
"eval_runtime": 210.2245, |
|
"eval_samples_per_second": 152.071, |
|
"eval_steps_per_second": 19.013, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.58e-05, |
|
"loss": 0.1313, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_accuracy": 0.9529857049016235, |
|
"eval_f1": 0.7370078740157481, |
|
"eval_loss": 0.14518482983112335, |
|
"eval_precision": 0.8680956306677658, |
|
"eval_recall": 0.6403162055335968, |
|
"eval_runtime": 210.6173, |
|
"eval_samples_per_second": 151.787, |
|
"eval_steps_per_second": 18.978, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.52e-05, |
|
"loss": 0.1399, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_accuracy": 0.949826394319497, |
|
"eval_f1": 0.7610250297973777, |
|
"eval_loss": 0.13412030041217804, |
|
"eval_precision": 0.7461291264972246, |
|
"eval_recall": 0.7765278200060809, |
|
"eval_runtime": 209.9319, |
|
"eval_samples_per_second": 152.283, |
|
"eval_steps_per_second": 19.04, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.4599999999999998e-05, |
|
"loss": 0.1272, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_accuracy": 0.9536425912602834, |
|
"eval_f1": 0.7640878701050622, |
|
"eval_loss": 0.12946395576000214, |
|
"eval_precision": 0.8018710324089542, |
|
"eval_recall": 0.7297050775311645, |
|
"eval_runtime": 210.1105, |
|
"eval_samples_per_second": 152.153, |
|
"eval_steps_per_second": 19.023, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.4e-05, |
|
"loss": 0.1324, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_accuracy": 0.9478244549407239, |
|
"eval_f1": 0.7562116340251388, |
|
"eval_loss": 0.14241644740104675, |
|
"eval_precision": 0.7281170841542358, |
|
"eval_recall": 0.7865612648221344, |
|
"eval_runtime": 210.5453, |
|
"eval_samples_per_second": 151.839, |
|
"eval_steps_per_second": 18.984, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.3400000000000003e-05, |
|
"loss": 0.1198, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_accuracy": 0.9530482655072101, |
|
"eval_f1": 0.7681137030743087, |
|
"eval_loss": 0.1265845000743866, |
|
"eval_precision": 0.7807788944723618, |
|
"eval_recall": 0.7558528428093646, |
|
"eval_runtime": 210.173, |
|
"eval_samples_per_second": 152.108, |
|
"eval_steps_per_second": 19.018, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.2800000000000002e-05, |
|
"loss": 0.1232, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_accuracy": 0.9558634927586099, |
|
"eval_f1": 0.7611308616895208, |
|
"eval_loss": 0.11988817900419235, |
|
"eval_precision": 0.8586707410236822, |
|
"eval_recall": 0.6834904226208574, |
|
"eval_runtime": 209.5961, |
|
"eval_samples_per_second": 152.527, |
|
"eval_steps_per_second": 19.07, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.22e-05, |
|
"loss": 0.1239, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_accuracy": 0.9564578185116832, |
|
"eval_f1": 0.7667560321715818, |
|
"eval_loss": 0.12424681335687637, |
|
"eval_precision": 0.8540500186636805, |
|
"eval_recall": 0.6956521739130435, |
|
"eval_runtime": 210.1306, |
|
"eval_samples_per_second": 152.139, |
|
"eval_steps_per_second": 19.022, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.16e-05, |
|
"loss": 0.1236, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_accuracy": 0.9570834245675498, |
|
"eval_f1": 0.7704918032786885, |
|
"eval_loss": 0.1350642293691635, |
|
"eval_precision": 0.8564522127184827, |
|
"eval_recall": 0.7002128306476133, |
|
"eval_runtime": 210.8014, |
|
"eval_samples_per_second": 151.655, |
|
"eval_steps_per_second": 18.961, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.1e-05, |
|
"loss": 0.1254, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_accuracy": 0.9575526291094498, |
|
"eval_f1": 0.7775774463202754, |
|
"eval_loss": 0.11828587204217911, |
|
"eval_precision": 0.8435277382645804, |
|
"eval_recall": 0.7211918516266342, |
|
"eval_runtime": 209.8683, |
|
"eval_samples_per_second": 152.329, |
|
"eval_steps_per_second": 19.045, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.04e-05, |
|
"loss": 0.1252, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_accuracy": 0.9568644624479965, |
|
"eval_f1": 0.7638294228463779, |
|
"eval_loss": 0.12166079878807068, |
|
"eval_precision": 0.8745098039215686, |
|
"eval_recall": 0.6780176345393737, |
|
"eval_runtime": 210.147, |
|
"eval_samples_per_second": 152.127, |
|
"eval_steps_per_second": 19.02, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.98e-05, |
|
"loss": 0.1256, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_accuracy": 0.9574275078982765, |
|
"eval_f1": 0.7709910819451455, |
|
"eval_loss": 0.11569029092788696, |
|
"eval_precision": 0.8632253202712886, |
|
"eval_recall": 0.6965643052599574, |
|
"eval_runtime": 210.5963, |
|
"eval_samples_per_second": 151.802, |
|
"eval_steps_per_second": 18.979, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.9200000000000003e-05, |
|
"loss": 0.1324, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 0.95426819731615, |
|
"eval_f1": 0.7747303543913714, |
|
"eval_loss": 0.12124760448932648, |
|
"eval_precision": 0.7853795688847235, |
|
"eval_recall": 0.7643660687138948, |
|
"eval_runtime": 210.0339, |
|
"eval_samples_per_second": 152.209, |
|
"eval_steps_per_second": 19.03, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.86e-05, |
|
"loss": 0.1192, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_accuracy": 0.9576777503206231, |
|
"eval_f1": 0.7719534805326143, |
|
"eval_loss": 0.12099746614694595, |
|
"eval_precision": 0.8661119515885023, |
|
"eval_recall": 0.6962602614776527, |
|
"eval_runtime": 209.8273, |
|
"eval_samples_per_second": 152.359, |
|
"eval_steps_per_second": 19.049, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.8e-05, |
|
"loss": 0.1234, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_accuracy": 0.9573023866871031, |
|
"eval_f1": 0.781074578989575, |
|
"eval_loss": 0.11586691439151764, |
|
"eval_precision": 0.8265444670739986, |
|
"eval_recall": 0.7403466099118273, |
|
"eval_runtime": 209.7848, |
|
"eval_samples_per_second": 152.39, |
|
"eval_steps_per_second": 19.053, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.74e-05, |
|
"loss": 0.1129, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_accuracy": 0.9564578185116832, |
|
"eval_f1": 0.7782733354571519, |
|
"eval_loss": 0.12270316481590271, |
|
"eval_precision": 0.8173302107728337, |
|
"eval_recall": 0.7427789601702646, |
|
"eval_runtime": 209.6077, |
|
"eval_samples_per_second": 152.518, |
|
"eval_steps_per_second": 19.069, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.6800000000000002e-05, |
|
"loss": 0.1244, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_accuracy": 0.9576777503206231, |
|
"eval_f1": 0.776769509981851, |
|
"eval_loss": 0.11879286915063858, |
|
"eval_precision": 0.8492063492063492, |
|
"eval_recall": 0.7157190635451505, |
|
"eval_runtime": 209.8694, |
|
"eval_samples_per_second": 152.328, |
|
"eval_steps_per_second": 19.045, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.62e-05, |
|
"loss": 0.1198, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_accuracy": 0.9573336669898964, |
|
"eval_f1": 0.7855345911949686, |
|
"eval_loss": 0.12149158120155334, |
|
"eval_precision": 0.8134158254640182, |
|
"eval_recall": 0.7595013681970204, |
|
"eval_runtime": 209.8421, |
|
"eval_samples_per_second": 152.348, |
|
"eval_steps_per_second": 19.048, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.56e-05, |
|
"loss": 0.1167, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.9582095154681097, |
|
"eval_f1": 0.7829054273643159, |
|
"eval_loss": 0.11379627883434296, |
|
"eval_precision": 0.8408376963350785, |
|
"eval_recall": 0.7324414715719063, |
|
"eval_runtime": 210.0964, |
|
"eval_samples_per_second": 152.163, |
|
"eval_steps_per_second": 19.025, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.1162, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_accuracy": 0.9579592730457631, |
|
"eval_f1": 0.7870722433460076, |
|
"eval_loss": 0.11324156820774078, |
|
"eval_precision": 0.8217002977174992, |
|
"eval_recall": 0.7552447552447552, |
|
"eval_runtime": 210.2617, |
|
"eval_samples_per_second": 152.044, |
|
"eval_steps_per_second": 19.01, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.44e-05, |
|
"loss": 0.1202, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_accuracy": 0.9577403109262098, |
|
"eval_f1": 0.785385226370135, |
|
"eval_loss": 0.11547254770994186, |
|
"eval_precision": 0.8223552894211577, |
|
"eval_recall": 0.7515962298570994, |
|
"eval_runtime": 209.2873, |
|
"eval_samples_per_second": 152.752, |
|
"eval_steps_per_second": 19.098, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.3800000000000002e-05, |
|
"loss": 0.1069, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_accuracy": 0.9589289624323564, |
|
"eval_f1": 0.7883282282766404, |
|
"eval_loss": 0.11415638774633408, |
|
"eval_precision": 0.8390528483184626, |
|
"eval_recall": 0.7433870477348739, |
|
"eval_runtime": 209.3049, |
|
"eval_samples_per_second": 152.739, |
|
"eval_steps_per_second": 19.097, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.32e-05, |
|
"loss": 0.0866, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_accuracy": 0.9558947730614032, |
|
"eval_f1": 0.7875866224766495, |
|
"eval_loss": 0.12708334624767303, |
|
"eval_precision": 0.7805315019408778, |
|
"eval_recall": 0.7947704469443599, |
|
"eval_runtime": 208.8539, |
|
"eval_samples_per_second": 153.069, |
|
"eval_steps_per_second": 19.138, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.26e-05, |
|
"loss": 0.0913, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_accuracy": 0.9557696518502299, |
|
"eval_f1": 0.7855626326963907, |
|
"eval_loss": 0.12602387368679047, |
|
"eval_precision": 0.783661119515885, |
|
"eval_recall": 0.7874733961690483, |
|
"eval_runtime": 209.1412, |
|
"eval_samples_per_second": 152.858, |
|
"eval_steps_per_second": 19.111, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.087, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_accuracy": 0.95548812912509, |
|
"eval_f1": 0.7876436352783167, |
|
"eval_loss": 0.12803590297698975, |
|
"eval_precision": 0.7734466588511137, |
|
"eval_recall": 0.8023715415019763, |
|
"eval_runtime": 209.0203, |
|
"eval_samples_per_second": 152.947, |
|
"eval_steps_per_second": 19.123, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.1400000000000001e-05, |
|
"loss": 0.0839, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_accuracy": 0.958991523037943, |
|
"eval_f1": 0.7922016167379933, |
|
"eval_loss": 0.12513290345668793, |
|
"eval_precision": 0.8274834437086093, |
|
"eval_recall": 0.759805411979325, |
|
"eval_runtime": 209.1866, |
|
"eval_samples_per_second": 152.825, |
|
"eval_steps_per_second": 19.107, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.08e-05, |
|
"loss": 0.0889, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_accuracy": 0.9583033563764898, |
|
"eval_f1": 0.7890489001424276, |
|
"eval_loss": 0.12521126866340637, |
|
"eval_precision": 0.8227722772277227, |
|
"eval_recall": 0.7579811492854971, |
|
"eval_runtime": 209.1839, |
|
"eval_samples_per_second": 152.827, |
|
"eval_steps_per_second": 19.108, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.02e-05, |
|
"loss": 0.0887, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_accuracy": 0.9562701366949232, |
|
"eval_f1": 0.7879890809827116, |
|
"eval_loss": 0.12457986176013947, |
|
"eval_precision": 0.7860816944024206, |
|
"eval_recall": 0.7899057464274856, |
|
"eval_runtime": 209.6152, |
|
"eval_samples_per_second": 152.513, |
|
"eval_steps_per_second": 19.068, |
|
"step": 3300 |
|
} |
|
], |
|
"max_steps": 5000, |
|
"num_train_epochs": 2, |
|
"total_flos": 2.279247411490296e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|