|
{
  "best_metric": 2.0570528507232666,
  "best_model_checkpoint": "t5-base-snl/checkpoint-2890",
  "epoch": 19.0,
  "global_step": 3230,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.15,
      "learning_rate": 4.9632352941176476e-05,
      "loss": 4.9792,
      "step": 25
    },
    {
      "epoch": 0.29,
      "learning_rate": 4.9264705882352944e-05,
      "loss": 3.7166,
      "step": 50
    },
    {
      "epoch": 0.44,
      "learning_rate": 4.889705882352941e-05,
      "loss": 3.2528,
      "step": 75
    },
    {
      "epoch": 0.59,
      "learning_rate": 4.8529411764705885e-05,
      "loss": 3.0823,
      "step": 100
    },
    {
      "epoch": 0.74,
      "learning_rate": 4.816176470588236e-05,
      "loss": 3.0381,
      "step": 125
    },
    {
      "epoch": 0.88,
      "learning_rate": 4.7794117647058826e-05,
      "loss": 2.9943,
      "step": 150
    },
    {
      "epoch": 1.0,
      "eval_gen_len": 18.976800976800977,
      "eval_loss": 2.2042253017425537,
      "eval_rouge1": 28.1135,
      "eval_rouge2": 13.7477,
      "eval_rougeL": 25.4842,
      "eval_rougeLsum": 26.6467,
      "eval_runtime": 22.885,
      "eval_samples_per_second": 35.788,
      "eval_steps_per_second": 2.272,
      "step": 170
    },
    {
      "epoch": 1.03,
      "learning_rate": 4.742647058823529e-05,
      "loss": 2.9188,
      "step": 175
    },
    {
      "epoch": 1.18,
      "learning_rate": 4.705882352941177e-05,
      "loss": 2.8824,
      "step": 200
    },
    {
      "epoch": 1.32,
      "learning_rate": 4.669117647058824e-05,
      "loss": 2.8751,
      "step": 225
    },
    {
      "epoch": 1.47,
      "learning_rate": 4.632352941176471e-05,
      "loss": 2.8037,
      "step": 250
    },
    {
      "epoch": 1.62,
      "learning_rate": 4.5955882352941176e-05,
      "loss": 2.7824,
      "step": 275
    },
    {
      "epoch": 1.76,
      "learning_rate": 4.558823529411765e-05,
      "loss": 2.7789,
      "step": 300
    },
    {
      "epoch": 1.91,
      "learning_rate": 4.522058823529412e-05,
      "loss": 2.7955,
      "step": 325
    },
    {
      "epoch": 2.0,
      "eval_gen_len": 18.985347985347985,
      "eval_loss": 2.1561412811279297,
      "eval_rouge1": 28.5159,
      "eval_rouge2": 14.3492,
      "eval_rougeL": 26.0596,
      "eval_rougeLsum": 27.2431,
      "eval_runtime": 23.1378,
      "eval_samples_per_second": 35.397,
      "eval_steps_per_second": 2.247,
      "step": 340
    },
    {
      "epoch": 2.06,
      "learning_rate": 4.485294117647059e-05,
      "loss": 2.7598,
      "step": 350
    },
    {
      "epoch": 2.21,
      "learning_rate": 4.448529411764706e-05,
      "loss": 2.7091,
      "step": 375
    },
    {
      "epoch": 2.35,
      "learning_rate": 4.411764705882353e-05,
      "loss": 2.7055,
      "step": 400
    },
    {
      "epoch": 2.5,
      "learning_rate": 4.375e-05,
      "loss": 2.7163,
      "step": 425
    },
    {
      "epoch": 2.65,
      "learning_rate": 4.3382352941176474e-05,
      "loss": 2.6844,
      "step": 450
    },
    {
      "epoch": 2.79,
      "learning_rate": 4.301470588235295e-05,
      "loss": 2.686,
      "step": 475
    },
    {
      "epoch": 2.94,
      "learning_rate": 4.2647058823529415e-05,
      "loss": 2.6378,
      "step": 500
    },
    {
      "epoch": 3.0,
      "eval_gen_len": 18.99145299145299,
      "eval_loss": 2.130974531173706,
      "eval_rouge1": 28.9554,
      "eval_rouge2": 14.6901,
      "eval_rougeL": 26.4208,
      "eval_rougeLsum": 27.5523,
      "eval_runtime": 23.1305,
      "eval_samples_per_second": 35.408,
      "eval_steps_per_second": 2.248,
      "step": 510
    },
    {
      "epoch": 3.09,
      "learning_rate": 4.227941176470588e-05,
      "loss": 2.6666,
      "step": 525
    },
    {
      "epoch": 3.24,
      "learning_rate": 4.1911764705882356e-05,
      "loss": 2.6372,
      "step": 550
    },
    {
      "epoch": 3.38,
      "learning_rate": 4.154411764705883e-05,
      "loss": 2.6506,
      "step": 575
    },
    {
      "epoch": 3.53,
      "learning_rate": 4.11764705882353e-05,
      "loss": 2.6104,
      "step": 600
    },
    {
      "epoch": 3.68,
      "learning_rate": 4.0808823529411765e-05,
      "loss": 2.5946,
      "step": 625
    },
    {
      "epoch": 3.82,
      "learning_rate": 4.044117647058824e-05,
      "loss": 2.6182,
      "step": 650
    },
    {
      "epoch": 3.97,
      "learning_rate": 4.007352941176471e-05,
      "loss": 2.5962,
      "step": 675
    },
    {
      "epoch": 4.0,
      "eval_gen_len": 18.99145299145299,
      "eval_loss": 2.1109659671783447,
      "eval_rouge1": 29.381,
      "eval_rouge2": 15.1503,
      "eval_rougeL": 26.8406,
      "eval_rougeLsum": 27.9653,
      "eval_runtime": 23.0996,
      "eval_samples_per_second": 35.455,
      "eval_steps_per_second": 2.251,
      "step": 680
    },
    {
      "epoch": 4.12,
      "learning_rate": 3.970588235294117e-05,
      "loss": 2.5478,
      "step": 700
    },
    {
      "epoch": 4.26,
      "learning_rate": 3.933823529411765e-05,
      "loss": 2.5601,
      "step": 725
    },
    {
      "epoch": 4.41,
      "learning_rate": 3.897058823529412e-05,
      "loss": 2.5793,
      "step": 750
    },
    {
      "epoch": 4.56,
      "learning_rate": 3.8602941176470595e-05,
      "loss": 2.5655,
      "step": 775
    },
    {
      "epoch": 4.71,
      "learning_rate": 3.8235294117647055e-05,
      "loss": 2.5686,
      "step": 800
    },
    {
      "epoch": 4.85,
      "learning_rate": 3.786764705882353e-05,
      "loss": 2.5704,
      "step": 825
    },
    {
      "epoch": 5.0,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 2.5369,
      "step": 850
    },
    {
      "epoch": 5.0,
      "eval_gen_len": 18.996336996336996,
      "eval_loss": 2.1019859313964844,
      "eval_rouge1": 29.5767,
      "eval_rouge2": 15.2692,
      "eval_rougeL": 27.0113,
      "eval_rougeLsum": 28.1849,
      "eval_runtime": 22.9206,
      "eval_samples_per_second": 35.732,
      "eval_steps_per_second": 2.269,
      "step": 850
    },
    {
      "epoch": 5.15,
      "learning_rate": 3.713235294117647e-05,
      "loss": 2.5257,
      "step": 875
    },
    {
      "epoch": 5.29,
      "learning_rate": 3.6764705882352945e-05,
      "loss": 2.5294,
      "step": 900
    },
    {
      "epoch": 5.44,
      "learning_rate": 3.639705882352941e-05,
      "loss": 2.5188,
      "step": 925
    },
    {
      "epoch": 5.59,
      "learning_rate": 3.6029411764705886e-05,
      "loss": 2.5164,
      "step": 950
    },
    {
      "epoch": 5.74,
      "learning_rate": 3.566176470588235e-05,
      "loss": 2.4973,
      "step": 975
    },
    {
      "epoch": 5.88,
      "learning_rate": 3.529411764705883e-05,
      "loss": 2.5103,
      "step": 1000
    },
    {
      "epoch": 6.0,
      "eval_gen_len": 18.996336996336996,
      "eval_loss": 2.090707302093506,
      "eval_rouge1": 29.6354,
      "eval_rouge2": 15.434,
      "eval_rougeL": 27.0893,
      "eval_rougeLsum": 28.2703,
      "eval_runtime": 22.9931,
      "eval_samples_per_second": 35.619,
      "eval_steps_per_second": 2.262,
      "step": 1020
    },
    {
      "epoch": 6.03,
      "learning_rate": 3.4926470588235294e-05,
      "loss": 2.4817,
      "step": 1025
    },
    {
      "epoch": 6.18,
      "learning_rate": 3.455882352941177e-05,
      "loss": 2.4662,
      "step": 1050
    },
    {
      "epoch": 6.32,
      "learning_rate": 3.4191176470588236e-05,
      "loss": 2.4879,
      "step": 1075
    },
    {
      "epoch": 6.47,
      "learning_rate": 3.382352941176471e-05,
      "loss": 2.4666,
      "step": 1100
    },
    {
      "epoch": 6.62,
      "learning_rate": 3.345588235294118e-05,
      "loss": 2.4908,
      "step": 1125
    },
    {
      "epoch": 6.76,
      "learning_rate": 3.308823529411765e-05,
      "loss": 2.4887,
      "step": 1150
    },
    {
      "epoch": 6.91,
      "learning_rate": 3.272058823529412e-05,
      "loss": 2.4524,
      "step": 1175
    },
    {
      "epoch": 7.0,
      "eval_gen_len": 18.996336996336996,
      "eval_loss": 2.0839579105377197,
      "eval_rouge1": 29.7812,
      "eval_rouge2": 15.4963,
      "eval_rougeL": 27.2779,
      "eval_rougeLsum": 28.385,
      "eval_runtime": 23.0064,
      "eval_samples_per_second": 35.599,
      "eval_steps_per_second": 2.26,
      "step": 1190
    },
    {
      "epoch": 7.06,
      "learning_rate": 3.235294117647059e-05,
      "loss": 2.4526,
      "step": 1200
    },
    {
      "epoch": 7.21,
      "learning_rate": 3.198529411764706e-05,
      "loss": 2.4316,
      "step": 1225
    },
    {
      "epoch": 7.35,
      "learning_rate": 3.161764705882353e-05,
      "loss": 2.4511,
      "step": 1250
    },
    {
      "epoch": 7.5,
      "learning_rate": 3.125e-05,
      "loss": 2.4642,
      "step": 1275
    },
    {
      "epoch": 7.65,
      "learning_rate": 3.0882352941176475e-05,
      "loss": 2.4387,
      "step": 1300
    },
    {
      "epoch": 7.79,
      "learning_rate": 3.0514705882352945e-05,
      "loss": 2.477,
      "step": 1325
    },
    {
      "epoch": 7.94,
      "learning_rate": 3.0147058823529413e-05,
      "loss": 2.4472,
      "step": 1350
    },
    {
      "epoch": 8.0,
      "eval_gen_len": 18.996336996336996,
      "eval_loss": 2.0799622535705566,
      "eval_rouge1": 29.6011,
      "eval_rouge2": 15.5138,
      "eval_rougeL": 27.1381,
      "eval_rougeLsum": 28.2799,
      "eval_runtime": 22.9827,
      "eval_samples_per_second": 35.636,
      "eval_steps_per_second": 2.263,
      "step": 1360
    },
    {
      "epoch": 8.09,
      "learning_rate": 2.9779411764705883e-05,
      "loss": 2.4296,
      "step": 1375
    },
    {
      "epoch": 8.24,
      "learning_rate": 2.9411764705882354e-05,
      "loss": 2.4109,
      "step": 1400
    },
    {
      "epoch": 8.38,
      "learning_rate": 2.9044117647058828e-05,
      "loss": 2.4181,
      "step": 1425
    },
    {
      "epoch": 8.53,
      "learning_rate": 2.8676470588235295e-05,
      "loss": 2.4089,
      "step": 1450
    },
    {
      "epoch": 8.68,
      "learning_rate": 2.8308823529411766e-05,
      "loss": 2.4518,
      "step": 1475
    },
    {
      "epoch": 8.82,
      "learning_rate": 2.7941176470588236e-05,
      "loss": 2.4271,
      "step": 1500
    },
    {
      "epoch": 8.97,
      "learning_rate": 2.757352941176471e-05,
      "loss": 2.4089,
      "step": 1525
    },
    {
      "epoch": 9.0,
      "eval_gen_len": 18.996336996336996,
      "eval_loss": 2.075223207473755,
      "eval_rouge1": 29.7647,
      "eval_rouge2": 15.6183,
      "eval_rougeL": 27.318,
      "eval_rougeLsum": 28.4747,
      "eval_runtime": 22.8902,
      "eval_samples_per_second": 35.779,
      "eval_steps_per_second": 2.272,
      "step": 1530
    },
    {
      "epoch": 9.12,
      "learning_rate": 2.7205882352941174e-05,
      "loss": 2.4048,
      "step": 1550
    },
    {
      "epoch": 9.26,
      "learning_rate": 2.6838235294117648e-05,
      "loss": 2.4132,
      "step": 1575
    },
    {
      "epoch": 9.41,
      "learning_rate": 2.647058823529412e-05,
      "loss": 2.3885,
      "step": 1600
    },
    {
      "epoch": 9.56,
      "learning_rate": 2.6102941176470593e-05,
      "loss": 2.4007,
      "step": 1625
    },
    {
      "epoch": 9.71,
      "learning_rate": 2.5735294117647057e-05,
      "loss": 2.4089,
      "step": 1650
    },
    {
      "epoch": 9.85,
      "learning_rate": 2.536764705882353e-05,
      "loss": 2.3912,
      "step": 1675
    },
    {
      "epoch": 10.0,
      "learning_rate": 2.5e-05,
      "loss": 2.4011,
      "step": 1700
    },
    {
      "epoch": 10.0,
      "eval_gen_len": 19.0,
      "eval_loss": 2.071033239364624,
      "eval_rouge1": 29.6533,
      "eval_rouge2": 15.5536,
      "eval_rougeL": 27.2687,
      "eval_rougeLsum": 28.4457,
      "eval_runtime": 23.0214,
      "eval_samples_per_second": 35.576,
      "eval_steps_per_second": 2.259,
      "step": 1700
    },
    {
      "epoch": 10.15,
      "learning_rate": 2.4632352941176472e-05,
      "loss": 2.4049,
      "step": 1725
    },
    {
      "epoch": 10.29,
      "learning_rate": 2.4264705882352942e-05,
      "loss": 2.3802,
      "step": 1750
    },
    {
      "epoch": 10.44,
      "learning_rate": 2.3897058823529413e-05,
      "loss": 2.3688,
      "step": 1775
    },
    {
      "epoch": 10.59,
      "learning_rate": 2.3529411764705884e-05,
      "loss": 2.3897,
      "step": 1800
    },
    {
      "epoch": 10.74,
      "learning_rate": 2.3161764705882354e-05,
      "loss": 2.3464,
      "step": 1825
    },
    {
      "epoch": 10.88,
      "learning_rate": 2.2794117647058825e-05,
      "loss": 2.3792,
      "step": 1850
    },
    {
      "epoch": 11.0,
      "eval_gen_len": 19.0,
      "eval_loss": 2.0655674934387207,
      "eval_rouge1": 29.8668,
      "eval_rouge2": 15.6931,
      "eval_rougeL": 27.4208,
      "eval_rougeLsum": 28.5477,
      "eval_runtime": 21.951,
      "eval_samples_per_second": 37.31,
      "eval_steps_per_second": 2.369,
      "step": 1870
    },
    {
      "epoch": 11.03,
      "learning_rate": 2.2426470588235296e-05,
      "loss": 2.3783,
      "step": 1875
    },
    {
      "epoch": 11.18,
      "learning_rate": 2.2058823529411766e-05,
      "loss": 2.3446,
      "step": 1900
    },
    {
      "epoch": 11.32,
      "learning_rate": 2.1691176470588237e-05,
      "loss": 2.3929,
      "step": 1925
    },
    {
      "epoch": 11.47,
      "learning_rate": 2.1323529411764707e-05,
      "loss": 2.374,
      "step": 1950
    },
    {
      "epoch": 11.62,
      "learning_rate": 2.0955882352941178e-05,
      "loss": 2.3544,
      "step": 1975
    },
    {
      "epoch": 11.76,
      "learning_rate": 2.058823529411765e-05,
      "loss": 2.357,
      "step": 2000
    },
    {
      "epoch": 11.91,
      "learning_rate": 2.022058823529412e-05,
      "loss": 2.3588,
      "step": 2025
    },
    {
      "epoch": 12.0,
      "eval_gen_len": 18.996336996336996,
      "eval_loss": 2.0634803771972656,
      "eval_rouge1": 29.8378,
      "eval_rouge2": 15.682,
      "eval_rougeL": 27.4635,
      "eval_rougeLsum": 28.5803,
      "eval_runtime": 22.98,
      "eval_samples_per_second": 35.64,
      "eval_steps_per_second": 2.263,
      "step": 2040
    },
    {
      "epoch": 12.06,
      "learning_rate": 1.9852941176470586e-05,
      "loss": 2.3503,
      "step": 2050
    },
    {
      "epoch": 12.21,
      "learning_rate": 1.948529411764706e-05,
      "loss": 2.3402,
      "step": 2075
    },
    {
      "epoch": 12.35,
      "learning_rate": 1.9117647058823528e-05,
      "loss": 2.3716,
      "step": 2100
    },
    {
      "epoch": 12.5,
      "learning_rate": 1.8750000000000002e-05,
      "loss": 2.3161,
      "step": 2125
    },
    {
      "epoch": 12.65,
      "learning_rate": 1.8382352941176472e-05,
      "loss": 2.3354,
      "step": 2150
    },
    {
      "epoch": 12.79,
      "learning_rate": 1.8014705882352943e-05,
      "loss": 2.3476,
      "step": 2175
    },
    {
      "epoch": 12.94,
      "learning_rate": 1.7647058823529414e-05,
      "loss": 2.3397,
      "step": 2200
    },
    {
      "epoch": 13.0,
      "eval_gen_len": 19.0,
      "eval_loss": 2.0630440711975098,
      "eval_rouge1": 29.9043,
      "eval_rouge2": 15.7535,
      "eval_rougeL": 27.5065,
      "eval_rougeLsum": 28.6539,
      "eval_runtime": 22.9094,
      "eval_samples_per_second": 35.75,
      "eval_steps_per_second": 2.27,
      "step": 2210
    },
    {
      "epoch": 13.09,
      "learning_rate": 1.7279411764705884e-05,
      "loss": 2.3399,
      "step": 2225
    },
    {
      "epoch": 13.24,
      "learning_rate": 1.6911764705882355e-05,
      "loss": 2.3207,
      "step": 2250
    },
    {
      "epoch": 13.38,
      "learning_rate": 1.6544117647058825e-05,
      "loss": 2.3339,
      "step": 2275
    },
    {
      "epoch": 13.53,
      "learning_rate": 1.6176470588235296e-05,
      "loss": 2.3347,
      "step": 2300
    },
    {
      "epoch": 13.68,
      "learning_rate": 1.5808823529411763e-05,
      "loss": 2.3318,
      "step": 2325
    },
    {
      "epoch": 13.82,
      "learning_rate": 1.5441176470588237e-05,
      "loss": 2.3275,
      "step": 2350
    },
    {
      "epoch": 13.97,
      "learning_rate": 1.5073529411764706e-05,
      "loss": 2.3201,
      "step": 2375
    },
    {
      "epoch": 14.0,
      "eval_gen_len": 18.996336996336996,
      "eval_loss": 2.0599966049194336,
      "eval_rouge1": 29.7926,
      "eval_rouge2": 15.7077,
      "eval_rougeL": 27.4066,
      "eval_rougeLsum": 28.5302,
      "eval_runtime": 23.1182,
      "eval_samples_per_second": 35.427,
      "eval_steps_per_second": 2.249,
      "step": 2380
    },
    {
      "epoch": 14.12,
      "learning_rate": 1.4705882352941177e-05,
      "loss": 2.3204,
      "step": 2400
    },
    {
      "epoch": 14.26,
      "learning_rate": 1.4338235294117647e-05,
      "loss": 2.3592,
      "step": 2425
    },
    {
      "epoch": 14.41,
      "learning_rate": 1.3970588235294118e-05,
      "loss": 2.3275,
      "step": 2450
    },
    {
      "epoch": 14.56,
      "learning_rate": 1.3602941176470587e-05,
      "loss": 2.2936,
      "step": 2475
    },
    {
      "epoch": 14.71,
      "learning_rate": 1.323529411764706e-05,
      "loss": 2.3013,
      "step": 2500
    },
    {
      "epoch": 14.85,
      "learning_rate": 1.2867647058823528e-05,
      "loss": 2.3007,
      "step": 2525
    },
    {
      "epoch": 15.0,
      "learning_rate": 1.25e-05,
      "loss": 2.3241,
      "step": 2550
    },
    {
      "epoch": 15.0,
      "eval_gen_len": 19.0,
      "eval_loss": 2.0615200996398926,
      "eval_rouge1": 29.8536,
      "eval_rouge2": 15.7929,
      "eval_rougeL": 27.4572,
      "eval_rougeLsum": 28.5704,
      "eval_runtime": 22.9087,
      "eval_samples_per_second": 35.751,
      "eval_steps_per_second": 2.27,
      "step": 2550
    },
    {
      "epoch": 15.15,
      "learning_rate": 1.2132352941176471e-05,
      "loss": 2.326,
      "step": 2575
    },
    {
      "epoch": 15.29,
      "learning_rate": 1.1764705882352942e-05,
      "loss": 2.3004,
      "step": 2600
    },
    {
      "epoch": 15.44,
      "learning_rate": 1.1397058823529412e-05,
      "loss": 2.311,
      "step": 2625
    },
    {
      "epoch": 15.59,
      "learning_rate": 1.1029411764705883e-05,
      "loss": 2.3427,
      "step": 2650
    },
    {
      "epoch": 15.74,
      "learning_rate": 1.0661764705882354e-05,
      "loss": 2.2741,
      "step": 2675
    },
    {
      "epoch": 15.88,
      "learning_rate": 1.0294117647058824e-05,
      "loss": 2.3183,
      "step": 2700
    },
    {
      "epoch": 16.0,
      "eval_gen_len": 19.0,
      "eval_loss": 2.0573582649230957,
      "eval_rouge1": 29.7529,
      "eval_rouge2": 15.6729,
      "eval_rougeL": 27.3388,
      "eval_rougeLsum": 28.4678,
      "eval_runtime": 23.1299,
      "eval_samples_per_second": 35.409,
      "eval_steps_per_second": 2.248,
      "step": 2720
    },
    {
      "epoch": 16.03,
      "learning_rate": 9.926470588235293e-06,
      "loss": 2.2934,
      "step": 2725
    },
    {
      "epoch": 16.18,
      "learning_rate": 9.558823529411764e-06,
      "loss": 2.2633,
      "step": 2750
    },
    {
      "epoch": 16.32,
      "learning_rate": 9.191176470588236e-06,
      "loss": 2.2957,
      "step": 2775
    },
    {
      "epoch": 16.47,
      "learning_rate": 8.823529411764707e-06,
      "loss": 2.3083,
      "step": 2800
    },
    {
      "epoch": 16.62,
      "learning_rate": 8.455882352941177e-06,
      "loss": 2.3246,
      "step": 2825
    },
    {
      "epoch": 16.76,
      "learning_rate": 8.088235294117648e-06,
      "loss": 2.2989,
      "step": 2850
    },
    {
      "epoch": 16.91,
      "learning_rate": 7.720588235294119e-06,
      "loss": 2.3346,
      "step": 2875
    },
    {
      "epoch": 17.0,
      "eval_gen_len": 19.0,
      "eval_loss": 2.0570528507232666,
      "eval_rouge1": 29.7443,
      "eval_rouge2": 15.6459,
      "eval_rougeL": 27.3245,
      "eval_rougeLsum": 28.4549,
      "eval_runtime": 22.9331,
      "eval_samples_per_second": 35.713,
      "eval_steps_per_second": 2.267,
      "step": 2890
    },
    {
      "epoch": 17.06,
      "learning_rate": 7.3529411764705884e-06,
      "loss": 2.2887,
      "step": 2900
    },
    {
      "epoch": 17.21,
      "learning_rate": 6.985294117647059e-06,
      "loss": 2.2881,
      "step": 2925
    },
    {
      "epoch": 17.35,
      "learning_rate": 6.61764705882353e-06,
      "loss": 2.3062,
      "step": 2950
    },
    {
      "epoch": 17.5,
      "learning_rate": 6.25e-06,
      "loss": 2.2867,
      "step": 2975
    },
    {
      "epoch": 17.65,
      "learning_rate": 5.882352941176471e-06,
      "loss": 2.3056,
      "step": 3000
    },
    {
      "epoch": 17.79,
      "learning_rate": 5.5147058823529415e-06,
      "loss": 2.3098,
      "step": 3025
    },
    {
      "epoch": 17.94,
      "learning_rate": 5.147058823529412e-06,
      "loss": 2.2932,
      "step": 3050
    },
    {
      "epoch": 18.0,
      "eval_gen_len": 19.0,
      "eval_loss": 2.0577263832092285,
      "eval_rouge1": 29.7467,
      "eval_rouge2": 15.6717,
      "eval_rougeL": 27.3391,
      "eval_rougeLsum": 28.4541,
      "eval_runtime": 23.0624,
      "eval_samples_per_second": 35.512,
      "eval_steps_per_second": 2.255,
      "step": 3060
    },
    {
      "epoch": 18.09,
      "learning_rate": 4.779411764705882e-06,
      "loss": 2.2832,
      "step": 3075
    },
    {
      "epoch": 18.24,
      "learning_rate": 4.411764705882353e-06,
      "loss": 2.289,
      "step": 3100
    },
    {
      "epoch": 18.38,
      "learning_rate": 4.044117647058824e-06,
      "loss": 2.2932,
      "step": 3125
    },
    {
      "epoch": 18.53,
      "learning_rate": 3.6764705882352942e-06,
      "loss": 2.3085,
      "step": 3150
    },
    {
      "epoch": 18.68,
      "learning_rate": 3.308823529411765e-06,
      "loss": 2.2884,
      "step": 3175
    },
    {
      "epoch": 18.82,
      "learning_rate": 2.9411764705882355e-06,
      "loss": 2.2877,
      "step": 3200
    },
    {
      "epoch": 18.97,
      "learning_rate": 2.573529411764706e-06,
      "loss": 2.2755,
      "step": 3225
    },
    {
      "epoch": 19.0,
      "eval_gen_len": 19.0,
      "eval_loss": 2.0573978424072266,
      "eval_rouge1": 29.7694,
      "eval_rouge2": 15.6776,
      "eval_rougeL": 27.3556,
      "eval_rougeLsum": 28.4819,
      "eval_runtime": 22.951,
      "eval_samples_per_second": 35.685,
      "eval_steps_per_second": 2.266,
      "step": 3230
    },
    {
      "epoch": 19.0,
      "step": 3230,
      "total_flos": 2.285170027491492e+17,
      "train_loss": 2.4905450729393737,
      "train_runtime": 8327.4677,
      "train_samples_per_second": 26.116,
      "train_steps_per_second": 0.408
    }
  ],
  "max_steps": 3400,
  "num_train_epochs": 20,
  "total_flos": 2.285170027491492e+17,
  "trial_name": null,
  "trial_params": null
}