|
{ |
|
"best_metric": 63.1011, |
|
"best_model_checkpoint": "output_train_bart_large_local/checkpoint-48000", |
|
"epoch": 2.0123153700647967, |
|
"global_step": 50000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.9842240824211213e-07, |
|
"loss": 1.8309, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 8.008692852543465e-07, |
|
"loss": 1.0322, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_bertscore/f1": 0.7264, |
|
"eval_bertscore/precision": 0.7345, |
|
"eval_bertscore/recall": 0.7214, |
|
"eval_mean_prediction_length_characters": 779.505, |
|
"eval_mean_prediction_length_tokens": 167.969, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 45.7604, |
|
"eval_rouge/rouge1": 60.4328, |
|
"eval_rouge/rouge2": 39.8155, |
|
"eval_rouge/rougeL": 39.824, |
|
"eval_rouge/rougeLsum": 57.577, |
|
"eval_runtime": 2363.9821, |
|
"eval_samples_per_second": 0.423, |
|
"eval_steps_per_second": 0.423, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.203316162266581e-06, |
|
"loss": 0.8078, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.6057630392788153e-06, |
|
"loss": 0.7401, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_bertscore/f1": 0.767, |
|
"eval_bertscore/precision": 0.7735, |
|
"eval_bertscore/recall": 0.7627, |
|
"eval_mean_prediction_length_characters": 746.987, |
|
"eval_mean_prediction_length_tokens": 167.637, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 49.7632, |
|
"eval_rouge/rouge1": 64.5596, |
|
"eval_rouge/rouge2": 43.5883, |
|
"eval_rouge/rougeL": 43.7919, |
|
"eval_rouge/rougeLsum": 62.268, |
|
"eval_runtime": 2390.2778, |
|
"eval_samples_per_second": 0.418, |
|
"eval_steps_per_second": 0.418, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.00820991629105e-06, |
|
"loss": 0.6952, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.4106567933032843e-06, |
|
"loss": 0.6654, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_bertscore/f1": 0.775, |
|
"eval_bertscore/precision": 0.7801, |
|
"eval_bertscore/recall": 0.7719, |
|
"eval_mean_prediction_length_characters": 750.739, |
|
"eval_mean_prediction_length_tokens": 167.441, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 51.1255, |
|
"eval_rouge/rouge1": 65.8167, |
|
"eval_rouge/rouge2": 45.0215, |
|
"eval_rouge/rougeL": 45.0979, |
|
"eval_rouge/rougeLsum": 63.4694, |
|
"eval_runtime": 2400.8698, |
|
"eval_samples_per_second": 0.417, |
|
"eval_steps_per_second": 0.417, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.8131036703155183e-06, |
|
"loss": 0.6532, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.215550547327753e-06, |
|
"loss": 0.6421, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_bertscore/f1": 0.778, |
|
"eval_bertscore/precision": 0.7803, |
|
"eval_bertscore/recall": 0.7778, |
|
"eval_mean_prediction_length_characters": 787.437, |
|
"eval_mean_prediction_length_tokens": 175.145, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 51.6325, |
|
"eval_rouge/rouge1": 66.6546, |
|
"eval_rouge/rouge2": 45.5613, |
|
"eval_rouge/rougeL": 45.3256, |
|
"eval_rouge/rougeLsum": 64.2944, |
|
"eval_runtime": 2500.2502, |
|
"eval_samples_per_second": 0.4, |
|
"eval_steps_per_second": 0.4, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.6179974243399875e-06, |
|
"loss": 0.6253, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.0196394075981975e-06, |
|
"loss": 0.62, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_bertscore/f1": 0.7774, |
|
"eval_bertscore/precision": 0.7652, |
|
"eval_bertscore/recall": 0.7921, |
|
"eval_mean_prediction_length_characters": 948.673, |
|
"eval_mean_prediction_length_tokens": 206.93, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 51.4967, |
|
"eval_rouge/rouge1": 67.0983, |
|
"eval_rouge/rouge2": 45.6949, |
|
"eval_rouge/rougeL": 44.541, |
|
"eval_rouge/rougeLsum": 64.5765, |
|
"eval_runtime": 2980.0062, |
|
"eval_samples_per_second": 0.336, |
|
"eval_steps_per_second": 0.336, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.4220862846104314e-06, |
|
"loss": 0.6041, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.824533161622666e-06, |
|
"loss": 0.5949, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_bertscore/f1": 0.7803, |
|
"eval_bertscore/precision": 0.7732, |
|
"eval_bertscore/recall": 0.7895, |
|
"eval_mean_prediction_length_characters": 879.72, |
|
"eval_mean_prediction_length_tokens": 192.817, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 52.1495, |
|
"eval_rouge/rouge1": 67.3707, |
|
"eval_rouge/rouge2": 46.4072, |
|
"eval_rouge/rougeL": 45.3622, |
|
"eval_rouge/rougeLsum": 65.0424, |
|
"eval_runtime": 2666.0858, |
|
"eval_samples_per_second": 0.375, |
|
"eval_steps_per_second": 0.375, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 5.2269800386349e-06, |
|
"loss": 0.5762, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 5.629426915647135e-06, |
|
"loss": 0.5719, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_bertscore/f1": 0.7838, |
|
"eval_bertscore/precision": 0.7832, |
|
"eval_bertscore/recall": 0.7866, |
|
"eval_mean_prediction_length_characters": 832.624, |
|
"eval_mean_prediction_length_tokens": 183.52, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 52.9627, |
|
"eval_rouge/rouge1": 67.8554, |
|
"eval_rouge/rouge2": 47.0328, |
|
"eval_rouge/rougeL": 46.5505, |
|
"eval_rouge/rougeLsum": 65.5148, |
|
"eval_runtime": 2493.7707, |
|
"eval_samples_per_second": 0.401, |
|
"eval_steps_per_second": 0.401, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 6.031068898905345e-06, |
|
"loss": 0.5718, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 6.433515775917579e-06, |
|
"loss": 0.5541, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_bertscore/f1": 0.7856, |
|
"eval_bertscore/precision": 0.7835, |
|
"eval_bertscore/recall": 0.7897, |
|
"eval_mean_prediction_length_characters": 831.437, |
|
"eval_mean_prediction_length_tokens": 184.28, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 53.234, |
|
"eval_rouge/rouge1": 68.2648, |
|
"eval_rouge/rouge2": 47.5788, |
|
"eval_rouge/rougeL": 46.447, |
|
"eval_rouge/rougeLsum": 65.9056, |
|
"eval_runtime": 2476.0861, |
|
"eval_samples_per_second": 0.404, |
|
"eval_steps_per_second": 0.404, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 6.835962652929814e-06, |
|
"loss": 0.5621, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 7.238409529942049e-06, |
|
"loss": 0.5509, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_bertscore/f1": 0.7876, |
|
"eval_bertscore/precision": 0.787, |
|
"eval_bertscore/recall": 0.79, |
|
"eval_mean_prediction_length_characters": 825.034, |
|
"eval_mean_prediction_length_tokens": 181.78, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 53.6596, |
|
"eval_rouge/rouge1": 68.4619, |
|
"eval_rouge/rouge2": 47.9553, |
|
"eval_rouge/rougeL": 47.0605, |
|
"eval_rouge/rougeLsum": 66.229, |
|
"eval_runtime": 2413.4877, |
|
"eval_samples_per_second": 0.414, |
|
"eval_steps_per_second": 0.414, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.640856406954282e-06, |
|
"loss": 0.5319, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 8.042498390212493e-06, |
|
"loss": 0.5317, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_bertscore/f1": 0.7801, |
|
"eval_bertscore/precision": 0.7653, |
|
"eval_bertscore/recall": 0.7976, |
|
"eval_mean_prediction_length_characters": 983.333, |
|
"eval_mean_prediction_length_tokens": 212.132, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 52.2204, |
|
"eval_rouge/rouge1": 67.4114, |
|
"eval_rouge/rouge2": 46.8366, |
|
"eval_rouge/rougeL": 45.1025, |
|
"eval_rouge/rougeLsum": 64.9737, |
|
"eval_runtime": 2850.8965, |
|
"eval_samples_per_second": 0.351, |
|
"eval_steps_per_second": 0.351, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.444945267224727e-06, |
|
"loss": 0.5246, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 8.847392144236962e-06, |
|
"loss": 0.5306, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_bertscore/f1": 0.7812, |
|
"eval_bertscore/precision": 0.7727, |
|
"eval_bertscore/recall": 0.7923, |
|
"eval_mean_prediction_length_characters": 929.756, |
|
"eval_mean_prediction_length_tokens": 198.992, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 52.9021, |
|
"eval_rouge/rouge1": 67.7205, |
|
"eval_rouge/rouge2": 47.5736, |
|
"eval_rouge/rougeL": 45.955, |
|
"eval_rouge/rougeLsum": 65.2918, |
|
"eval_runtime": 2641.4953, |
|
"eval_samples_per_second": 0.379, |
|
"eval_steps_per_second": 0.379, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 9.249839021249196e-06, |
|
"loss": 0.5118, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 9.65228589826143e-06, |
|
"loss": 0.5125, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_bertscore/f1": 0.7924, |
|
"eval_bertscore/precision": 0.8025, |
|
"eval_bertscore/recall": 0.7847, |
|
"eval_mean_prediction_length_characters": 739.515, |
|
"eval_mean_prediction_length_tokens": 166.617, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 54.3827, |
|
"eval_rouge/rouge1": 68.5657, |
|
"eval_rouge/rouge2": 48.8496, |
|
"eval_rouge/rougeL": 48.0192, |
|
"eval_rouge/rougeLsum": 66.6464, |
|
"eval_runtime": 2173.7087, |
|
"eval_samples_per_second": 0.46, |
|
"eval_steps_per_second": 0.46, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 9.993918580525149e-06, |
|
"loss": 0.4953, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 9.949202260857123e-06, |
|
"loss": 0.4559, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_bertscore/f1": 0.7876, |
|
"eval_bertscore/precision": 0.782, |
|
"eval_bertscore/recall": 0.7953, |
|
"eval_mean_prediction_length_characters": 872.104, |
|
"eval_mean_prediction_length_tokens": 188.836, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 54.0286, |
|
"eval_rouge/rouge1": 68.9929, |
|
"eval_rouge/rouge2": 48.683, |
|
"eval_rouge/rougeL": 46.9557, |
|
"eval_rouge/rougeLsum": 66.5892, |
|
"eval_runtime": 2531.5352, |
|
"eval_samples_per_second": 0.395, |
|
"eval_steps_per_second": 0.395, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 9.904575373828433e-06, |
|
"loss": 0.4572, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 9.859859054160406e-06, |
|
"loss": 0.4455, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_bertscore/f1": 0.792, |
|
"eval_bertscore/precision": 0.7832, |
|
"eval_bertscore/recall": 0.8031, |
|
"eval_mean_prediction_length_characters": 921.871, |
|
"eval_mean_prediction_length_tokens": 200.026, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 54.5933, |
|
"eval_rouge/rouge1": 69.3876, |
|
"eval_rouge/rouge2": 49.1971, |
|
"eval_rouge/rougeL": 47.6645, |
|
"eval_rouge/rougeLsum": 67.1636, |
|
"eval_runtime": 2729.0713, |
|
"eval_samples_per_second": 0.366, |
|
"eval_steps_per_second": 0.366, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 9.81514273449238e-06, |
|
"loss": 0.446, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 9.770426414824355e-06, |
|
"loss": 0.4376, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_bertscore/f1": 0.7943, |
|
"eval_bertscore/precision": 0.7909, |
|
"eval_bertscore/recall": 0.7996, |
|
"eval_mean_prediction_length_characters": 856.751, |
|
"eval_mean_prediction_length_tokens": 186.271, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 55.0336, |
|
"eval_rouge/rouge1": 69.6177, |
|
"eval_rouge/rouge2": 49.6842, |
|
"eval_rouge/rougeL": 48.1889, |
|
"eval_rouge/rougeLsum": 67.4597, |
|
"eval_runtime": 2554.3498, |
|
"eval_samples_per_second": 0.391, |
|
"eval_steps_per_second": 0.391, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 9.725710095156328e-06, |
|
"loss": 0.4338, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 9.680993775488303e-06, |
|
"loss": 0.4333, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_bertscore/f1": 0.7941, |
|
"eval_bertscore/precision": 0.7901, |
|
"eval_bertscore/recall": 0.8003, |
|
"eval_mean_prediction_length_characters": 857.762, |
|
"eval_mean_prediction_length_tokens": 185.042, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 55.3969, |
|
"eval_rouge/rouge1": 69.5818, |
|
"eval_rouge/rouge2": 50.0439, |
|
"eval_rouge/rougeL": 48.8213, |
|
"eval_rouge/rougeLsum": 67.3532, |
|
"eval_runtime": 2568.5908, |
|
"eval_samples_per_second": 0.389, |
|
"eval_steps_per_second": 0.389, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 9.636366888459613e-06, |
|
"loss": 0.4224, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 9.591650568791587e-06, |
|
"loss": 0.4229, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_bertscore/f1": 0.7968, |
|
"eval_bertscore/precision": 0.7936, |
|
"eval_bertscore/recall": 0.802, |
|
"eval_mean_prediction_length_characters": 850.064, |
|
"eval_mean_prediction_length_tokens": 186.438, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 56.2479, |
|
"eval_rouge/rouge1": 70.2657, |
|
"eval_rouge/rouge2": 50.9461, |
|
"eval_rouge/rougeL": 49.7123, |
|
"eval_rouge/rougeLsum": 68.1758, |
|
"eval_runtime": 2596.5556, |
|
"eval_samples_per_second": 0.385, |
|
"eval_steps_per_second": 0.385, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 9.54693424912356e-06, |
|
"loss": 0.4215, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 9.502217929455535e-06, |
|
"loss": 0.4145, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_bertscore/f1": 0.7982, |
|
"eval_bertscore/precision": 0.7974, |
|
"eval_bertscore/recall": 0.801, |
|
"eval_mean_prediction_length_characters": 821.211, |
|
"eval_mean_prediction_length_tokens": 179.198, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 56.2997, |
|
"eval_rouge/rouge1": 70.2035, |
|
"eval_rouge/rouge2": 51.1438, |
|
"eval_rouge/rougeL": 49.7011, |
|
"eval_rouge/rougeLsum": 68.1054, |
|
"eval_runtime": 2500.5637, |
|
"eval_samples_per_second": 0.4, |
|
"eval_steps_per_second": 0.4, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 9.457501609787508e-06, |
|
"loss": 0.4108, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 9.41287472275882e-06, |
|
"loss": 0.413, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_bertscore/f1": 0.7981, |
|
"eval_bertscore/precision": 0.7992, |
|
"eval_bertscore/recall": 0.799, |
|
"eval_mean_prediction_length_characters": 806.834, |
|
"eval_mean_prediction_length_tokens": 176.24, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 56.5708, |
|
"eval_rouge/rouge1": 70.2439, |
|
"eval_rouge/rouge2": 51.4876, |
|
"eval_rouge/rougeL": 50.0572, |
|
"eval_rouge/rougeLsum": 68.1941, |
|
"eval_runtime": 2443.1073, |
|
"eval_samples_per_second": 0.409, |
|
"eval_steps_per_second": 0.409, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 9.368158403090792e-06, |
|
"loss": 0.4064, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.323442083422767e-06, |
|
"loss": 0.4004, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"eval_bertscore/f1": 0.8022, |
|
"eval_bertscore/precision": 0.7971, |
|
"eval_bertscore/recall": 0.8093, |
|
"eval_mean_prediction_length_characters": 860.077, |
|
"eval_mean_prediction_length_tokens": 188.397, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 57.2862, |
|
"eval_rouge/rouge1": 71.0513, |
|
"eval_rouge/rouge2": 52.0715, |
|
"eval_rouge/rougeL": 50.8134, |
|
"eval_rouge/rougeLsum": 69.0639, |
|
"eval_runtime": 2596.2685, |
|
"eval_samples_per_second": 0.385, |
|
"eval_steps_per_second": 0.385, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 9.278725763754741e-06, |
|
"loss": 0.3953, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 9.234009444086714e-06, |
|
"loss": 0.3905, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_bertscore/f1": 0.8009, |
|
"eval_bertscore/precision": 0.792, |
|
"eval_bertscore/recall": 0.812, |
|
"eval_mean_prediction_length_characters": 922.278, |
|
"eval_mean_prediction_length_tokens": 199.619, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 56.8514, |
|
"eval_rouge/rouge1": 70.8436, |
|
"eval_rouge/rouge2": 51.8156, |
|
"eval_rouge/rougeL": 50.0567, |
|
"eval_rouge/rougeLsum": 68.8517, |
|
"eval_runtime": 2741.6372, |
|
"eval_samples_per_second": 0.365, |
|
"eval_steps_per_second": 0.365, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 9.189382557058024e-06, |
|
"loss": 0.3834, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 9.144666237389999e-06, |
|
"loss": 0.3772, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_bertscore/f1": 0.802, |
|
"eval_bertscore/precision": 0.8014, |
|
"eval_bertscore/recall": 0.8048, |
|
"eval_mean_prediction_length_characters": 821.195, |
|
"eval_mean_prediction_length_tokens": 179.279, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 57.3719, |
|
"eval_rouge/rouge1": 70.4754, |
|
"eval_rouge/rouge2": 52.2981, |
|
"eval_rouge/rougeL": 51.2358, |
|
"eval_rouge/rougeLsum": 68.5129, |
|
"eval_runtime": 2484.708, |
|
"eval_samples_per_second": 0.402, |
|
"eval_steps_per_second": 0.402, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 9.099949917721973e-06, |
|
"loss": 0.3796, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 9.055233598053946e-06, |
|
"loss": 0.3745, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_bertscore/f1": 0.8077, |
|
"eval_bertscore/precision": 0.8096, |
|
"eval_bertscore/recall": 0.8076, |
|
"eval_mean_prediction_length_characters": 797.37, |
|
"eval_mean_prediction_length_tokens": 176.263, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 58.2667, |
|
"eval_rouge/rouge1": 71.7377, |
|
"eval_rouge/rouge2": 53.2732, |
|
"eval_rouge/rougeL": 51.7614, |
|
"eval_rouge/rougeLsum": 69.9972, |
|
"eval_runtime": 2452.4348, |
|
"eval_samples_per_second": 0.408, |
|
"eval_steps_per_second": 0.408, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 9.01051727838592e-06, |
|
"loss": 0.3705, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 8.96589039135723e-06, |
|
"loss": 0.3709, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_bertscore/f1": 0.8108, |
|
"eval_bertscore/precision": 0.8141, |
|
"eval_bertscore/recall": 0.8094, |
|
"eval_mean_prediction_length_characters": 798.81, |
|
"eval_mean_prediction_length_tokens": 176.217, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 58.6206, |
|
"eval_rouge/rouge1": 72.0193, |
|
"eval_rouge/rouge2": 53.6884, |
|
"eval_rouge/rougeL": 52.098, |
|
"eval_rouge/rougeLsum": 70.3419, |
|
"eval_runtime": 2449.0615, |
|
"eval_samples_per_second": 0.408, |
|
"eval_steps_per_second": 0.408, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 8.92126350432854e-06, |
|
"loss": 0.3638, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 8.876547184660515e-06, |
|
"loss": 0.3461, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_bertscore/f1": 0.8092, |
|
"eval_bertscore/precision": 0.8022, |
|
"eval_bertscore/recall": 0.8182, |
|
"eval_mean_prediction_length_characters": 891.59, |
|
"eval_mean_prediction_length_tokens": 193.579, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 58.5159, |
|
"eval_rouge/rouge1": 72.4344, |
|
"eval_rouge/rouge2": 54.0016, |
|
"eval_rouge/rougeL": 51.2235, |
|
"eval_rouge/rougeLsum": 70.5176, |
|
"eval_runtime": 2696.2641, |
|
"eval_samples_per_second": 0.371, |
|
"eval_steps_per_second": 0.371, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 8.83183086499249e-06, |
|
"loss": 0.2926, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 8.787114545324463e-06, |
|
"loss": 0.2935, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_bertscore/f1": 0.8099, |
|
"eval_bertscore/precision": 0.806, |
|
"eval_bertscore/recall": 0.8158, |
|
"eval_mean_prediction_length_characters": 858.323, |
|
"eval_mean_prediction_length_tokens": 186.494, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 59.0206, |
|
"eval_rouge/rouge1": 72.3233, |
|
"eval_rouge/rouge2": 54.4823, |
|
"eval_rouge/rougeL": 52.1769, |
|
"eval_rouge/rougeLsum": 70.5191, |
|
"eval_runtime": 2616.4872, |
|
"eval_samples_per_second": 0.382, |
|
"eval_steps_per_second": 0.382, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 8.742398225656437e-06, |
|
"loss": 0.2945, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.69768190598841e-06, |
|
"loss": 0.2859, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_bertscore/f1": 0.8021, |
|
"eval_bertscore/precision": 0.7967, |
|
"eval_bertscore/recall": 0.8098, |
|
"eval_mean_prediction_length_characters": 862.623, |
|
"eval_mean_prediction_length_tokens": 187.108, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 58.0972, |
|
"eval_rouge/rouge1": 71.3169, |
|
"eval_rouge/rouge2": 53.5502, |
|
"eval_rouge/rougeL": 51.3466, |
|
"eval_rouge/rougeLsum": 69.328, |
|
"eval_runtime": 2591.3634, |
|
"eval_samples_per_second": 0.386, |
|
"eval_steps_per_second": 0.386, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 8.65305501895972e-06, |
|
"loss": 0.2913, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 8.608338699291695e-06, |
|
"loss": 0.2871, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_bertscore/f1": 0.8125, |
|
"eval_bertscore/precision": 0.81, |
|
"eval_bertscore/recall": 0.8167, |
|
"eval_mean_prediction_length_characters": 839.348, |
|
"eval_mean_prediction_length_tokens": 182.08, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 59.5943, |
|
"eval_rouge/rouge1": 72.7757, |
|
"eval_rouge/rouge2": 55.0591, |
|
"eval_rouge/rougeL": 52.82, |
|
"eval_rouge/rougeLsum": 70.9482, |
|
"eval_runtime": 2411.5587, |
|
"eval_samples_per_second": 0.415, |
|
"eval_steps_per_second": 0.415, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 8.563622379623669e-06, |
|
"loss": 0.2858, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 8.518906059955642e-06, |
|
"loss": 0.2849, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_bertscore/f1": 0.8151, |
|
"eval_bertscore/precision": 0.8177, |
|
"eval_bertscore/recall": 0.8143, |
|
"eval_mean_prediction_length_characters": 801.734, |
|
"eval_mean_prediction_length_tokens": 175.958, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 60.3823, |
|
"eval_rouge/rouge1": 73.1311, |
|
"eval_rouge/rouge2": 56.0185, |
|
"eval_rouge/rougeL": 53.7397, |
|
"eval_rouge/rougeLsum": 71.4246, |
|
"eval_runtime": 2330.5152, |
|
"eval_samples_per_second": 0.429, |
|
"eval_steps_per_second": 0.429, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 8.474189740287617e-06, |
|
"loss": 0.2787, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 8.429473420619591e-06, |
|
"loss": 0.2819, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_bertscore/f1": 0.8142, |
|
"eval_bertscore/precision": 0.8077, |
|
"eval_bertscore/recall": 0.8228, |
|
"eval_mean_prediction_length_characters": 894.063, |
|
"eval_mean_prediction_length_tokens": 192.74, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 60.1592, |
|
"eval_rouge/rouge1": 73.1791, |
|
"eval_rouge/rouge2": 55.8507, |
|
"eval_rouge/rougeL": 53.2711, |
|
"eval_rouge/rougeLsum": 71.4419, |
|
"eval_runtime": 2537.8153, |
|
"eval_samples_per_second": 0.394, |
|
"eval_steps_per_second": 0.394, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 8.384757100951564e-06, |
|
"loss": 0.2799, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 8.340040781283539e-06, |
|
"loss": 0.2776, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_bertscore/f1": 0.8054, |
|
"eval_bertscore/precision": 0.7925, |
|
"eval_bertscore/recall": 0.821, |
|
"eval_mean_prediction_length_characters": 967.121, |
|
"eval_mean_prediction_length_tokens": 206.944, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 58.7521, |
|
"eval_rouge/rouge1": 71.9074, |
|
"eval_rouge/rouge2": 54.5099, |
|
"eval_rouge/rougeL": 51.7393, |
|
"eval_rouge/rougeLsum": 69.9728, |
|
"eval_runtime": 2713.6577, |
|
"eval_samples_per_second": 0.369, |
|
"eval_steps_per_second": 0.369, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.295413894254849e-06, |
|
"loss": 0.2761, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 8.250697574586823e-06, |
|
"loss": 0.2742, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"eval_bertscore/f1": 0.8187, |
|
"eval_bertscore/precision": 0.8125, |
|
"eval_bertscore/recall": 0.827, |
|
"eval_mean_prediction_length_characters": 880.74, |
|
"eval_mean_prediction_length_tokens": 191.221, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 60.9358, |
|
"eval_rouge/rouge1": 73.8216, |
|
"eval_rouge/rouge2": 56.8015, |
|
"eval_rouge/rougeL": 53.9603, |
|
"eval_rouge/rougeLsum": 72.1193, |
|
"eval_runtime": 2491.9164, |
|
"eval_samples_per_second": 0.401, |
|
"eval_steps_per_second": 0.401, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 8.205981254918796e-06, |
|
"loss": 0.2737, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 8.16126493525077e-06, |
|
"loss": 0.2663, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_bertscore/f1": 0.8215, |
|
"eval_bertscore/precision": 0.8174, |
|
"eval_bertscore/recall": 0.8272, |
|
"eval_mean_prediction_length_characters": 862.393, |
|
"eval_mean_prediction_length_tokens": 187.552, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 61.7551, |
|
"eval_rouge/rouge1": 74.5811, |
|
"eval_rouge/rouge2": 57.6565, |
|
"eval_rouge/rougeL": 54.7699, |
|
"eval_rouge/rougeLsum": 72.9841, |
|
"eval_runtime": 2430.2491, |
|
"eval_samples_per_second": 0.411, |
|
"eval_steps_per_second": 0.411, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 8.116548615582743e-06, |
|
"loss": 0.2636, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 8.071921728554053e-06, |
|
"loss": 0.2672, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"eval_bertscore/f1": 0.8194, |
|
"eval_bertscore/precision": 0.8133, |
|
"eval_bertscore/recall": 0.8274, |
|
"eval_mean_prediction_length_characters": 893.688, |
|
"eval_mean_prediction_length_tokens": 193.593, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 61.3651, |
|
"eval_rouge/rouge1": 74.2269, |
|
"eval_rouge/rouge2": 57.4497, |
|
"eval_rouge/rougeL": 54.1897, |
|
"eval_rouge/rougeLsum": 72.5834, |
|
"eval_runtime": 2499.9317, |
|
"eval_samples_per_second": 0.4, |
|
"eval_steps_per_second": 0.4, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 9.569096202090125e-06, |
|
"loss": 0.2366, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 9.54673714242274e-06, |
|
"loss": 0.2405, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_bertscore/f1": 0.8145, |
|
"eval_bertscore/precision": 0.8054, |
|
"eval_bertscore/recall": 0.8256, |
|
"eval_mean_prediction_length_characters": 927.422, |
|
"eval_mean_prediction_length_tokens": 200.478, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 60.0655, |
|
"eval_rouge/rouge1": 73.2741, |
|
"eval_rouge/rouge2": 55.8854, |
|
"eval_rouge/rougeL": 52.9208, |
|
"eval_rouge/rougeLsum": 71.6192, |
|
"eval_runtime": 2620.9045, |
|
"eval_samples_per_second": 0.382, |
|
"eval_steps_per_second": 0.382, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 9.524378082755352e-06, |
|
"loss": 0.2489, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 9.502019023087967e-06, |
|
"loss": 0.2428, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_bertscore/f1": 0.8171, |
|
"eval_bertscore/precision": 0.8136, |
|
"eval_bertscore/recall": 0.8226, |
|
"eval_mean_prediction_length_characters": 847.398, |
|
"eval_mean_prediction_length_tokens": 183.753, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 60.8081, |
|
"eval_rouge/rouge1": 73.4983, |
|
"eval_rouge/rouge2": 56.5892, |
|
"eval_rouge/rougeL": 54.0596, |
|
"eval_rouge/rougeLsum": 71.9013, |
|
"eval_runtime": 2373.5033, |
|
"eval_samples_per_second": 0.421, |
|
"eval_steps_per_second": 0.421, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 9.47965996342058e-06, |
|
"loss": 0.2511, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 9.457300903753192e-06, |
|
"loss": 0.2416, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"eval_bertscore/f1": 0.8155, |
|
"eval_bertscore/precision": 0.8154, |
|
"eval_bertscore/recall": 0.8179, |
|
"eval_mean_prediction_length_characters": 850.697, |
|
"eval_mean_prediction_length_tokens": 185.038, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 60.5988, |
|
"eval_rouge/rouge1": 72.9601, |
|
"eval_rouge/rouge2": 56.3526, |
|
"eval_rouge/rougeL": 54.1244, |
|
"eval_rouge/rougeLsum": 71.3786, |
|
"eval_runtime": 2440.9193, |
|
"eval_samples_per_second": 0.41, |
|
"eval_steps_per_second": 0.41, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 9.43498656220514e-06, |
|
"loss": 0.2404, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 9.412627502537755e-06, |
|
"loss": 0.2597, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_bertscore/f1": 0.8146, |
|
"eval_bertscore/precision": 0.8118, |
|
"eval_bertscore/recall": 0.8194, |
|
"eval_mean_prediction_length_characters": 856.43, |
|
"eval_mean_prediction_length_tokens": 186.628, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 60.5122, |
|
"eval_rouge/rouge1": 73.1873, |
|
"eval_rouge/rouge2": 56.3588, |
|
"eval_rouge/rougeL": 53.7195, |
|
"eval_rouge/rougeLsum": 71.5458, |
|
"eval_runtime": 2434.8964, |
|
"eval_samples_per_second": 0.411, |
|
"eval_steps_per_second": 0.411, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 9.390268442870366e-06, |
|
"loss": 0.2436, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 9.367954101322315e-06, |
|
"loss": 0.2526, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_bertscore/f1": 0.8206, |
|
"eval_bertscore/precision": 0.8186, |
|
"eval_bertscore/recall": 0.8245, |
|
"eval_mean_prediction_length_characters": 843.759, |
|
"eval_mean_prediction_length_tokens": 183.492, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 61.4692, |
|
"eval_rouge/rouge1": 74.1108, |
|
"eval_rouge/rouge2": 57.3353, |
|
"eval_rouge/rougeL": 54.6598, |
|
"eval_rouge/rougeLsum": 72.5858, |
|
"eval_runtime": 2441.5715, |
|
"eval_samples_per_second": 0.41, |
|
"eval_steps_per_second": 0.41, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 9.34559504165493e-06, |
|
"loss": 0.248, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.323235981987542e-06, |
|
"loss": 0.2431, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"eval_bertscore/f1": 0.8214, |
|
"eval_bertscore/precision": 0.8186, |
|
"eval_bertscore/recall": 0.826, |
|
"eval_mean_prediction_length_characters": 854.807, |
|
"eval_mean_prediction_length_tokens": 186.06, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 61.6121, |
|
"eval_rouge/rouge1": 74.3063, |
|
"eval_rouge/rouge2": 57.6124, |
|
"eval_rouge/rougeL": 54.6331, |
|
"eval_rouge/rougeLsum": 72.7609, |
|
"eval_runtime": 2483.1491, |
|
"eval_samples_per_second": 0.403, |
|
"eval_steps_per_second": 0.403, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.300876922320155e-06, |
|
"loss": 0.2413, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 9.27851786265277e-06, |
|
"loss": 0.2556, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_bertscore/f1": 0.8212, |
|
"eval_bertscore/precision": 0.8166, |
|
"eval_bertscore/recall": 0.8277, |
|
"eval_mean_prediction_length_characters": 871.94, |
|
"eval_mean_prediction_length_tokens": 189.412, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 61.6287, |
|
"eval_rouge/rouge1": 74.4291, |
|
"eval_rouge/rouge2": 57.5628, |
|
"eval_rouge/rougeL": 54.6343, |
|
"eval_rouge/rougeLsum": 72.7972, |
|
"eval_runtime": 2504.8302, |
|
"eval_samples_per_second": 0.399, |
|
"eval_steps_per_second": 0.399, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 9.256158802985382e-06, |
|
"loss": 0.2414, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 9.233799743317997e-06, |
|
"loss": 0.2493, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_bertscore/f1": 0.8202, |
|
"eval_bertscore/precision": 0.8115, |
|
"eval_bertscore/recall": 0.831, |
|
"eval_mean_prediction_length_characters": 919.152, |
|
"eval_mean_prediction_length_tokens": 199.285, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 61.6474, |
|
"eval_rouge/rouge1": 74.2897, |
|
"eval_rouge/rouge2": 57.7571, |
|
"eval_rouge/rougeL": 54.6022, |
|
"eval_rouge/rougeLsum": 72.7195, |
|
"eval_runtime": 2638.78, |
|
"eval_samples_per_second": 0.379, |
|
"eval_steps_per_second": 0.379, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 9.21144068365061e-06, |
|
"loss": 0.2467, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 9.189081623983222e-06, |
|
"loss": 0.2421, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_bertscore/f1": 0.823, |
|
"eval_bertscore/precision": 0.8258, |
|
"eval_bertscore/recall": 0.8222, |
|
"eval_mean_prediction_length_characters": 803.212, |
|
"eval_mean_prediction_length_tokens": 176.172, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 61.9161, |
|
"eval_rouge/rouge1": 74.3746, |
|
"eval_rouge/rouge2": 57.9399, |
|
"eval_rouge/rougeL": 55.0818, |
|
"eval_rouge/rougeLsum": 72.8967, |
|
"eval_runtime": 2327.757, |
|
"eval_samples_per_second": 0.43, |
|
"eval_steps_per_second": 0.43, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 9.166722564315837e-06, |
|
"loss": 0.2478, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 9.14436350464845e-06, |
|
"loss": 0.229, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_bertscore/f1": 0.8212, |
|
"eval_bertscore/precision": 0.822, |
|
"eval_bertscore/recall": 0.8227, |
|
"eval_mean_prediction_length_characters": 819.677, |
|
"eval_mean_prediction_length_tokens": 178.389, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 61.8804, |
|
"eval_rouge/rouge1": 74.0116, |
|
"eval_rouge/rouge2": 57.9086, |
|
"eval_rouge/rougeL": 55.2862, |
|
"eval_rouge/rougeLsum": 72.466, |
|
"eval_runtime": 2366.2663, |
|
"eval_samples_per_second": 0.423, |
|
"eval_steps_per_second": 0.423, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 9.122049163100398e-06, |
|
"loss": 0.2398, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 9.09969010343301e-06, |
|
"loss": 0.2429, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_bertscore/f1": 0.8243, |
|
"eval_bertscore/precision": 0.8174, |
|
"eval_bertscore/recall": 0.8333, |
|
"eval_mean_prediction_length_characters": 896.966, |
|
"eval_mean_prediction_length_tokens": 195.349, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 62.2879, |
|
"eval_rouge/rouge1": 74.8367, |
|
"eval_rouge/rouge2": 58.4326, |
|
"eval_rouge/rougeL": 55.2638, |
|
"eval_rouge/rougeLsum": 73.3944, |
|
"eval_runtime": 2574.2161, |
|
"eval_samples_per_second": 0.388, |
|
"eval_steps_per_second": 0.388, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 9.077331043765623e-06, |
|
"loss": 0.2355, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 9.054971984098238e-06, |
|
"loss": 0.2375, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_bertscore/f1": 0.824, |
|
"eval_bertscore/precision": 0.82, |
|
"eval_bertscore/recall": 0.8303, |
|
"eval_mean_prediction_length_characters": 873.321, |
|
"eval_mean_prediction_length_tokens": 190.408, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 62.1689, |
|
"eval_rouge/rouge1": 74.7062, |
|
"eval_rouge/rouge2": 58.2978, |
|
"eval_rouge/rougeL": 55.1711, |
|
"eval_rouge/rougeLsum": 73.2347, |
|
"eval_runtime": 2618.1437, |
|
"eval_samples_per_second": 0.382, |
|
"eval_steps_per_second": 0.382, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 9.032657642550186e-06, |
|
"loss": 0.2425, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 9.010298582882799e-06, |
|
"loss": 0.228, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_bertscore/f1": 0.8258, |
|
"eval_bertscore/precision": 0.8189, |
|
"eval_bertscore/recall": 0.8348, |
|
"eval_mean_prediction_length_characters": 904.242, |
|
"eval_mean_prediction_length_tokens": 196.813, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 62.6657, |
|
"eval_rouge/rouge1": 75.0484, |
|
"eval_rouge/rouge2": 59.0138, |
|
"eval_rouge/rougeL": 55.5642, |
|
"eval_rouge/rougeLsum": 73.6606, |
|
"eval_runtime": 2709.9214, |
|
"eval_samples_per_second": 0.369, |
|
"eval_steps_per_second": 0.369, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 8.987939523215412e-06, |
|
"loss": 0.2388, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 8.965580463548026e-06, |
|
"loss": 0.2398, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_bertscore/f1": 0.8287, |
|
"eval_bertscore/precision": 0.8255, |
|
"eval_bertscore/recall": 0.8338, |
|
"eval_mean_prediction_length_characters": 861.415, |
|
"eval_mean_prediction_length_tokens": 187.846, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 63.1011, |
|
"eval_rouge/rouge1": 75.5807, |
|
"eval_rouge/rouge2": 59.457, |
|
"eval_rouge/rougeL": 55.9108, |
|
"eval_rouge/rougeLsum": 74.1699, |
|
"eval_runtime": 2601.0783, |
|
"eval_samples_per_second": 0.384, |
|
"eval_steps_per_second": 0.384, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 8.943221403880639e-06, |
|
"loss": 0.2399, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 8.920862344213252e-06, |
|
"loss": 0.2373, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_bertscore/f1": 0.8253, |
|
"eval_bertscore/precision": 0.8244, |
|
"eval_bertscore/recall": 0.828, |
|
"eval_mean_prediction_length_characters": 840.263, |
|
"eval_mean_prediction_length_tokens": 182.473, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 62.7461, |
|
"eval_rouge/rouge1": 74.8987, |
|
"eval_rouge/rouge2": 59.0142, |
|
"eval_rouge/rougeL": 55.8894, |
|
"eval_rouge/rougeLsum": 73.4189, |
|
"eval_runtime": 2549.6944, |
|
"eval_samples_per_second": 0.392, |
|
"eval_steps_per_second": 0.392, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 8.8985480026652e-06, |
|
"loss": 0.2397, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 8.876233661117149e-06, |
|
"loss": 0.2057, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_bertscore/f1": 0.8258, |
|
"eval_bertscore/precision": 0.8222, |
|
"eval_bertscore/recall": 0.8313, |
|
"eval_mean_prediction_length_characters": 857.49, |
|
"eval_mean_prediction_length_tokens": 186.547, |
|
"eval_num_predicted": 1000, |
|
"eval_rouge/geometric_mean": 62.8166, |
|
"eval_rouge/rouge1": 75.1042, |
|
"eval_rouge/rouge2": 59.0815, |
|
"eval_rouge/rougeL": 55.8608, |
|
"eval_rouge/rougeLsum": 73.5713, |
|
"eval_runtime": 2602.9106, |
|
"eval_samples_per_second": 0.384, |
|
"eval_steps_per_second": 0.384, |
|
"step": 50000 |
|
} |
|
], |
|
"max_steps": 248470, |
|
"num_train_epochs": 10, |
|
"total_flos": 6.239894115827712e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|