koja_mbartLarge_37p_exp2 / trainer_state.json
yesj1234's picture
Upload folder using huggingface_hub
37d0287
{
"best_metric": 0.8987509608268738,
"best_model_checkpoint": "./mbartLarge_koja_37p_exp2/checkpoint-31250",
"epoch": 3.1927074158886737,
"eval_steps": 1250,
"global_step": 36250,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"learning_rate": 4.995587196987527e-05,
"loss": 3.3474,
"step": 500
},
{
"epoch": 0.09,
"learning_rate": 4.9808778536126146e-05,
"loss": 2.0622,
"step": 1000
},
{
"epoch": 0.11,
"eval_bleu": 1.2834,
"eval_gen_len": 17.8009,
"eval_loss": 1.6679450273513794,
"eval_runtime": 1488.7514,
"eval_samples_per_second": 15.252,
"eval_steps_per_second": 0.954,
"step": 1250
},
{
"epoch": 0.13,
"learning_rate": 4.966168510237703e-05,
"loss": 1.7123,
"step": 1500
},
{
"epoch": 0.18,
"learning_rate": 4.951459166862791e-05,
"loss": 1.59,
"step": 2000
},
{
"epoch": 0.22,
"learning_rate": 4.936749823487879e-05,
"loss": 1.5139,
"step": 2500
},
{
"epoch": 0.22,
"eval_bleu": 2.0427,
"eval_gen_len": 17.8496,
"eval_loss": 1.4377689361572266,
"eval_runtime": 1494.0047,
"eval_samples_per_second": 15.199,
"eval_steps_per_second": 0.95,
"step": 2500
},
{
"epoch": 0.26,
"learning_rate": 4.922040480112968e-05,
"loss": 1.4565,
"step": 3000
},
{
"epoch": 0.31,
"learning_rate": 4.9073311367380564e-05,
"loss": 1.4121,
"step": 3500
},
{
"epoch": 0.33,
"eval_bleu": 2.7599,
"eval_gen_len": 17.7667,
"eval_loss": 1.3115850687026978,
"eval_runtime": 1502.2991,
"eval_samples_per_second": 15.115,
"eval_steps_per_second": 0.945,
"step": 3750
},
{
"epoch": 0.35,
"learning_rate": 4.892621793363145e-05,
"loss": 1.3511,
"step": 4000
},
{
"epoch": 0.4,
"learning_rate": 4.877912449988233e-05,
"loss": 1.3052,
"step": 4500
},
{
"epoch": 0.44,
"learning_rate": 4.863203106613321e-05,
"loss": 1.2879,
"step": 5000
},
{
"epoch": 0.44,
"eval_bleu": 3.1444,
"eval_gen_len": 17.8887,
"eval_loss": 1.23811936378479,
"eval_runtime": 1519.5015,
"eval_samples_per_second": 14.944,
"eval_steps_per_second": 0.935,
"step": 5000
},
{
"epoch": 0.48,
"learning_rate": 4.8484937632384094e-05,
"loss": 1.2634,
"step": 5500
},
{
"epoch": 0.53,
"learning_rate": 4.8337844198634976e-05,
"loss": 1.2344,
"step": 6000
},
{
"epoch": 0.55,
"eval_bleu": 3.3835,
"eval_gen_len": 17.8323,
"eval_loss": 1.1769133806228638,
"eval_runtime": 1506.2285,
"eval_samples_per_second": 15.075,
"eval_steps_per_second": 0.943,
"step": 6250
},
{
"epoch": 0.57,
"learning_rate": 4.819075076488586e-05,
"loss": 1.2213,
"step": 6500
},
{
"epoch": 0.62,
"learning_rate": 4.804365733113674e-05,
"loss": 1.1918,
"step": 7000
},
{
"epoch": 0.66,
"learning_rate": 4.789656389738762e-05,
"loss": 1.1778,
"step": 7500
},
{
"epoch": 0.66,
"eval_bleu": 3.9511,
"eval_gen_len": 17.4892,
"eval_loss": 1.1381820440292358,
"eval_runtime": 1485.4261,
"eval_samples_per_second": 15.287,
"eval_steps_per_second": 0.956,
"step": 7500
},
{
"epoch": 0.7,
"learning_rate": 4.7749470463638505e-05,
"loss": 1.1651,
"step": 8000
},
{
"epoch": 0.75,
"learning_rate": 4.760237702988939e-05,
"loss": 1.1461,
"step": 8500
},
{
"epoch": 0.77,
"eval_bleu": 3.9402,
"eval_gen_len": 18.0136,
"eval_loss": 1.09383225440979,
"eval_runtime": 1503.0162,
"eval_samples_per_second": 15.108,
"eval_steps_per_second": 0.945,
"step": 8750
},
{
"epoch": 0.79,
"learning_rate": 4.745528359614027e-05,
"loss": 1.1341,
"step": 9000
},
{
"epoch": 0.84,
"learning_rate": 4.730819016239115e-05,
"loss": 1.1294,
"step": 9500
},
{
"epoch": 0.88,
"learning_rate": 4.7161096728642035e-05,
"loss": 1.1151,
"step": 10000
},
{
"epoch": 0.88,
"eval_bleu": 4.2134,
"eval_gen_len": 18.0537,
"eval_loss": 1.074916124343872,
"eval_runtime": 1564.4468,
"eval_samples_per_second": 14.514,
"eval_steps_per_second": 0.908,
"step": 10000
},
{
"epoch": 0.92,
"learning_rate": 4.701400329489292e-05,
"loss": 1.1102,
"step": 10500
},
{
"epoch": 0.97,
"learning_rate": 4.68669098611438e-05,
"loss": 1.093,
"step": 11000
},
{
"epoch": 0.99,
"eval_bleu": 3.9587,
"eval_gen_len": 17.8715,
"eval_loss": 1.0418165922164917,
"eval_runtime": 1534.6245,
"eval_samples_per_second": 14.796,
"eval_steps_per_second": 0.925,
"step": 11250
},
{
"epoch": 1.01,
"learning_rate": 4.671981642739469e-05,
"loss": 1.084,
"step": 11500
},
{
"epoch": 1.06,
"learning_rate": 4.657272299364557e-05,
"loss": 1.0641,
"step": 12000
},
{
"epoch": 1.1,
"learning_rate": 4.6425629559896446e-05,
"loss": 1.0626,
"step": 12500
},
{
"epoch": 1.1,
"eval_bleu": 4.6251,
"eval_gen_len": 17.9406,
"eval_loss": 1.0315195322036743,
"eval_runtime": 1499.6187,
"eval_samples_per_second": 15.142,
"eval_steps_per_second": 0.947,
"step": 12500
},
{
"epoch": 1.14,
"learning_rate": 4.627853612614733e-05,
"loss": 1.0332,
"step": 13000
},
{
"epoch": 1.19,
"learning_rate": 4.613144269239821e-05,
"loss": 1.0192,
"step": 13500
},
{
"epoch": 1.21,
"eval_bleu": 4.9573,
"eval_gen_len": 18.1266,
"eval_loss": 1.0131869316101074,
"eval_runtime": 1514.9755,
"eval_samples_per_second": 14.988,
"eval_steps_per_second": 0.937,
"step": 13750
},
{
"epoch": 1.23,
"learning_rate": 4.598434925864909e-05,
"loss": 1.0234,
"step": 14000
},
{
"epoch": 1.28,
"learning_rate": 4.5837255824899976e-05,
"loss": 1.0062,
"step": 14500
},
{
"epoch": 1.32,
"learning_rate": 4.569016239115086e-05,
"loss": 0.9957,
"step": 15000
},
{
"epoch": 1.32,
"eval_bleu": 4.3068,
"eval_gen_len": 18.0925,
"eval_loss": 0.9988749623298645,
"eval_runtime": 1565.2457,
"eval_samples_per_second": 14.507,
"eval_steps_per_second": 0.907,
"step": 15000
},
{
"epoch": 1.37,
"learning_rate": 4.554306895740174e-05,
"loss": 0.9843,
"step": 15500
},
{
"epoch": 1.41,
"learning_rate": 4.539597552365263e-05,
"loss": 0.9778,
"step": 16000
},
{
"epoch": 1.43,
"eval_bleu": 5.0517,
"eval_gen_len": 17.8783,
"eval_loss": 0.984965980052948,
"eval_runtime": 1508.5724,
"eval_samples_per_second": 15.052,
"eval_steps_per_second": 0.941,
"step": 16250
},
{
"epoch": 1.45,
"learning_rate": 4.524888208990351e-05,
"loss": 0.961,
"step": 16500
},
{
"epoch": 1.5,
"learning_rate": 4.5101788656154394e-05,
"loss": 0.9735,
"step": 17000
},
{
"epoch": 1.54,
"learning_rate": 4.4954695222405276e-05,
"loss": 0.9446,
"step": 17500
},
{
"epoch": 1.54,
"eval_bleu": 5.0194,
"eval_gen_len": 17.9348,
"eval_loss": 0.9747512936592102,
"eval_runtime": 1574.1886,
"eval_samples_per_second": 14.425,
"eval_steps_per_second": 0.902,
"step": 17500
},
{
"epoch": 1.59,
"learning_rate": 4.480760178865616e-05,
"loss": 0.9352,
"step": 18000
},
{
"epoch": 1.63,
"learning_rate": 4.466050835490704e-05,
"loss": 0.9236,
"step": 18500
},
{
"epoch": 1.65,
"eval_bleu": 4.6011,
"eval_gen_len": 17.7926,
"eval_loss": 0.9619229435920715,
"eval_runtime": 1510.4722,
"eval_samples_per_second": 15.033,
"eval_steps_per_second": 0.94,
"step": 18750
},
{
"epoch": 1.67,
"learning_rate": 4.4513414921157917e-05,
"loss": 0.9289,
"step": 19000
},
{
"epoch": 1.72,
"learning_rate": 4.43663214874088e-05,
"loss": 0.9216,
"step": 19500
},
{
"epoch": 1.76,
"learning_rate": 4.421922805365969e-05,
"loss": 0.9091,
"step": 20000
},
{
"epoch": 1.76,
"eval_bleu": 4.6035,
"eval_gen_len": 17.9399,
"eval_loss": 0.9564162492752075,
"eval_runtime": 1530.1826,
"eval_samples_per_second": 14.839,
"eval_steps_per_second": 0.928,
"step": 20000
},
{
"epoch": 1.81,
"learning_rate": 4.407213461991057e-05,
"loss": 0.9333,
"step": 20500
},
{
"epoch": 1.85,
"learning_rate": 4.392504118616145e-05,
"loss": 0.9072,
"step": 21000
},
{
"epoch": 1.87,
"eval_bleu": 4.8313,
"eval_gen_len": 17.6221,
"eval_loss": 0.9533364176750183,
"eval_runtime": 1475.3262,
"eval_samples_per_second": 15.391,
"eval_steps_per_second": 0.962,
"step": 21250
},
{
"epoch": 1.89,
"learning_rate": 4.3777947752412335e-05,
"loss": 0.8896,
"step": 21500
},
{
"epoch": 1.94,
"learning_rate": 4.363085431866322e-05,
"loss": 0.8945,
"step": 22000
},
{
"epoch": 1.98,
"learning_rate": 4.34837608849141e-05,
"loss": 0.8758,
"step": 22500
},
{
"epoch": 1.98,
"eval_bleu": 5.2707,
"eval_gen_len": 17.5851,
"eval_loss": 0.9421375393867493,
"eval_runtime": 1516.2122,
"eval_samples_per_second": 14.976,
"eval_steps_per_second": 0.937,
"step": 22500
},
{
"epoch": 2.03,
"learning_rate": 4.333666745116498e-05,
"loss": 0.8723,
"step": 23000
},
{
"epoch": 2.07,
"learning_rate": 4.3189574017415864e-05,
"loss": 0.8539,
"step": 23500
},
{
"epoch": 2.09,
"eval_bleu": 5.2661,
"eval_gen_len": 17.821,
"eval_loss": 0.9304332137107849,
"eval_runtime": 1492.9401,
"eval_samples_per_second": 15.21,
"eval_steps_per_second": 0.951,
"step": 23750
},
{
"epoch": 2.11,
"learning_rate": 4.3042480583666747e-05,
"loss": 0.8461,
"step": 24000
},
{
"epoch": 2.16,
"learning_rate": 4.289538714991763e-05,
"loss": 0.842,
"step": 24500
},
{
"epoch": 2.2,
"learning_rate": 4.274829371616851e-05,
"loss": 0.8575,
"step": 25000
},
{
"epoch": 2.2,
"eval_bleu": 4.9143,
"eval_gen_len": 17.8879,
"eval_loss": 0.9329252243041992,
"eval_runtime": 1560.9154,
"eval_samples_per_second": 14.547,
"eval_steps_per_second": 0.91,
"step": 25000
},
{
"epoch": 2.25,
"learning_rate": 4.2601200282419394e-05,
"loss": 0.8424,
"step": 25500
},
{
"epoch": 2.29,
"learning_rate": 4.2454106848670276e-05,
"loss": 0.8314,
"step": 26000
},
{
"epoch": 2.31,
"eval_bleu": 5.106,
"eval_gen_len": 18.0037,
"eval_loss": 0.9262479543685913,
"eval_runtime": 1520.8802,
"eval_samples_per_second": 14.93,
"eval_steps_per_second": 0.934,
"step": 26250
},
{
"epoch": 2.33,
"learning_rate": 4.230701341492116e-05,
"loss": 0.8172,
"step": 26500
},
{
"epoch": 2.38,
"learning_rate": 4.215991998117204e-05,
"loss": 0.8219,
"step": 27000
},
{
"epoch": 2.42,
"learning_rate": 4.201282654742292e-05,
"loss": 0.8248,
"step": 27500
},
{
"epoch": 2.42,
"eval_bleu": 5.3073,
"eval_gen_len": 17.6632,
"eval_loss": 0.9241103529930115,
"eval_runtime": 1501.7783,
"eval_samples_per_second": 15.12,
"eval_steps_per_second": 0.946,
"step": 27500
},
{
"epoch": 2.47,
"learning_rate": 4.1865733113673805e-05,
"loss": 0.8046,
"step": 28000
},
{
"epoch": 2.51,
"learning_rate": 4.1718639679924694e-05,
"loss": 0.8151,
"step": 28500
},
{
"epoch": 2.53,
"eval_bleu": 5.5675,
"eval_gen_len": 17.7676,
"eval_loss": 0.9301652312278748,
"eval_runtime": 1487.7016,
"eval_samples_per_second": 15.263,
"eval_steps_per_second": 0.954,
"step": 28750
},
{
"epoch": 2.55,
"learning_rate": 4.157154624617558e-05,
"loss": 0.8019,
"step": 29000
},
{
"epoch": 2.6,
"learning_rate": 4.142445281242646e-05,
"loss": 0.8423,
"step": 29500
},
{
"epoch": 2.64,
"learning_rate": 4.127735937867734e-05,
"loss": 0.8093,
"step": 30000
},
{
"epoch": 2.64,
"eval_bleu": 6.2644,
"eval_gen_len": 17.8475,
"eval_loss": 0.9149118065834045,
"eval_runtime": 1510.1876,
"eval_samples_per_second": 15.036,
"eval_steps_per_second": 0.94,
"step": 30000
},
{
"epoch": 2.69,
"learning_rate": 4.113026594492822e-05,
"loss": 0.7833,
"step": 30500
},
{
"epoch": 2.73,
"learning_rate": 4.09831725111791e-05,
"loss": 0.7691,
"step": 31000
},
{
"epoch": 2.75,
"eval_bleu": 6.6682,
"eval_gen_len": 17.7685,
"eval_loss": 0.8987509608268738,
"eval_runtime": 1501.6272,
"eval_samples_per_second": 15.122,
"eval_steps_per_second": 0.946,
"step": 31250
},
{
"epoch": 2.77,
"learning_rate": 4.083607907742998e-05,
"loss": 0.7673,
"step": 31500
},
{
"epoch": 2.82,
"learning_rate": 4.0688985643680864e-05,
"loss": 0.8064,
"step": 32000
},
{
"epoch": 2.86,
"learning_rate": 4.054189220993175e-05,
"loss": 0.771,
"step": 32500
},
{
"epoch": 2.86,
"eval_bleu": 5.7856,
"eval_gen_len": 17.8678,
"eval_loss": 0.9189176559448242,
"eval_runtime": 1503.2313,
"eval_samples_per_second": 15.105,
"eval_steps_per_second": 0.945,
"step": 32500
},
{
"epoch": 2.91,
"learning_rate": 4.0394798776182635e-05,
"loss": 0.7652,
"step": 33000
},
{
"epoch": 2.95,
"learning_rate": 4.024770534243352e-05,
"loss": 0.7658,
"step": 33500
},
{
"epoch": 2.97,
"eval_bleu": 6.2468,
"eval_gen_len": 17.7313,
"eval_loss": 0.9175418019294739,
"eval_runtime": 1487.9561,
"eval_samples_per_second": 15.261,
"eval_steps_per_second": 0.954,
"step": 33750
},
{
"epoch": 2.99,
"learning_rate": 4.01006119086844e-05,
"loss": 0.7568,
"step": 34000
},
{
"epoch": 3.04,
"learning_rate": 3.995351847493528e-05,
"loss": 0.7913,
"step": 34500
},
{
"epoch": 3.08,
"learning_rate": 3.9806425041186165e-05,
"loss": 0.7914,
"step": 35000
},
{
"epoch": 3.08,
"eval_bleu": 5.5525,
"eval_gen_len": 17.7627,
"eval_loss": 0.9019802808761597,
"eval_runtime": 1501.9062,
"eval_samples_per_second": 15.119,
"eval_steps_per_second": 0.945,
"step": 35000
},
{
"epoch": 3.13,
"learning_rate": 3.965933160743705e-05,
"loss": 0.7333,
"step": 35500
},
{
"epoch": 3.17,
"learning_rate": 3.951223817368793e-05,
"loss": 0.7264,
"step": 36000
},
{
"epoch": 3.19,
"eval_bleu": 6.2055,
"eval_gen_len": 17.7662,
"eval_loss": 0.9046412110328674,
"eval_runtime": 1496.9385,
"eval_samples_per_second": 15.169,
"eval_steps_per_second": 0.949,
"step": 36250
},
{
"epoch": 3.19,
"step": 36250,
"total_flos": 1.257073961708028e+18,
"train_loss": 1.0393056295460668,
"train_runtime": 72152.6791,
"train_samples_per_second": 37.766,
"train_steps_per_second": 2.36
}
],
"logging_steps": 500,
"max_steps": 170310,
"num_train_epochs": 15,
"save_steps": 1250,
"total_flos": 1.257073961708028e+18,
"trial_name": null,
"trial_params": null
}