{ "best_metric": 0.8987509608268738, "best_model_checkpoint": "./mbartLarge_koja_37p_exp2/checkpoint-31250", "epoch": 3.1927074158886737, "eval_steps": 1250, "global_step": 36250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 4.995587196987527e-05, "loss": 3.3474, "step": 500 }, { "epoch": 0.09, "learning_rate": 4.9808778536126146e-05, "loss": 2.0622, "step": 1000 }, { "epoch": 0.11, "eval_bleu": 1.2834, "eval_gen_len": 17.8009, "eval_loss": 1.6679450273513794, "eval_runtime": 1488.7514, "eval_samples_per_second": 15.252, "eval_steps_per_second": 0.954, "step": 1250 }, { "epoch": 0.13, "learning_rate": 4.966168510237703e-05, "loss": 1.7123, "step": 1500 }, { "epoch": 0.18, "learning_rate": 4.951459166862791e-05, "loss": 1.59, "step": 2000 }, { "epoch": 0.22, "learning_rate": 4.936749823487879e-05, "loss": 1.5139, "step": 2500 }, { "epoch": 0.22, "eval_bleu": 2.0427, "eval_gen_len": 17.8496, "eval_loss": 1.4377689361572266, "eval_runtime": 1494.0047, "eval_samples_per_second": 15.199, "eval_steps_per_second": 0.95, "step": 2500 }, { "epoch": 0.26, "learning_rate": 4.922040480112968e-05, "loss": 1.4565, "step": 3000 }, { "epoch": 0.31, "learning_rate": 4.9073311367380564e-05, "loss": 1.4121, "step": 3500 }, { "epoch": 0.33, "eval_bleu": 2.7599, "eval_gen_len": 17.7667, "eval_loss": 1.3115850687026978, "eval_runtime": 1502.2991, "eval_samples_per_second": 15.115, "eval_steps_per_second": 0.945, "step": 3750 }, { "epoch": 0.35, "learning_rate": 4.892621793363145e-05, "loss": 1.3511, "step": 4000 }, { "epoch": 0.4, "learning_rate": 4.877912449988233e-05, "loss": 1.3052, "step": 4500 }, { "epoch": 0.44, "learning_rate": 4.863203106613321e-05, "loss": 1.2879, "step": 5000 }, { "epoch": 0.44, "eval_bleu": 3.1444, "eval_gen_len": 17.8887, "eval_loss": 1.23811936378479, "eval_runtime": 1519.5015, "eval_samples_per_second": 14.944, "eval_steps_per_second": 0.935, "step": 5000 }, { "epoch": 0.48, "learning_rate": 4.8484937632384094e-05, "loss": 1.2634, "step": 5500 }, { "epoch": 0.53, "learning_rate": 4.8337844198634976e-05, "loss": 1.2344, "step": 6000 }, { "epoch": 0.55, "eval_bleu": 3.3835, "eval_gen_len": 17.8323, "eval_loss": 1.1769133806228638, "eval_runtime": 1506.2285, "eval_samples_per_second": 15.075, "eval_steps_per_second": 0.943, "step": 6250 }, { "epoch": 0.57, "learning_rate": 4.819075076488586e-05, "loss": 1.2213, "step": 6500 }, { "epoch": 0.62, "learning_rate": 4.804365733113674e-05, "loss": 1.1918, "step": 7000 }, { "epoch": 0.66, "learning_rate": 4.789656389738762e-05, "loss": 1.1778, "step": 7500 }, { "epoch": 0.66, "eval_bleu": 3.9511, "eval_gen_len": 17.4892, "eval_loss": 1.1381820440292358, "eval_runtime": 1485.4261, "eval_samples_per_second": 15.287, "eval_steps_per_second": 0.956, "step": 7500 }, { "epoch": 0.7, "learning_rate": 4.7749470463638505e-05, "loss": 1.1651, "step": 8000 }, { "epoch": 0.75, "learning_rate": 4.760237702988939e-05, "loss": 1.1461, "step": 8500 }, { "epoch": 0.77, "eval_bleu": 3.9402, "eval_gen_len": 18.0136, "eval_loss": 1.09383225440979, "eval_runtime": 1503.0162, "eval_samples_per_second": 15.108, "eval_steps_per_second": 0.945, "step": 8750 }, { "epoch": 0.79, "learning_rate": 4.745528359614027e-05, "loss": 1.1341, "step": 9000 }, { "epoch": 0.84, "learning_rate": 4.730819016239115e-05, "loss": 1.1294, "step": 9500 }, { "epoch": 0.88, "learning_rate": 4.7161096728642035e-05, "loss": 1.1151, "step": 10000 }, { "epoch": 0.88, "eval_bleu": 4.2134, "eval_gen_len": 18.0537, "eval_loss": 1.074916124343872, "eval_runtime": 1564.4468, "eval_samples_per_second": 14.514, "eval_steps_per_second": 0.908, "step": 10000 }, { "epoch": 0.92, "learning_rate": 4.701400329489292e-05, "loss": 1.1102, "step": 10500 }, { "epoch": 0.97, "learning_rate": 4.68669098611438e-05, "loss": 1.093, "step": 11000 }, { "epoch": 0.99, "eval_bleu": 3.9587, "eval_gen_len": 17.8715, "eval_loss": 1.0418165922164917, "eval_runtime": 1534.6245, "eval_samples_per_second": 14.796, "eval_steps_per_second": 0.925, "step": 11250 }, { "epoch": 1.01, "learning_rate": 4.671981642739469e-05, "loss": 1.084, "step": 11500 }, { "epoch": 1.06, "learning_rate": 4.657272299364557e-05, "loss": 1.0641, "step": 12000 }, { "epoch": 1.1, "learning_rate": 4.6425629559896446e-05, "loss": 1.0626, "step": 12500 }, { "epoch": 1.1, "eval_bleu": 4.6251, "eval_gen_len": 17.9406, "eval_loss": 1.0315195322036743, "eval_runtime": 1499.6187, "eval_samples_per_second": 15.142, "eval_steps_per_second": 0.947, "step": 12500 }, { "epoch": 1.14, "learning_rate": 4.627853612614733e-05, "loss": 1.0332, "step": 13000 }, { "epoch": 1.19, "learning_rate": 4.613144269239821e-05, "loss": 1.0192, "step": 13500 }, { "epoch": 1.21, "eval_bleu": 4.9573, "eval_gen_len": 18.1266, "eval_loss": 1.0131869316101074, "eval_runtime": 1514.9755, "eval_samples_per_second": 14.988, "eval_steps_per_second": 0.937, "step": 13750 }, { "epoch": 1.23, "learning_rate": 4.598434925864909e-05, "loss": 1.0234, "step": 14000 }, { "epoch": 1.28, "learning_rate": 4.5837255824899976e-05, "loss": 1.0062, "step": 14500 }, { "epoch": 1.32, "learning_rate": 4.569016239115086e-05, "loss": 0.9957, "step": 15000 }, { "epoch": 1.32, "eval_bleu": 4.3068, "eval_gen_len": 18.0925, "eval_loss": 0.9988749623298645, "eval_runtime": 1565.2457, "eval_samples_per_second": 14.507, "eval_steps_per_second": 0.907, "step": 15000 }, { "epoch": 1.37, "learning_rate": 4.554306895740174e-05, "loss": 0.9843, "step": 15500 }, { "epoch": 1.41, "learning_rate": 4.539597552365263e-05, "loss": 0.9778, "step": 16000 }, { "epoch": 1.43, "eval_bleu": 5.0517, "eval_gen_len": 17.8783, "eval_loss": 0.984965980052948, "eval_runtime": 1508.5724, "eval_samples_per_second": 15.052, "eval_steps_per_second": 0.941, "step": 16250 }, { "epoch": 1.45, "learning_rate": 4.524888208990351e-05, "loss": 0.961, "step": 16500 }, { "epoch": 1.5, "learning_rate": 4.5101788656154394e-05, "loss": 0.9735, "step": 17000 }, { "epoch": 1.54, "learning_rate": 4.4954695222405276e-05, "loss": 0.9446, "step": 17500 }, { "epoch": 1.54, "eval_bleu": 5.0194, "eval_gen_len": 17.9348, "eval_loss": 0.9747512936592102, "eval_runtime": 1574.1886, "eval_samples_per_second": 14.425, "eval_steps_per_second": 0.902, "step": 17500 }, { "epoch": 1.59, "learning_rate": 4.480760178865616e-05, "loss": 0.9352, "step": 18000 }, { "epoch": 1.63, "learning_rate": 4.466050835490704e-05, "loss": 0.9236, "step": 18500 }, { "epoch": 1.65, "eval_bleu": 4.6011, "eval_gen_len": 17.7926, "eval_loss": 0.9619229435920715, "eval_runtime": 1510.4722, "eval_samples_per_second": 15.033, "eval_steps_per_second": 0.94, "step": 18750 }, { "epoch": 1.67, "learning_rate": 4.4513414921157917e-05, "loss": 0.9289, "step": 19000 }, { "epoch": 1.72, "learning_rate": 4.43663214874088e-05, "loss": 0.9216, "step": 19500 }, { "epoch": 1.76, "learning_rate": 4.421922805365969e-05, "loss": 0.9091, "step": 20000 }, { "epoch": 1.76, "eval_bleu": 4.6035, "eval_gen_len": 17.9399, "eval_loss": 0.9564162492752075, "eval_runtime": 1530.1826, "eval_samples_per_second": 14.839, "eval_steps_per_second": 0.928, "step": 20000 }, { "epoch": 1.81, "learning_rate": 4.407213461991057e-05, "loss": 0.9333, "step": 20500 }, { "epoch": 1.85, "learning_rate": 4.392504118616145e-05, "loss": 0.9072, "step": 21000 }, { "epoch": 1.87, "eval_bleu": 4.8313, "eval_gen_len": 17.6221, "eval_loss": 0.9533364176750183, "eval_runtime": 1475.3262, "eval_samples_per_second": 15.391, "eval_steps_per_second": 0.962, "step": 21250 }, { "epoch": 1.89, "learning_rate": 4.3777947752412335e-05, "loss": 0.8896, "step": 21500 }, { "epoch": 1.94, "learning_rate": 4.363085431866322e-05, "loss": 0.8945, "step": 22000 }, { "epoch": 1.98, "learning_rate": 4.34837608849141e-05, "loss": 0.8758, "step": 22500 }, { "epoch": 1.98, "eval_bleu": 5.2707, "eval_gen_len": 17.5851, "eval_loss": 0.9421375393867493, "eval_runtime": 1516.2122, "eval_samples_per_second": 14.976, "eval_steps_per_second": 0.937, "step": 22500 }, { "epoch": 2.03, "learning_rate": 4.333666745116498e-05, "loss": 0.8723, "step": 23000 }, { "epoch": 2.07, "learning_rate": 4.3189574017415864e-05, "loss": 0.8539, "step": 23500 }, { "epoch": 2.09, "eval_bleu": 5.2661, "eval_gen_len": 17.821, "eval_loss": 0.9304332137107849, "eval_runtime": 1492.9401, "eval_samples_per_second": 15.21, "eval_steps_per_second": 0.951, "step": 23750 }, { "epoch": 2.11, "learning_rate": 4.3042480583666747e-05, "loss": 0.8461, "step": 24000 }, { "epoch": 2.16, "learning_rate": 4.289538714991763e-05, "loss": 0.842, "step": 24500 }, { "epoch": 2.2, "learning_rate": 4.274829371616851e-05, "loss": 0.8575, "step": 25000 }, { "epoch": 2.2, "eval_bleu": 4.9143, "eval_gen_len": 17.8879, "eval_loss": 0.9329252243041992, "eval_runtime": 1560.9154, "eval_samples_per_second": 14.547, "eval_steps_per_second": 0.91, "step": 25000 }, { "epoch": 2.25, "learning_rate": 4.2601200282419394e-05, "loss": 0.8424, "step": 25500 }, { "epoch": 2.29, "learning_rate": 4.2454106848670276e-05, "loss": 0.8314, "step": 26000 }, { "epoch": 2.31, "eval_bleu": 5.106, "eval_gen_len": 18.0037, "eval_loss": 0.9262479543685913, "eval_runtime": 1520.8802, "eval_samples_per_second": 14.93, "eval_steps_per_second": 0.934, "step": 26250 }, { "epoch": 2.33, "learning_rate": 4.230701341492116e-05, "loss": 0.8172, "step": 26500 }, { "epoch": 2.38, "learning_rate": 4.215991998117204e-05, "loss": 0.8219, "step": 27000 }, { "epoch": 2.42, "learning_rate": 4.201282654742292e-05, "loss": 0.8248, "step": 27500 }, { "epoch": 2.42, "eval_bleu": 5.3073, "eval_gen_len": 17.6632, "eval_loss": 0.9241103529930115, "eval_runtime": 1501.7783, "eval_samples_per_second": 15.12, "eval_steps_per_second": 0.946, "step": 27500 }, { "epoch": 2.47, "learning_rate": 4.1865733113673805e-05, "loss": 0.8046, "step": 28000 }, { "epoch": 2.51, "learning_rate": 4.1718639679924694e-05, "loss": 0.8151, "step": 28500 }, { "epoch": 2.53, "eval_bleu": 5.5675, "eval_gen_len": 17.7676, "eval_loss": 0.9301652312278748, "eval_runtime": 1487.7016, "eval_samples_per_second": 15.263, "eval_steps_per_second": 0.954, "step": 28750 }, { "epoch": 2.55, "learning_rate": 4.157154624617558e-05, "loss": 0.8019, "step": 29000 }, { "epoch": 2.6, "learning_rate": 4.142445281242646e-05, "loss": 0.8423, "step": 29500 }, { "epoch": 2.64, "learning_rate": 4.127735937867734e-05, "loss": 0.8093, "step": 30000 }, { "epoch": 2.64, "eval_bleu": 6.2644, "eval_gen_len": 17.8475, "eval_loss": 0.9149118065834045, "eval_runtime": 1510.1876, "eval_samples_per_second": 15.036, "eval_steps_per_second": 0.94, "step": 30000 }, { "epoch": 2.69, "learning_rate": 4.113026594492822e-05, "loss": 0.7833, "step": 30500 }, { "epoch": 2.73, "learning_rate": 4.09831725111791e-05, "loss": 0.7691, "step": 31000 }, { "epoch": 2.75, "eval_bleu": 6.6682, "eval_gen_len": 17.7685, "eval_loss": 0.8987509608268738, "eval_runtime": 1501.6272, "eval_samples_per_second": 15.122, "eval_steps_per_second": 0.946, "step": 31250 }, { "epoch": 2.77, "learning_rate": 4.083607907742998e-05, "loss": 0.7673, "step": 31500 }, { "epoch": 2.82, "learning_rate": 4.0688985643680864e-05, "loss": 0.8064, "step": 32000 }, { "epoch": 2.86, "learning_rate": 4.054189220993175e-05, "loss": 0.771, "step": 32500 }, { "epoch": 2.86, "eval_bleu": 5.7856, "eval_gen_len": 17.8678, "eval_loss": 0.9189176559448242, "eval_runtime": 1503.2313, "eval_samples_per_second": 15.105, "eval_steps_per_second": 0.945, "step": 32500 }, { "epoch": 2.91, "learning_rate": 4.0394798776182635e-05, "loss": 0.7652, "step": 33000 }, { "epoch": 2.95, "learning_rate": 4.024770534243352e-05, "loss": 0.7658, "step": 33500 }, { "epoch": 2.97, "eval_bleu": 6.2468, "eval_gen_len": 17.7313, "eval_loss": 0.9175418019294739, "eval_runtime": 1487.9561, "eval_samples_per_second": 15.261, "eval_steps_per_second": 0.954, "step": 33750 }, { "epoch": 2.99, "learning_rate": 4.01006119086844e-05, "loss": 0.7568, "step": 34000 }, { "epoch": 3.04, "learning_rate": 3.995351847493528e-05, "loss": 0.7913, "step": 34500 }, { "epoch": 3.08, "learning_rate": 3.9806425041186165e-05, "loss": 0.7914, "step": 35000 }, { "epoch": 3.08, "eval_bleu": 5.5525, "eval_gen_len": 17.7627, "eval_loss": 0.9019802808761597, "eval_runtime": 1501.9062, "eval_samples_per_second": 15.119, "eval_steps_per_second": 0.945, "step": 35000 }, { "epoch": 3.13, "learning_rate": 3.965933160743705e-05, "loss": 0.7333, "step": 35500 }, { "epoch": 3.17, "learning_rate": 3.951223817368793e-05, "loss": 0.7264, "step": 36000 }, { "epoch": 3.19, "eval_bleu": 6.2055, "eval_gen_len": 17.7662, "eval_loss": 0.9046412110328674, "eval_runtime": 1496.9385, "eval_samples_per_second": 15.169, "eval_steps_per_second": 0.949, "step": 36250 }, { "epoch": 3.19, "step": 36250, "total_flos": 1.257073961708028e+18, "train_loss": 1.0393056295460668, "train_runtime": 72152.6791, "train_samples_per_second": 37.766, "train_steps_per_second": 2.36 } ], "logging_steps": 500, "max_steps": 170310, "num_train_epochs": 15, "save_steps": 1250, "total_flos": 1.257073961708028e+18, "trial_name": null, "trial_params": null }