{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "global_step": 3810, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_bleu": 0.0167, "eval_gen_len": 19.0, "eval_loss": 1.0651702880859375, "eval_runtime": 37.0111, "eval_samples_per_second": 20.588, "eval_steps_per_second": 1.297, "step": 381 }, { "epoch": 1.31, "learning_rate": 4.343832020997376e-05, "loss": 1.5288, "step": 500 }, { "epoch": 2.0, "eval_bleu": 0.0402, "eval_gen_len": 19.0, "eval_loss": 0.933931291103363, "eval_runtime": 37.1713, "eval_samples_per_second": 20.5, "eval_steps_per_second": 1.291, "step": 762 }, { "epoch": 2.62, "learning_rate": 3.6876640419947505e-05, "loss": 1.063, "step": 1000 }, { "epoch": 3.0, "eval_bleu": 0.0311, "eval_gen_len": 19.0, "eval_loss": 0.8651323318481445, "eval_runtime": 37.9241, "eval_samples_per_second": 20.093, "eval_steps_per_second": 1.266, "step": 1143 }, { "epoch": 3.94, "learning_rate": 3.0314960629921263e-05, "loss": 0.9558, "step": 1500 }, { "epoch": 4.0, "eval_bleu": 0.1018, "eval_gen_len": 19.0, "eval_loss": 0.8271389603614807, "eval_runtime": 37.7015, "eval_samples_per_second": 20.211, "eval_steps_per_second": 1.273, "step": 1524 }, { "epoch": 5.0, "eval_bleu": 0.0744, "eval_gen_len": 19.0, "eval_loss": 0.8043217062950134, "eval_runtime": 38.014, "eval_samples_per_second": 20.045, "eval_steps_per_second": 1.263, "step": 1905 }, { "epoch": 5.25, "learning_rate": 2.3753280839895015e-05, "loss": 0.8979, "step": 2000 }, { "epoch": 6.0, "eval_bleu": 0.0786, "eval_gen_len": 19.0, "eval_loss": 0.7830905318260193, "eval_runtime": 37.5069, "eval_samples_per_second": 20.316, "eval_steps_per_second": 1.28, "step": 2286 }, { "epoch": 6.56, "learning_rate": 1.7191601049868766e-05, "loss": 0.8598, "step": 2500 }, { "epoch": 7.0, "eval_bleu": 0.086, "eval_gen_len": 19.0, "eval_loss": 0.7698926329612732, "eval_runtime": 37.7633, "eval_samples_per_second": 20.178, "eval_steps_per_second": 1.271, "step": 2667 }, { "epoch": 7.87, "learning_rate": 1.062992125984252e-05, "loss": 0.8346, "step": 3000 }, { "epoch": 8.0, "eval_bleu": 0.0803, "eval_gen_len": 19.0, "eval_loss": 0.7630091309547424, "eval_runtime": 37.5503, "eval_samples_per_second": 20.293, "eval_steps_per_second": 1.278, "step": 3048 }, { "epoch": 9.0, "eval_bleu": 0.1179, "eval_gen_len": 19.0, "eval_loss": 0.7571505904197693, "eval_runtime": 37.5775, "eval_samples_per_second": 20.278, "eval_steps_per_second": 1.277, "step": 3429 }, { "epoch": 9.19, "learning_rate": 4.068241469816273e-06, "loss": 0.8194, "step": 3500 }, { "epoch": 10.0, "eval_bleu": 0.1133, "eval_gen_len": 19.0, "eval_loss": 0.7551639676094055, "eval_runtime": 37.5909, "eval_samples_per_second": 20.271, "eval_steps_per_second": 1.277, "step": 3810 }, { "epoch": 10.0, "step": 3810, "total_flos": 2.167800665997312e+16, "train_loss": 0.9795461539521305, "train_runtime": 2020.5453, "train_samples_per_second": 30.17, "train_steps_per_second": 1.886 } ], "max_steps": 3810, "num_train_epochs": 10, "total_flos": 2.167800665997312e+16, "trial_name": null, "trial_params": null }