{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 240, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08333333333333333, "grad_norm": 5.815704345703125, "learning_rate": 4.166666666666667e-06, "loss": 0.9944, "step": 1 }, { "epoch": 1.0, "eval_accuracy_fol": 1.0, "eval_loss": 0.37742123007774353, "eval_runtime": 10.6636, "eval_samples_per_second": 6.471, "eval_steps_per_second": 1.125, "step": 12 }, { "epoch": 2.0, "eval_accuracy_fol": 0.9855072463768116, "eval_loss": 0.36990299820899963, "eval_runtime": 10.0571, "eval_samples_per_second": 6.861, "eval_steps_per_second": 1.193, "step": 24 }, { "epoch": 3.0, "eval_accuracy_fol": 1.0, "eval_loss": 0.4505714774131775, "eval_runtime": 10.0403, "eval_samples_per_second": 6.872, "eval_steps_per_second": 1.195, "step": 36 }, { "epoch": 4.0, "eval_accuracy_fol": 1.0, "eval_loss": 0.49083924293518066, "eval_runtime": 10.1223, "eval_samples_per_second": 6.817, "eval_steps_per_second": 1.186, "step": 48 }, { "epoch": 5.0, "eval_accuracy_fol": 1.0, "eval_loss": 0.551354706287384, "eval_runtime": 9.943, "eval_samples_per_second": 6.94, "eval_steps_per_second": 1.207, "step": 60 }, { "epoch": 6.0, "eval_accuracy_fol": 0.9565217391304348, "eval_loss": 0.5758901834487915, "eval_runtime": 10.0414, "eval_samples_per_second": 6.872, "eval_steps_per_second": 1.195, "step": 72 }, { "epoch": 7.0, "eval_accuracy_fol": 1.0, "eval_loss": 0.6346259117126465, "eval_runtime": 10.0121, "eval_samples_per_second": 6.892, "eval_steps_per_second": 1.199, "step": 84 }, { "epoch": 8.0, "eval_accuracy_fol": 0.9855072463768116, "eval_loss": 0.6786300539970398, "eval_runtime": 9.853, "eval_samples_per_second": 7.003, "eval_steps_per_second": 1.218, "step": 96 }, { "epoch": 9.0, "eval_accuracy_fol": 0.9855072463768116, "eval_loss": 0.7070556282997131, "eval_runtime": 10.0666, "eval_samples_per_second": 6.854, "eval_steps_per_second": 1.192, "step": 108 }, { "epoch": 10.0, "eval_accuracy_fol": 1.0, "eval_loss": 0.7301931381225586, "eval_runtime": 9.8811, "eval_samples_per_second": 6.983, "eval_steps_per_second": 1.214, "step": 120 }, { "epoch": 11.0, "eval_accuracy_fol": 1.0, "eval_loss": 0.7777052521705627, "eval_runtime": 10.0443, "eval_samples_per_second": 6.87, "eval_steps_per_second": 1.195, "step": 132 }, { "epoch": 12.0, "eval_accuracy_fol": 1.0, "eval_loss": 0.7906161546707153, "eval_runtime": 9.9021, "eval_samples_per_second": 6.968, "eval_steps_per_second": 1.212, "step": 144 }, { "epoch": 13.0, "eval_accuracy_fol": 1.0, "eval_loss": 0.7852453589439392, "eval_runtime": 10.0952, "eval_samples_per_second": 6.835, "eval_steps_per_second": 1.189, "step": 156 }, { "epoch": 14.0, "eval_accuracy_fol": 1.0, "eval_loss": 0.8031890392303467, "eval_runtime": 9.9012, "eval_samples_per_second": 6.969, "eval_steps_per_second": 1.212, "step": 168 }, { "epoch": 15.0, "eval_accuracy_fol": 1.0, "eval_loss": 0.8141315579414368, "eval_runtime": 10.0811, "eval_samples_per_second": 6.845, "eval_steps_per_second": 1.19, "step": 180 }, { "epoch": 16.0, "eval_accuracy_fol": 1.0, "eval_loss": 0.8203307390213013, "eval_runtime": 10.0556, "eval_samples_per_second": 6.862, "eval_steps_per_second": 1.193, "step": 192 }, { "epoch": 17.0, "eval_accuracy_fol": 1.0, "eval_loss": 0.8233786225318909, "eval_runtime": 10.038, "eval_samples_per_second": 6.874, "eval_steps_per_second": 1.195, "step": 204 }, { "epoch": 18.0, "eval_accuracy_fol": 1.0, "eval_loss": 0.8255409002304077, "eval_runtime": 9.8663, "eval_samples_per_second": 6.993, "eval_steps_per_second": 1.216, "step": 216 }, { "epoch": 19.0, "eval_accuracy_fol": 1.0, "eval_loss": 0.8259706497192383, "eval_runtime": 9.9818, "eval_samples_per_second": 6.913, "eval_steps_per_second": 1.202, "step": 228 }, { "epoch": 20.0, "eval_accuracy_fol": 1.0, "eval_loss": 0.8260682821273804, "eval_runtime": 9.8198, "eval_samples_per_second": 7.027, "eval_steps_per_second": 1.222, "step": 240 }, { "epoch": 20.0, "step": 240, "total_flos": 4.93214716919808e+16, "train_loss": 0.09651691963275273, "train_runtime": 472.7445, "train_samples_per_second": 11.846, "train_steps_per_second": 0.508 } ], "logging_steps": 700, "max_steps": 240, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.93214716919808e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }