{ "best_metric": 0.9320297951582869, "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-4350", "epoch": 9.988518943742825, "eval_steps": 500, "global_step": 4350, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9988518943742825, "eval_accuracy": 0.9981441687829654, "eval_f1": 0.8958333333333334, "eval_loss": 0.006047643255442381, "eval_precision": 0.8713656387665198, "eval_recall": 0.9217148182665424, "eval_runtime": 14.9983, "eval_samples_per_second": 463.12, "eval_steps_per_second": 57.94, "step": 435 }, { "epoch": 1.148105625717566, "grad_norm": 0.025908155366778374, "learning_rate": 4.4252873563218394e-05, "loss": 0.0156, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.9986604526553735, "eval_f1": 0.92, "eval_loss": 0.004434330388903618, "eval_precision": 0.9182915506035283, "eval_recall": 0.9217148182665424, "eval_runtime": 15.157, "eval_samples_per_second": 458.27, "eval_steps_per_second": 57.333, "step": 871 }, { "epoch": 2.296211251435132, "grad_norm": 0.015398409217596054, "learning_rate": 3.850574712643678e-05, "loss": 0.0038, "step": 1000 }, { "epoch": 2.9988518943742823, "eval_accuracy": 0.9986883598917199, "eval_f1": 0.9181073703366698, "eval_loss": 0.0040275463834404945, "eval_precision": 0.8968888888888888, "eval_recall": 0.940354147250699, "eval_runtime": 15.1362, "eval_samples_per_second": 458.9, "eval_steps_per_second": 57.412, "step": 1306 }, { "epoch": 3.444316877152698, "grad_norm": 0.024467067793011665, "learning_rate": 3.275862068965517e-05, "loss": 0.0025, "step": 1500 }, { "epoch": 4.0, "eval_accuracy": 0.9985837077554209, "eval_f1": 0.9215236346948141, "eval_loss": 0.004492571111768484, "eval_precision": 0.9077757685352622, "eval_recall": 0.9356943150046598, "eval_runtime": 15.2768, "eval_samples_per_second": 454.675, "eval_steps_per_second": 56.884, "step": 1742 }, { "epoch": 4.592422502870264, "grad_norm": 0.014663909561932087, "learning_rate": 2.7011494252873566e-05, "loss": 0.0016, "step": 2000 }, { "epoch": 4.998851894374282, "eval_accuracy": 0.9985906845645076, "eval_f1": 0.9138576779026217, "eval_loss": 0.0054086255840957165, "eval_precision": 0.9181561618062088, "eval_recall": 0.9095992544268406, "eval_runtime": 15.0107, "eval_samples_per_second": 462.737, "eval_steps_per_second": 57.892, "step": 2177 }, { "epoch": 5.7405281285878305, "grad_norm": 0.002929804613813758, "learning_rate": 2.1264367816091954e-05, "loss": 0.0011, "step": 2500 }, { "epoch": 6.0, "eval_accuracy": 0.9986395222281137, "eval_f1": 0.9202965708989805, "eval_loss": 0.00529602263122797, "eval_precision": 0.9152073732718894, "eval_recall": 0.9254426840633737, "eval_runtime": 15.2091, "eval_samples_per_second": 456.701, "eval_steps_per_second": 57.137, "step": 2613 }, { "epoch": 6.888633754305396, "grad_norm": 0.06796916574239731, "learning_rate": 1.5517241379310346e-05, "loss": 0.0009, "step": 3000 }, { "epoch": 6.998851894374282, "eval_accuracy": 0.9986604526553735, "eval_f1": 0.9314179796107507, "eval_loss": 0.006041177082806826, "eval_precision": 0.9262672811059908, "eval_recall": 0.9366262814538676, "eval_runtime": 15.0006, "eval_samples_per_second": 463.048, "eval_steps_per_second": 57.931, "step": 3048 }, { "epoch": 8.0, "eval_accuracy": 0.9987511511734993, "eval_f1": 0.9290976058931859, "eval_loss": 0.005891016684472561, "eval_precision": 0.9181073703366697, "eval_recall": 0.940354147250699, "eval_runtime": 15.2005, "eval_samples_per_second": 456.957, "eval_steps_per_second": 57.169, "step": 3484 }, { "epoch": 8.036739380022961, "grad_norm": 0.0015728959115222096, "learning_rate": 9.770114942528738e-06, "loss": 0.0005, "step": 3500 }, { "epoch": 8.998851894374283, "eval_accuracy": 0.9987581279825859, "eval_f1": 0.9279404927940494, "eval_loss": 0.006726197898387909, "eval_precision": 0.9257884972170687, "eval_recall": 0.9301025163094129, "eval_runtime": 15.4019, "eval_samples_per_second": 450.984, "eval_steps_per_second": 56.422, "step": 3919 }, { "epoch": 9.184845005740527, "grad_norm": 0.11025261133909225, "learning_rate": 4.022988505747127e-06, "loss": 0.0003, "step": 4000 }, { "epoch": 9.988518943742825, "eval_accuracy": 0.998772081600759, "eval_f1": 0.9320297951582869, "eval_loss": 0.007080046460032463, "eval_precision": 0.9311627906976744, "eval_recall": 0.9328984156570364, "eval_runtime": 15.2679, "eval_samples_per_second": 454.941, "eval_steps_per_second": 56.917, "step": 4350 }, { "epoch": 9.988518943742825, "step": 4350, "total_flos": 1.1084127968547612e+16, "train_loss": 0.003044272955806776, "train_runtime": 1809.5975, "train_samples_per_second": 153.852, "train_steps_per_second": 2.404 } ], "logging_steps": 500, "max_steps": 4350, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1084127968547612e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }