{ "best_metric": 0.7521968365553603, "best_model_checkpoint": "tiny-bert-sst2-distilled/run-38/checkpoint-642", "epoch": 4.0, "eval_steps": 500, "global_step": 856, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 5.823625087738037, "learning_rate": 0.00022026694526910956, "loss": 0.4997, "step": 214 }, { "epoch": 1.0, "eval_accuracy": 0.7129466900995899, "eval_f1": 0.40243902439024387, "eval_loss": 0.4626123011112213, "eval_mcc": 0.2854021761857005, "eval_precision": 0.6573705179282868, "eval_recall": 0.28998242530755713, "eval_runtime": 3.1456, "eval_samples_per_second": 542.661, "eval_steps_per_second": 17.167, "step": 214 }, { "epoch": 2.0, "grad_norm": 1.7993404865264893, "learning_rate": 0.0001888002388020939, "loss": 0.4542, "step": 428 }, { "epoch": 2.0, "eval_accuracy": 0.7398945518453427, "eval_f1": 0.47641509433962265, "eval_loss": 0.4441252648830414, "eval_mcc": 0.366324992569054, "eval_precision": 0.7240143369175627, "eval_recall": 0.35500878734622143, "eval_runtime": 3.7126, "eval_samples_per_second": 459.782, "eval_steps_per_second": 14.545, "step": 428 }, { "epoch": 3.0, "grad_norm": 1.832775354385376, "learning_rate": 0.00015733353233507826, "loss": 0.4371, "step": 642 }, { "epoch": 3.0, "eval_accuracy": 0.7521968365553603, "eval_f1": 0.5052631578947367, "eval_loss": 0.44420263171195984, "eval_mcc": 0.4015258565021214, "eval_precision": 0.7552447552447552, "eval_recall": 0.37961335676625657, "eval_runtime": 3.1594, "eval_samples_per_second": 540.296, "eval_steps_per_second": 17.092, "step": 642 }, { "epoch": 4.0, "grad_norm": 2.8890116214752197, "learning_rate": 0.0001258668258680626, "loss": 0.4321, "step": 856 }, { "epoch": 4.0, "eval_accuracy": 0.7398945518453427, "eval_f1": 0.43367346938775514, "eval_loss": 0.4393432140350342, "eval_mcc": 0.3683014058572899, "eval_precision": 0.7906976744186046, "eval_recall": 0.29876977152899825, "eval_runtime": 3.8868, "eval_samples_per_second": 439.178, "eval_steps_per_second": 13.893, "step": 856 } ], "logging_steps": 500, "max_steps": 1712, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 500, "total_flos": 2099102657760.0, "train_batch_size": 32, "trial_name": null, "trial_params": { "alpha": 0.806613569484766, "learning_rate": 0.0002517336517361252, "num_train_epochs": 8, "temperature": 7 } }