{ "best_metric": 0.7733333333333333, "best_model_checkpoint": "tiny-bert-sst2-distilled/run-20/checkpoint-960", "epoch": 7.0, "eval_steps": 500, "global_step": 1120, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 2.297672986984253, "learning_rate": 0.0004503757705353633, "loss": 0.5065, "step": 160 }, { "epoch": 1.0, "eval_accuracy": 0.6909803921568628, "eval_f1": 0.286231884057971, "eval_loss": 0.46740272641181946, "eval_mcc": 0.20370649805068908, "eval_precision": 0.6220472440944882, "eval_recall": 0.18588235294117647, "eval_runtime": 1.8709, "eval_samples_per_second": 681.504, "eval_steps_per_second": 21.381, "step": 160 }, { "epoch": 2.0, "grad_norm": 2.163440227508545, "learning_rate": 0.0003753131421128028, "loss": 0.472, "step": 320 }, { "epoch": 2.0, "eval_accuracy": 0.7090196078431372, "eval_f1": 0.29601518026565465, "eval_loss": 0.447980672121048, "eval_mcc": 0.26984218289739087, "eval_precision": 0.7647058823529411, "eval_recall": 0.18352941176470589, "eval_runtime": 2.1203, "eval_samples_per_second": 601.317, "eval_steps_per_second": 18.865, "step": 320 }, { "epoch": 3.0, "grad_norm": 2.110410690307617, "learning_rate": 0.0003002505136902422, "loss": 0.4616, "step": 480 }, { "epoch": 3.0, "eval_accuracy": 0.7262745098039216, "eval_f1": 0.5146036161335187, "eval_loss": 0.445524126291275, "eval_mcc": 0.34365078524294146, "eval_precision": 0.6292517006802721, "eval_recall": 0.43529411764705883, "eval_runtime": 1.8755, "eval_samples_per_second": 679.828, "eval_steps_per_second": 21.328, "step": 480 }, { "epoch": 4.0, "grad_norm": 1.1434326171875, "learning_rate": 0.00022518788526768165, "loss": 0.452, "step": 640 }, { "epoch": 4.0, "eval_accuracy": 0.7341176470588235, "eval_f1": 0.4593301435406698, "eval_loss": 0.44121384620666504, "eval_mcc": 0.3493311330281174, "eval_precision": 0.7128712871287128, "eval_recall": 0.3388235294117647, "eval_runtime": 1.8788, "eval_samples_per_second": 678.621, "eval_steps_per_second": 21.29, "step": 640 }, { "epoch": 5.0, "grad_norm": 1.2687256336212158, "learning_rate": 0.0001501252568451211, "loss": 0.4436, "step": 800 }, { "epoch": 5.0, "eval_accuracy": 0.7607843137254902, "eval_f1": 0.5970937912813739, "eval_loss": 0.43906503915786743, "eval_mcc": 0.4372564381540906, "eval_precision": 0.6807228915662651, "eval_recall": 0.5317647058823529, "eval_runtime": 1.8778, "eval_samples_per_second": 678.984, "eval_steps_per_second": 21.301, "step": 800 }, { "epoch": 6.0, "grad_norm": 1.7441729307174683, "learning_rate": 7.506262842256055e-05, "loss": 0.4334, "step": 960 }, { "epoch": 6.0, "eval_accuracy": 0.7733333333333333, "eval_f1": 0.611036339165545, "eval_loss": 0.4335384964942932, "eval_mcc": 0.46528826388603933, "eval_precision": 0.7138364779874213, "eval_recall": 0.5341176470588235, "eval_runtime": 1.8888, "eval_samples_per_second": 675.037, "eval_steps_per_second": 21.178, "step": 960 }, { "epoch": 7.0, "grad_norm": 1.9164094924926758, "learning_rate": 0.0, "loss": 0.4319, "step": 1120 }, { "epoch": 7.0, "eval_accuracy": 0.7694117647058824, "eval_f1": 0.5870786516853933, "eval_loss": 0.429920494556427, "eval_mcc": 0.45148621894108926, "eval_precision": 0.7282229965156795, "eval_recall": 0.49176470588235294, "eval_runtime": 1.8741, "eval_samples_per_second": 680.328, "eval_steps_per_second": 21.344, "step": 1120 } ], "logging_steps": 500, "max_steps": 1120, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 500, "total_flos": 2042288065440.0, "train_batch_size": 32, "trial_name": null, "trial_params": { "alpha": 0.796338716906447, "learning_rate": 0.0005254383989579239, "num_train_epochs": 7, "temperature": 7 } }