{ "best_metric": 0.7607843137254902, "best_model_checkpoint": "tiny-bert-sst2-distilled/run-13/checkpoint-1120", "epoch": 8.0, "eval_steps": 500, "global_step": 1280, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.6348164081573486, "learning_rate": 0.00028293467755282096, "loss": 0.4646, "step": 160 }, { "epoch": 1.0, "eval_accuracy": 0.668235294117647, "eval_f1": 0.018561484918793503, "eval_loss": 0.43796491622924805, "eval_mcc": 0.04862166383263152, "eval_precision": 0.6666666666666666, "eval_recall": 0.009411764705882352, "eval_runtime": 1.8631, "eval_samples_per_second": 684.329, "eval_steps_per_second": 21.469, "step": 160 }, { "epoch": 2.0, "grad_norm": 1.2706644535064697, "learning_rate": 0.000242515437902418, "loss": 0.4357, "step": 320 }, { "epoch": 2.0, "eval_accuracy": 0.7184313725490196, "eval_f1": 0.37565217391304345, "eval_loss": 0.4178254306316376, "eval_mcc": 0.2995107121067069, "eval_precision": 0.72, "eval_recall": 0.2541176470588235, "eval_runtime": 1.8716, "eval_samples_per_second": 681.232, "eval_steps_per_second": 21.372, "step": 320 }, { "epoch": 3.0, "grad_norm": 2.069754123687744, "learning_rate": 0.00020209619825201498, "loss": 0.4247, "step": 480 }, { "epoch": 3.0, "eval_accuracy": 0.7419607843137255, "eval_f1": 0.5548037889039241, "eval_loss": 0.4210284948348999, "eval_mcc": 0.38745831003992626, "eval_precision": 0.6528662420382165, "eval_recall": 0.4823529411764706, "eval_runtime": 1.8687, "eval_samples_per_second": 682.3, "eval_steps_per_second": 21.405, "step": 480 }, { "epoch": 4.0, "grad_norm": 0.9811561703681946, "learning_rate": 0.000161676958601612, "loss": 0.4179, "step": 640 }, { "epoch": 4.0, "eval_accuracy": 0.7349019607843137, "eval_f1": 0.43666666666666665, "eval_loss": 0.4131487309932709, "eval_mcc": 0.3513388123150658, "eval_precision": 0.7485714285714286, "eval_recall": 0.30823529411764705, "eval_runtime": 1.8772, "eval_samples_per_second": 679.197, "eval_steps_per_second": 21.308, "step": 640 }, { "epoch": 5.0, "grad_norm": 1.1143430471420288, "learning_rate": 0.000121257718951209, "loss": 0.4113, "step": 800 }, { "epoch": 5.0, "eval_accuracy": 0.7545098039215686, "eval_f1": 0.5908496732026145, "eval_loss": 0.4124383330345154, "eval_mcc": 0.42389318898939016, "eval_precision": 0.6647058823529411, "eval_recall": 0.5317647058823529, "eval_runtime": 1.8732, "eval_samples_per_second": 680.66, "eval_steps_per_second": 21.354, "step": 800 }, { "epoch": 6.0, "grad_norm": 1.0988030433654785, "learning_rate": 8.0838479300806e-05, "loss": 0.4058, "step": 960 }, { "epoch": 6.0, "eval_accuracy": 0.7529411764705882, "eval_f1": 0.5493562231759658, "eval_loss": 0.4068860113620758, "eval_mcc": 0.40775530964263945, "eval_precision": 0.7007299270072993, "eval_recall": 0.45176470588235296, "eval_runtime": 1.8719, "eval_samples_per_second": 681.136, "eval_steps_per_second": 21.369, "step": 960 }, { "epoch": 7.0, "grad_norm": 1.373121738433838, "learning_rate": 4.0419239650403e-05, "loss": 0.407, "step": 1120 }, { "epoch": 7.0, "eval_accuracy": 0.7607843137254902, "eval_f1": 0.5960264900662252, "eval_loss": 0.4111056327819824, "eval_mcc": 0.43684887642861214, "eval_precision": 0.6818181818181818, "eval_recall": 0.5294117647058824, "eval_runtime": 1.8826, "eval_samples_per_second": 677.271, "eval_steps_per_second": 21.248, "step": 1120 }, { "epoch": 8.0, "grad_norm": 1.2871596813201904, "learning_rate": 0.0, "loss": 0.401, "step": 1280 }, { "epoch": 8.0, "eval_accuracy": 0.7521568627450981, "eval_f1": 0.5524079320113314, "eval_loss": 0.4067133963108063, "eval_mcc": 0.40673564442456084, "eval_precision": 0.693950177935943, "eval_recall": 0.4588235294117647, "eval_runtime": 1.8786, "eval_samples_per_second": 678.7, "eval_steps_per_second": 21.293, "step": 1280 } ], "logging_steps": 500, "max_steps": 1280, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 500, "total_flos": 2334043503360.0, "train_batch_size": 32, "trial_name": null, "trial_params": { "alpha": 0.7266899336949371, "learning_rate": 0.000323353917203224, "num_train_epochs": 8, "temperature": 24 } }