{ "best_metric": 0.7301960784313726, "best_model_checkpoint": "tiny-bert-sst2-distilled/run-10/checkpoint-800", "epoch": 6.0, "eval_steps": 500, "global_step": 960, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 2.012742280960083, "learning_rate": 0.0007591954920690624, "loss": 0.5869, "step": 160 }, { "epoch": 1.0, "eval_accuracy": 0.6666666666666666, "eval_f1": 0.0, "eval_loss": 0.5785399675369263, "eval_mcc": 0.0, "eval_precision": 0.0, "eval_recall": 0.0, "eval_runtime": 1.8697, "eval_samples_per_second": 681.924, "eval_steps_per_second": 21.394, "step": 160 }, { "epoch": 2.0, "grad_norm": 2.057482957839966, "learning_rate": 0.0006642960555604296, "loss": 0.5715, "step": 320 }, { "epoch": 2.0, "eval_accuracy": 0.676078431372549, "eval_f1": 0.4411366711772666, "eval_loss": 0.5550761222839355, "eval_mcc": 0.22526645932553852, "eval_precision": 0.5191082802547771, "eval_recall": 0.3835294117647059, "eval_runtime": 1.8672, "eval_samples_per_second": 682.827, "eval_steps_per_second": 21.422, "step": 320 }, { "epoch": 3.0, "grad_norm": 2.645397663116455, "learning_rate": 0.0005693966190517968, "loss": 0.5736, "step": 480 }, { "epoch": 3.0, "eval_accuracy": 0.7043137254901961, "eval_f1": 0.28733459357277885, "eval_loss": 0.5566152334213257, "eval_mcc": 0.25125318174069416, "eval_precision": 0.7307692307692307, "eval_recall": 0.17882352941176471, "eval_runtime": 1.8663, "eval_samples_per_second": 683.154, "eval_steps_per_second": 21.432, "step": 480 }, { "epoch": 4.0, "grad_norm": 1.3315376043319702, "learning_rate": 0.000474497182543164, "loss": 0.5569, "step": 640 }, { "epoch": 4.0, "eval_accuracy": 0.7113725490196079, "eval_f1": 0.4121405750798722, "eval_loss": 0.5367588400840759, "eval_mcc": 0.2830725629191131, "eval_precision": 0.6417910447761194, "eval_recall": 0.3035294117647059, "eval_runtime": 1.8722, "eval_samples_per_second": 681.014, "eval_steps_per_second": 21.365, "step": 640 }, { "epoch": 5.0, "grad_norm": 1.5584187507629395, "learning_rate": 0.0003795977460345312, "loss": 0.5469, "step": 800 }, { "epoch": 5.0, "eval_accuracy": 0.7301960784313726, "eval_f1": 0.50997150997151, "eval_loss": 0.5246202945709229, "eval_mcc": 0.3496658305237892, "eval_precision": 0.6462093862815884, "eval_recall": 0.4211764705882353, "eval_runtime": 1.8851, "eval_samples_per_second": 676.35, "eval_steps_per_second": 21.219, "step": 800 }, { "epoch": 6.0, "grad_norm": 3.2840497493743896, "learning_rate": 0.0002846983095258984, "loss": 0.5382, "step": 960 }, { "epoch": 6.0, "eval_accuracy": 0.7168627450980393, "eval_f1": 0.5305591677503251, "eval_loss": 0.5253874659538269, "eval_mcc": 0.3348618979447744, "eval_precision": 0.5930232558139535, "eval_recall": 0.48, "eval_runtime": 1.8686, "eval_samples_per_second": 682.333, "eval_steps_per_second": 21.407, "step": 960 } ], "logging_steps": 500, "max_steps": 1440, "num_input_tokens_seen": 0, "num_train_epochs": 9, "save_steps": 500, "total_flos": 1750532627520.0, "train_batch_size": 32, "trial_name": null, "trial_params": { "alpha": 0.9116955099903541, "learning_rate": 0.0008540949285776952, "num_train_epochs": 9, "temperature": 21 } }