{ "best_metric": 0.7615686274509804, "best_model_checkpoint": "tiny-bert-sst2-distilled/run-12/checkpoint-1120", "epoch": 8.0, "eval_steps": 500, "global_step": 1280, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.5787363052368164, "learning_rate": 0.00024237605415726302, "loss": 0.4097, "step": 160 }, { "epoch": 1.0, "eval_accuracy": 0.6658823529411765, "eval_f1": 0.009302325581395347, "eval_loss": 0.38458171486854553, "eval_mcc": 0.008873565094161137, "eval_precision": 0.4, "eval_recall": 0.004705882352941176, "eval_runtime": 1.8643, "eval_samples_per_second": 683.908, "eval_steps_per_second": 21.456, "step": 160 }, { "epoch": 2.0, "grad_norm": 0.8726533055305481, "learning_rate": 0.0002077509035633683, "loss": 0.3859, "step": 320 }, { "epoch": 2.0, "eval_accuracy": 0.7027450980392157, "eval_f1": 0.2612085769980507, "eval_loss": 0.37138044834136963, "eval_mcc": 0.24722748455315502, "eval_precision": 0.7613636363636364, "eval_recall": 0.15764705882352942, "eval_runtime": 1.863, "eval_samples_per_second": 684.383, "eval_steps_per_second": 21.471, "step": 320 }, { "epoch": 3.0, "grad_norm": 2.1823008060455322, "learning_rate": 0.00017312575296947358, "loss": 0.3761, "step": 480 }, { "epoch": 3.0, "eval_accuracy": 0.7419607843137255, "eval_f1": 0.5168869309838473, "eval_loss": 0.3662872314453125, "eval_mcc": 0.3765709652306714, "eval_precision": 0.6875, "eval_recall": 0.41411764705882353, "eval_runtime": 1.8612, "eval_samples_per_second": 685.044, "eval_steps_per_second": 21.492, "step": 480 }, { "epoch": 4.0, "grad_norm": 0.8506985306739807, "learning_rate": 0.00013850060237557887, "loss": 0.3709, "step": 640 }, { "epoch": 4.0, "eval_accuracy": 0.7231372549019608, "eval_f1": 0.3796133567662565, "eval_loss": 0.36449602246284485, "eval_mcc": 0.315387850148385, "eval_precision": 0.75, "eval_recall": 0.2541176470588235, "eval_runtime": 1.8672, "eval_samples_per_second": 682.835, "eval_steps_per_second": 21.422, "step": 640 }, { "epoch": 5.0, "grad_norm": 0.953569233417511, "learning_rate": 0.00010387545178168416, "loss": 0.3673, "step": 800 }, { "epoch": 5.0, "eval_accuracy": 0.7537254901960785, "eval_f1": 0.5552407932011331, "eval_loss": 0.3644358813762665, "eval_mcc": 0.41074948302085584, "eval_precision": 0.697508896797153, "eval_recall": 0.4611764705882353, "eval_runtime": 1.8615, "eval_samples_per_second": 684.938, "eval_steps_per_second": 21.488, "step": 800 }, { "epoch": 6.0, "grad_norm": 1.1368662118911743, "learning_rate": 6.925030118778943e-05, "loss": 0.3635, "step": 960 }, { "epoch": 6.0, "eval_accuracy": 0.7482352941176471, "eval_f1": 0.5172932330827068, "eval_loss": 0.3630056381225586, "eval_mcc": 0.3915780041490244, "eval_precision": 0.7166666666666667, "eval_recall": 0.4047058823529412, "eval_runtime": 1.8707, "eval_samples_per_second": 681.562, "eval_steps_per_second": 21.382, "step": 960 }, { "epoch": 7.0, "grad_norm": 1.311170220375061, "learning_rate": 3.4625150593894717e-05, "loss": 0.363, "step": 1120 }, { "epoch": 7.0, "eval_accuracy": 0.7615686274509804, "eval_f1": 0.5694050991501417, "eval_loss": 0.3646318018436432, "eval_mcc": 0.43081867600233087, "eval_precision": 0.7153024911032029, "eval_recall": 0.47294117647058825, "eval_runtime": 1.8727, "eval_samples_per_second": 680.818, "eval_steps_per_second": 21.359, "step": 1120 }, { "epoch": 8.0, "grad_norm": 1.0805109739303589, "learning_rate": 0.0, "loss": 0.3592, "step": 1280 }, { "epoch": 8.0, "eval_accuracy": 0.7513725490196078, "eval_f1": 0.5261584454409567, "eval_loss": 0.3618643283843994, "eval_mcc": 0.40038619635473977, "eval_precision": 0.7213114754098361, "eval_recall": 0.41411764705882353, "eval_runtime": 1.8765, "eval_samples_per_second": 679.461, "eval_steps_per_second": 21.316, "step": 1280 } ], "logging_steps": 500, "max_steps": 1280, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 500, "total_flos": 2334043503360.0, "train_batch_size": 32, "trial_name": null, "trial_params": { "alpha": 0.6289598397405775, "learning_rate": 0.00027700120475115773, "num_train_epochs": 8, "temperature": 22 } }