{ "best_metric": 0.7217340363210311, "best_model_checkpoint": "tiny-bert-sst2-distilled/run-5/checkpoint-2782", "epoch": 13.0, "eval_steps": 500, "global_step": 2782, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 2.161219835281372, "learning_rate": 0.0001946740481873714, "loss": 0.2896, "step": 214 }, { "epoch": 1.0, "eval_accuracy": 0.6666666666666666, "eval_f1": 0.0, "eval_loss": 0.27429890632629395, "eval_mcc": 0.0, "eval_precision": 0.0, "eval_recall": 0.0, "eval_runtime": 3.1537, "eval_samples_per_second": 541.276, "eval_steps_per_second": 17.123, "step": 214 }, { "epoch": 2.0, "grad_norm": 1.0970500707626343, "learning_rate": 0.00017969912140372742, "loss": 0.2734, "step": 428 }, { "epoch": 2.0, "eval_accuracy": 0.6666666666666666, "eval_f1": 0.006980802792321117, "eval_loss": 0.2710207402706146, "eval_mcc": 0.01713474628469157, "eval_precision": 0.5, "eval_recall": 0.0035149384885764497, "eval_runtime": 3.9301, "eval_samples_per_second": 434.342, "eval_steps_per_second": 13.74, "step": 428 }, { "epoch": 3.0, "grad_norm": 0.7171841859817505, "learning_rate": 0.0001647241946200835, "loss": 0.2685, "step": 642 }, { "epoch": 3.0, "eval_accuracy": 0.6678383128295254, "eval_f1": 0.010471204188481676, "eval_loss": 0.27606382966041565, "eval_mcc": 0.042836865711728934, "eval_precision": 0.75, "eval_recall": 0.005272407732864675, "eval_runtime": 3.1441, "eval_samples_per_second": 542.926, "eval_steps_per_second": 17.175, "step": 642 }, { "epoch": 4.0, "grad_norm": 1.334978699684143, "learning_rate": 0.00014974926783643954, "loss": 0.266, "step": 856 }, { "epoch": 4.0, "eval_accuracy": 0.6795547744581136, "eval_f1": 0.10180623973727422, "eval_loss": 0.26485475897789, "eval_mcc": 0.14513196526792949, "eval_precision": 0.775, "eval_recall": 0.054481546572934976, "eval_runtime": 3.1938, "eval_samples_per_second": 534.478, "eval_steps_per_second": 16.908, "step": 856 }, { "epoch": 5.0, "grad_norm": 1.377930998802185, "learning_rate": 0.0001347743410527956, "loss": 0.2643, "step": 1070 }, { "epoch": 5.0, "eval_accuracy": 0.6918570591681312, "eval_f1": 0.18827160493827164, "eval_loss": 0.26378217339515686, "eval_mcc": 0.20505841470507494, "eval_precision": 0.7721518987341772, "eval_recall": 0.10720562390158173, "eval_runtime": 3.1292, "eval_samples_per_second": 545.505, "eval_steps_per_second": 17.257, "step": 1070 }, { "epoch": 6.0, "grad_norm": 1.2771140336990356, "learning_rate": 0.00011979941426915163, "loss": 0.263, "step": 1284 }, { "epoch": 6.0, "eval_accuracy": 0.6936145284124194, "eval_f1": 0.18662519440124417, "eval_loss": 0.26149189472198486, "eval_mcc": 0.2156164618376391, "eval_precision": 0.8108108108108109, "eval_recall": 0.1054481546572935, "eval_runtime": 3.2914, "eval_samples_per_second": 518.626, "eval_steps_per_second": 16.406, "step": 1284 }, { "epoch": 7.0, "grad_norm": 1.073453426361084, "learning_rate": 0.00010482448748550767, "loss": 0.2612, "step": 1498 }, { "epoch": 7.0, "eval_accuracy": 0.6994727592267135, "eval_f1": 0.21439509954058195, "eval_loss": 0.2620287537574768, "eval_mcc": 0.24129962353457945, "eval_precision": 0.8333333333333334, "eval_recall": 0.12302284710017575, "eval_runtime": 3.1567, "eval_samples_per_second": 540.751, "eval_steps_per_second": 17.106, "step": 1498 }, { "epoch": 8.0, "grad_norm": 1.2691621780395508, "learning_rate": 8.984956070186371e-05, "loss": 0.2597, "step": 1712 }, { "epoch": 8.0, "eval_accuracy": 0.69302870533099, "eval_f1": 0.17088607594936708, "eval_loss": 0.2611652910709381, "eval_mcc": 0.21751991027491313, "eval_precision": 0.8571428571428571, "eval_recall": 0.09490333919156414, "eval_runtime": 3.2468, "eval_samples_per_second": 525.752, "eval_steps_per_second": 16.632, "step": 1712 }, { "epoch": 9.0, "grad_norm": 1.0226393938064575, "learning_rate": 7.487463391821977e-05, "loss": 0.2597, "step": 1926 }, { "epoch": 9.0, "eval_accuracy": 0.6977152899824253, "eval_f1": 0.19626168224299068, "eval_loss": 0.2611730098724365, "eval_mcc": 0.2374955820778862, "eval_precision": 0.863013698630137, "eval_recall": 0.11072056239015818, "eval_runtime": 3.1639, "eval_samples_per_second": 539.53, "eval_steps_per_second": 17.068, "step": 1926 }, { "epoch": 10.0, "grad_norm": 1.0377492904663086, "learning_rate": 5.989970713457581e-05, "loss": 0.2565, "step": 2140 }, { "epoch": 10.0, "eval_accuracy": 0.7193907439953134, "eval_f1": 0.32248939179632247, "eval_loss": 0.260220468044281, "eval_mcc": 0.310001756502818, "eval_precision": 0.8260869565217391, "eval_recall": 0.20035149384885764, "eval_runtime": 3.2066, "eval_samples_per_second": 532.341, "eval_steps_per_second": 16.84, "step": 2140 }, { "epoch": 11.0, "grad_norm": 1.2514437437057495, "learning_rate": 4.4924780350931855e-05, "loss": 0.2555, "step": 2354 }, { "epoch": 11.0, "eval_accuracy": 0.700058582308143, "eval_f1": 0.20743034055727552, "eval_loss": 0.26075002551078796, "eval_mcc": 0.2474956228703306, "eval_precision": 0.8701298701298701, "eval_recall": 0.11775043936731107, "eval_runtime": 3.1394, "eval_samples_per_second": 543.734, "eval_steps_per_second": 17.201, "step": 2354 }, { "epoch": 12.0, "grad_norm": 0.8049026727676392, "learning_rate": 2.9949853567287906e-05, "loss": 0.2544, "step": 2568 }, { "epoch": 12.0, "eval_accuracy": 0.715875805506737, "eval_f1": 0.31593794076163606, "eval_loss": 0.2587771415710449, "eval_mcc": 0.29589835954792404, "eval_precision": 0.8, "eval_recall": 0.1968365553602812, "eval_runtime": 3.3501, "eval_samples_per_second": 509.539, "eval_steps_per_second": 16.119, "step": 2568 }, { "epoch": 13.0, "grad_norm": 2.94110369682312, "learning_rate": 1.4974926783643953e-05, "loss": 0.2544, "step": 2782 }, { "epoch": 13.0, "eval_accuracy": 0.7217340363210311, "eval_f1": 0.3356643356643356, "eval_loss": 0.2589167356491089, "eval_mcc": 0.31697199705587376, "eval_precision": 0.821917808219178, "eval_recall": 0.210896309314587, "eval_runtime": 3.165, "eval_samples_per_second": 539.337, "eval_steps_per_second": 17.062, "step": 2782 } ], "logging_steps": 500, "max_steps": 2996, "num_input_tokens_seen": 0, "num_train_epochs": 14, "save_steps": 500, "total_flos": 6822083637720.0, "train_batch_size": 32, "trial_name": null, "trial_params": { "alpha": 0.43581262355237016, "learning_rate": 0.00020964897497101535, "num_train_epochs": 14, "temperature": 35 } }