{ "best_metric": 0.7475538160469667, "best_model_checkpoint": "tiny-bert-sst2-distilled/run-2/checkpoint-1344", "epoch": 9.0, "eval_steps": 500, "global_step": 1728, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.0371571779251099, "learning_rate": 0.0001305282668790618, "loss": 0.3216, "step": 192 }, { "epoch": 1.0, "eval_accuracy": 0.5499021526418787, "eval_f1": 0.23841059602649006, "eval_loss": 0.2955350875854492, "eval_mcc": 0.17350853577425898, "eval_precision": 0.7741935483870968, "eval_recall": 0.14090019569471623, "eval_runtime": 67.6225, "eval_samples_per_second": 15.113, "eval_steps_per_second": 0.473, "step": 192 }, { "epoch": 2.0, "grad_norm": 3.1835968494415283, "learning_rate": 0.00011421223351917907, "loss": 0.2979, "step": 384 }, { "epoch": 2.0, "eval_accuracy": 0.5587084148727984, "eval_f1": 0.2642740619902121, "eval_loss": 0.29196101427078247, "eval_mcc": 0.19586518668589045, "eval_precision": 0.7941176470588235, "eval_recall": 0.15851272015655576, "eval_runtime": 67.5618, "eval_samples_per_second": 15.127, "eval_steps_per_second": 0.474, "step": 384 }, { "epoch": 3.0, "grad_norm": 1.4907313585281372, "learning_rate": 9.789620015929635e-05, "loss": 0.288, "step": 576 }, { "epoch": 3.0, "eval_accuracy": 0.6555772994129159, "eval_f1": 0.5243243243243243, "eval_loss": 0.28221988677978516, "eval_mcc": 0.37311506920915233, "eval_precision": 0.8471615720524017, "eval_recall": 0.3796477495107632, "eval_runtime": 67.1649, "eval_samples_per_second": 15.216, "eval_steps_per_second": 0.476, "step": 576 }, { "epoch": 4.0, "grad_norm": 2.161053419113159, "learning_rate": 8.158016679941362e-05, "loss": 0.2856, "step": 768 }, { "epoch": 4.0, "eval_accuracy": 0.5968688845401174, "eval_f1": 0.3757575757575758, "eval_loss": 0.2846841514110565, "eval_mcc": 0.274495316321839, "eval_precision": 0.8322147651006712, "eval_recall": 0.24266144814090018, "eval_runtime": 67.4167, "eval_samples_per_second": 15.159, "eval_steps_per_second": 0.475, "step": 768 }, { "epoch": 5.0, "grad_norm": 1.5259861946105957, "learning_rate": 6.52641334395309e-05, "loss": 0.2841, "step": 960 }, { "epoch": 5.0, "eval_accuracy": 0.6829745596868885, "eval_f1": 0.5759162303664921, "eval_loss": 0.28138452768325806, "eval_mcc": 0.42395345222624214, "eval_precision": 0.8695652173913043, "eval_recall": 0.43052837573385516, "eval_runtime": 67.041, "eval_samples_per_second": 15.244, "eval_steps_per_second": 0.477, "step": 960 }, { "epoch": 6.0, "grad_norm": 1.6680471897125244, "learning_rate": 4.8948100079648175e-05, "loss": 0.2809, "step": 1152 }, { "epoch": 6.0, "eval_accuracy": 0.7270058708414873, "eval_f1": 0.6674612634088201, "eval_loss": 0.282234251499176, "eval_mcc": 0.48626311495705427, "eval_precision": 0.8536585365853658, "eval_recall": 0.547945205479452, "eval_runtime": 67.379, "eval_samples_per_second": 15.168, "eval_steps_per_second": 0.475, "step": 1152 }, { "epoch": 7.0, "grad_norm": 1.0517845153808594, "learning_rate": 3.263206671976545e-05, "loss": 0.2793, "step": 1344 }, { "epoch": 7.0, "eval_accuracy": 0.7475538160469667, "eval_f1": 0.7041284403669726, "eval_loss": 0.28309565782546997, "eval_mcc": 0.5179241840022546, "eval_precision": 0.850415512465374, "eval_recall": 0.6007827788649707, "eval_runtime": 67.566, "eval_samples_per_second": 15.126, "eval_steps_per_second": 0.474, "step": 1344 }, { "epoch": 8.0, "grad_norm": 1.6533994674682617, "learning_rate": 1.6316033359882725e-05, "loss": 0.2774, "step": 1536 }, { "epoch": 8.0, "eval_accuracy": 0.7221135029354208, "eval_f1": 0.6594724220623502, "eval_loss": 0.282230943441391, "eval_mcc": 0.4777338430619903, "eval_precision": 0.8513931888544891, "eval_recall": 0.538160469667319, "eval_runtime": 67.39, "eval_samples_per_second": 15.165, "eval_steps_per_second": 0.475, "step": 1536 }, { "epoch": 9.0, "grad_norm": 0.621838390827179, "learning_rate": 0.0, "loss": 0.2774, "step": 1728 }, { "epoch": 9.0, "eval_accuracy": 0.7142857142857143, "eval_f1": 0.642156862745098, "eval_loss": 0.2810159921646118, "eval_mcc": 0.46831127867565187, "eval_precision": 0.8590163934426229, "eval_recall": 0.512720156555773, "eval_runtime": 67.055, "eval_samples_per_second": 15.241, "eval_steps_per_second": 0.477, "step": 1728 } ], "logging_steps": 500, "max_steps": 1728, "num_input_tokens_seen": 0, "num_train_epochs": 9, "save_steps": 500, "total_flos": 2121256775520.0, "train_batch_size": 16, "trial_name": null, "trial_params": { "alpha": 0.4208671172864604, "learning_rate": 0.00014684430023894452, "num_train_epochs": 9, "per_device_train_batch_size": 16, "temperature": 48 } }