{ "best_metric": 0.773286467486819, "best_model_checkpoint": "tiny-bert-sst2-distilled/run-31/checkpoint-2140", "epoch": 11.0, "eval_steps": 500, "global_step": 2354, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 7.312666416168213, "learning_rate": 0.00024913095817095677, "loss": 0.5995, "step": 214 }, { "epoch": 1.0, "eval_accuracy": 0.715875805506737, "eval_f1": 0.4300822561692127, "eval_loss": 0.5564725995063782, "eval_mcc": 0.2978271442275633, "eval_precision": 0.648936170212766, "eval_recall": 0.3216168717047452, "eval_runtime": 3.1678, "eval_samples_per_second": 538.866, "eval_steps_per_second": 17.047, "step": 214 }, { "epoch": 2.0, "grad_norm": 1.9955796003341675, "learning_rate": 0.00022421786235386112, "loss": 0.533, "step": 428 }, { "epoch": 2.0, "eval_accuracy": 0.7445811364967779, "eval_f1": 0.5514403292181069, "eval_loss": 0.5220016837120056, "eval_mcc": 0.39114524585919325, "eval_precision": 0.6650124069478908, "eval_recall": 0.4710017574692443, "eval_runtime": 3.1849, "eval_samples_per_second": 535.964, "eval_steps_per_second": 16.955, "step": 428 }, { "epoch": 3.0, "grad_norm": 2.413587808609009, "learning_rate": 0.00019930476653676543, "loss": 0.5076, "step": 642 }, { "epoch": 3.0, "eval_accuracy": 0.7504393673110721, "eval_f1": 0.4766584766584766, "eval_loss": 0.5150462985038757, "eval_mcc": 0.3981605013144147, "eval_precision": 0.7918367346938775, "eval_recall": 0.3409490333919156, "eval_runtime": 3.1543, "eval_samples_per_second": 541.159, "eval_steps_per_second": 17.119, "step": 642 }, { "epoch": 4.0, "grad_norm": 4.792643070220947, "learning_rate": 0.00017439167071966975, "loss": 0.4936, "step": 856 }, { "epoch": 4.0, "eval_accuracy": 0.7568834212067955, "eval_f1": 0.5077105575326216, "eval_loss": 0.5015135407447815, "eval_mcc": 0.4152737161319117, "eval_precision": 0.781021897810219, "eval_recall": 0.37609841827768015, "eval_runtime": 3.1947, "eval_samples_per_second": 534.327, "eval_steps_per_second": 16.903, "step": 856 }, { "epoch": 5.0, "grad_norm": 7.65413236618042, "learning_rate": 0.00014947857490257407, "loss": 0.4718, "step": 1070 }, { "epoch": 5.0, "eval_accuracy": 0.7662565905096661, "eval_f1": 0.5741728922091782, "eval_loss": 0.4888531267642975, "eval_mcc": 0.4422168885701626, "eval_precision": 0.7309782608695652, "eval_recall": 0.4727592267135325, "eval_runtime": 3.1595, "eval_samples_per_second": 540.283, "eval_steps_per_second": 17.092, "step": 1070 }, { "epoch": 6.0, "grad_norm": 7.2619218826293945, "learning_rate": 0.00012456547908547839, "loss": 0.4651, "step": 1284 }, { "epoch": 6.0, "eval_accuracy": 0.7627416520210897, "eval_f1": 0.5837615621788285, "eval_loss": 0.48531395196914673, "eval_mcc": 0.43661648172458356, "eval_precision": 0.7029702970297029, "eval_recall": 0.4991212653778559, "eval_runtime": 3.3544, "eval_samples_per_second": 508.885, "eval_steps_per_second": 16.098, "step": 1284 }, { "epoch": 7.0, "grad_norm": 9.458930969238281, "learning_rate": 9.965238326838272e-05, "loss": 0.4553, "step": 1498 }, { "epoch": 7.0, "eval_accuracy": 0.7633274751025191, "eval_f1": 0.536697247706422, "eval_loss": 0.48649853467941284, "eval_mcc": 0.4325666465390762, "eval_precision": 0.7722772277227723, "eval_recall": 0.4112478031634446, "eval_runtime": 3.1637, "eval_samples_per_second": 539.56, "eval_steps_per_second": 17.069, "step": 1498 }, { "epoch": 8.0, "grad_norm": 5.576714038848877, "learning_rate": 7.473928745128703e-05, "loss": 0.4423, "step": 1712 }, { "epoch": 8.0, "eval_accuracy": 0.7668424135910955, "eval_f1": 0.5738758029978586, "eval_loss": 0.48117396235466003, "eval_mcc": 0.4435339132892942, "eval_precision": 0.7342465753424657, "eval_recall": 0.4710017574692443, "eval_runtime": 3.2168, "eval_samples_per_second": 530.659, "eval_steps_per_second": 16.787, "step": 1712 }, { "epoch": 9.0, "grad_norm": 5.577918529510498, "learning_rate": 4.982619163419136e-05, "loss": 0.4406, "step": 1926 }, { "epoch": 9.0, "eval_accuracy": 0.7721148213239601, "eval_f1": 0.6026557711950972, "eval_loss": 0.46937549114227295, "eval_mcc": 0.4605923234016756, "eval_precision": 0.7195121951219512, "eval_recall": 0.5184534270650264, "eval_runtime": 3.1822, "eval_samples_per_second": 536.419, "eval_steps_per_second": 16.969, "step": 1926 }, { "epoch": 10.0, "grad_norm": 4.4660234451293945, "learning_rate": 2.491309581709568e-05, "loss": 0.4322, "step": 2140 }, { "epoch": 10.0, "eval_accuracy": 0.773286467486819, "eval_f1": 0.6063072227873855, "eval_loss": 0.47590258717536926, "eval_mcc": 0.4639025999570181, "eval_precision": 0.7198067632850241, "eval_recall": 0.523725834797891, "eval_runtime": 3.3263, "eval_samples_per_second": 513.185, "eval_steps_per_second": 16.234, "step": 2140 }, { "epoch": 11.0, "grad_norm": 4.3883819580078125, "learning_rate": 0.0, "loss": 0.4251, "step": 2354 }, { "epoch": 11.0, "eval_accuracy": 0.7697715289982425, "eval_f1": 0.6246418338108883, "eval_loss": 0.4704935848712921, "eval_mcc": 0.46404820539764163, "eval_precision": 0.6841004184100419, "eval_recall": 0.5746924428822495, "eval_runtime": 3.1424, "eval_samples_per_second": 543.22, "eval_steps_per_second": 17.184, "step": 2354 } ], "logging_steps": 500, "max_steps": 2354, "num_input_tokens_seen": 0, "num_train_epochs": 11, "save_steps": 500, "total_flos": 5772532308840.0, "train_batch_size": 32, "trial_name": null, "trial_params": { "alpha": 0.995655146834002, "learning_rate": 0.00027404405398805245, "num_train_epochs": 11, "temperature": 14 } }