{ "best_metric": 0.7932044522554189, "best_model_checkpoint": "tiny-bert-sst2-distilled/run-22/checkpoint-2568", "epoch": 12.0, "eval_steps": 500, "global_step": 2568, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 6.217133522033691, "learning_rate": 0.00032487441871616396, "loss": 0.5489, "step": 214 }, { "epoch": 1.0, "eval_accuracy": 0.7164616285881664, "eval_f1": 0.39800995024875624, "eval_loss": 0.5112811923027039, "eval_mcc": 0.2945528643838721, "eval_precision": 0.6808510638297872, "eval_recall": 0.281195079086116, "eval_runtime": 3.1646, "eval_samples_per_second": 539.397, "eval_steps_per_second": 17.064, "step": 214 }, { "epoch": 2.0, "grad_norm": 1.936843991279602, "learning_rate": 0.00030777576509952376, "loss": 0.4958, "step": 428 }, { "epoch": 2.0, "eval_accuracy": 0.7363796133567663, "eval_f1": 0.4966442953020134, "eval_loss": 0.4881761372089386, "eval_mcc": 0.3597855187518067, "eval_precision": 0.683076923076923, "eval_recall": 0.39015817223198596, "eval_runtime": 3.9406, "eval_samples_per_second": 433.184, "eval_steps_per_second": 13.704, "step": 428 }, { "epoch": 3.0, "grad_norm": 2.401139497756958, "learning_rate": 0.0002906771114828835, "loss": 0.481, "step": 642 }, { "epoch": 3.0, "eval_accuracy": 0.7246631517281781, "eval_f1": 0.35792349726775957, "eval_loss": 0.4936811029911041, "eval_mcc": 0.32418684508290907, "eval_precision": 0.803680981595092, "eval_recall": 0.23022847100175747, "eval_runtime": 3.1821, "eval_samples_per_second": 536.439, "eval_steps_per_second": 16.97, "step": 642 }, { "epoch": 4.0, "grad_norm": 2.7682132720947266, "learning_rate": 0.00027357845786624336, "loss": 0.47, "step": 856 }, { "epoch": 4.0, "eval_accuracy": 0.7510251903925015, "eval_f1": 0.47595561035758327, "eval_loss": 0.4672105610370636, "eval_mcc": 0.4002104345910371, "eval_precision": 0.7975206611570248, "eval_recall": 0.3391915641476274, "eval_runtime": 3.3008, "eval_samples_per_second": 517.142, "eval_steps_per_second": 16.359, "step": 856 }, { "epoch": 5.0, "grad_norm": 4.235007286071777, "learning_rate": 0.0002564798042496031, "loss": 0.4564, "step": 1070 }, { "epoch": 5.0, "eval_accuracy": 0.7639132981839485, "eval_f1": 0.6385650224215247, "eval_loss": 0.46502017974853516, "eval_mcc": 0.46359962281417727, "eval_precision": 0.652014652014652, "eval_recall": 0.6256590509666081, "eval_runtime": 3.1354, "eval_samples_per_second": 544.434, "eval_steps_per_second": 17.223, "step": 1070 }, { "epoch": 6.0, "grad_norm": 3.1712186336517334, "learning_rate": 0.0002393811506329629, "loss": 0.4494, "step": 1284 }, { "epoch": 6.0, "eval_accuracy": 0.7650849443468073, "eval_f1": 0.6125603864734299, "eval_loss": 0.45874524116516113, "eval_mcc": 0.4509702487241507, "eval_precision": 0.6802575107296137, "eval_recall": 0.5571177504393673, "eval_runtime": 3.3104, "eval_samples_per_second": 515.646, "eval_steps_per_second": 16.312, "step": 1284 }, { "epoch": 7.0, "grad_norm": 5.33981466293335, "learning_rate": 0.0002222824970163227, "loss": 0.4385, "step": 1498 }, { "epoch": 7.0, "eval_accuracy": 0.7721148213239601, "eval_f1": 0.5682574916759155, "eval_loss": 0.4540397524833679, "eval_mcc": 0.4563006637386888, "eval_precision": 0.7710843373493976, "eval_recall": 0.44991212653778556, "eval_runtime": 3.165, "eval_samples_per_second": 539.344, "eval_steps_per_second": 17.062, "step": 1498 }, { "epoch": 8.0, "grad_norm": 2.767106294631958, "learning_rate": 0.00020518384339968248, "loss": 0.431, "step": 1712 }, { "epoch": 8.0, "eval_accuracy": 0.7727006444053895, "eval_f1": 0.5773420479302832, "eval_loss": 0.4424216151237488, "eval_mcc": 0.4580968046120123, "eval_precision": 0.7593123209169055, "eval_recall": 0.46572934973637964, "eval_runtime": 4.1398, "eval_samples_per_second": 412.342, "eval_steps_per_second": 13.044, "step": 1712 }, { "epoch": 9.0, "grad_norm": 7.568024635314941, "learning_rate": 0.0001880851897830423, "loss": 0.4269, "step": 1926 }, { "epoch": 9.0, "eval_accuracy": 0.7691857059168131, "eval_f1": 0.5553047404063206, "eval_loss": 0.4476867616176605, "eval_mcc": 0.4484665614833703, "eval_precision": 0.7760252365930599, "eval_recall": 0.43233743409490333, "eval_runtime": 3.1524, "eval_samples_per_second": 541.497, "eval_steps_per_second": 17.13, "step": 1926 }, { "epoch": 10.0, "grad_norm": 3.2477447986602783, "learning_rate": 0.00017098653616640208, "loss": 0.4186, "step": 2140 }, { "epoch": 10.0, "eval_accuracy": 0.7768014059753954, "eval_f1": 0.6193806193806194, "eval_loss": 0.45719364285469055, "eval_mcc": 0.4744794188942744, "eval_precision": 0.7175925925925926, "eval_recall": 0.5448154657293497, "eval_runtime": 3.2524, "eval_samples_per_second": 524.847, "eval_steps_per_second": 16.603, "step": 2140 }, { "epoch": 11.0, "grad_norm": 2.3632404804229736, "learning_rate": 0.00015388788254976188, "loss": 0.4109, "step": 2354 }, { "epoch": 11.0, "eval_accuracy": 0.773286467486819, "eval_f1": 0.5587229190421892, "eval_loss": 0.44749346375465393, "eval_mcc": 0.45996910450651607, "eval_precision": 0.7954545454545454, "eval_recall": 0.4305799648506151, "eval_runtime": 3.1492, "eval_samples_per_second": 542.04, "eval_steps_per_second": 17.147, "step": 2354 }, { "epoch": 12.0, "grad_norm": 2.5821080207824707, "learning_rate": 0.00013678922893312168, "loss": 0.4098, "step": 2568 }, { "epoch": 12.0, "eval_accuracy": 0.7932044522554189, "eval_f1": 0.6672950047125353, "eval_loss": 0.43375280499458313, "eval_mcc": 0.5213017315650612, "eval_precision": 0.7195121951219512, "eval_recall": 0.6221441124780316, "eval_runtime": 3.3029, "eval_samples_per_second": 516.82, "eval_steps_per_second": 16.349, "step": 2568 } ], "logging_steps": 500, "max_steps": 4280, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 6297307973280.0, "train_batch_size": 32, "trial_name": null, "trial_params": { "alpha": 0.8988863482372167, "learning_rate": 0.00034197307233280416, "num_train_epochs": 20, "temperature": 20 } }