{ "best_metric": 0.7633274751025191, "best_model_checkpoint": "tiny-bert-sst2-distilled/run-13/checkpoint-1284", "epoch": 7.0, "eval_steps": 500, "global_step": 1498, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 5.252918720245361, "learning_rate": 0.00015581725710675, "loss": 0.4793, "step": 214 }, { "epoch": 1.0, "eval_accuracy": 0.6994727592267135, "eval_f1": 0.2982216142270862, "eval_loss": 0.45286598801612854, "eval_mcc": 0.23321001518594284, "eval_precision": 0.6728395061728395, "eval_recall": 0.19156414762741653, "eval_runtime": 3.1418, "eval_samples_per_second": 543.327, "eval_steps_per_second": 17.188, "step": 214 }, { "epoch": 2.0, "grad_norm": 1.6013866662979126, "learning_rate": 0.00013850422853933333, "loss": 0.4377, "step": 428 }, { "epoch": 2.0, "eval_accuracy": 0.7393087287639133, "eval_f1": 0.44165621079046424, "eval_loss": 0.4278346300125122, "eval_mcc": 0.36530426808949645, "eval_precision": 0.7719298245614035, "eval_recall": 0.3093145869947276, "eval_runtime": 3.129, "eval_samples_per_second": 545.542, "eval_steps_per_second": 17.258, "step": 428 }, { "epoch": 3.0, "grad_norm": 1.8795703649520874, "learning_rate": 0.00012119119997191666, "loss": 0.4198, "step": 642 }, { "epoch": 3.0, "eval_accuracy": 0.7410661980082015, "eval_f1": 0.45566502463054187, "eval_loss": 0.4231508672237396, "eval_mcc": 0.36988431980614395, "eval_precision": 0.7613168724279835, "eval_recall": 0.3251318101933216, "eval_runtime": 3.1432, "eval_samples_per_second": 543.083, "eval_steps_per_second": 17.18, "step": 642 }, { "epoch": 4.0, "grad_norm": 3.028773784637451, "learning_rate": 0.00010387817140449999, "loss": 0.4158, "step": 856 }, { "epoch": 4.0, "eval_accuracy": 0.7492677211482133, "eval_f1": 0.48681055155875297, "eval_loss": 0.41446971893310547, "eval_mcc": 0.3934937852291293, "eval_precision": 0.7660377358490567, "eval_recall": 0.35676625659050965, "eval_runtime": 3.1319, "eval_samples_per_second": 545.042, "eval_steps_per_second": 17.242, "step": 856 }, { "epoch": 5.0, "grad_norm": 2.238177537918091, "learning_rate": 8.656514283708333e-05, "loss": 0.4093, "step": 1070 }, { "epoch": 5.0, "eval_accuracy": 0.7545401288810779, "eval_f1": 0.5144843568945539, "eval_loss": 0.41232869029045105, "eval_mcc": 0.40811560014590953, "eval_precision": 0.7551020408163265, "eval_recall": 0.39015817223198596, "eval_runtime": 3.1304, "eval_samples_per_second": 545.304, "eval_steps_per_second": 17.25, "step": 1070 }, { "epoch": 6.0, "grad_norm": 2.982135772705078, "learning_rate": 6.925211426966667e-05, "loss": 0.406, "step": 1284 }, { "epoch": 6.0, "eval_accuracy": 0.7633274751025191, "eval_f1": 0.5817805383022774, "eval_loss": 0.4112951457500458, "eval_mcc": 0.4373095480521575, "eval_precision": 0.707808564231738, "eval_recall": 0.4938488576449912, "eval_runtime": 3.1421, "eval_samples_per_second": 543.275, "eval_steps_per_second": 17.186, "step": 1284 }, { "epoch": 7.0, "grad_norm": 5.224627494812012, "learning_rate": 5.1939085702249996e-05, "loss": 0.4012, "step": 1498 }, { "epoch": 7.0, "eval_accuracy": 0.760398359695372, "eval_f1": 0.5193889541715628, "eval_loss": 0.40900105237960815, "eval_mcc": 0.4249892956955117, "eval_precision": 0.7836879432624113, "eval_recall": 0.3884007029876977, "eval_runtime": 3.1417, "eval_samples_per_second": 543.33, "eval_steps_per_second": 17.188, "step": 1498 } ], "logging_steps": 500, "max_steps": 2140, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 3673429651080.0, "train_batch_size": 32, "trial_name": null, "trial_params": { "alpha": 0.7600265334081553, "learning_rate": 0.00017313028567416666, "num_train_epochs": 10, "temperature": 20 } }