{ "best_metric": 0.8038990825688074, "best_model_checkpoint": "tiny-bert-sst2-distilled/run-4/checkpoint-2635", "epoch": 6.0, "eval_steps": 500, "global_step": 3162, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 6.212067127227783, "learning_rate": 1.7783082729779372e-05, "loss": 1.8496, "step": 527 }, { "epoch": 1.0, "eval_accuracy": 0.7717889908256881, "eval_loss": 1.3957021236419678, "eval_runtime": 2.8127, "eval_samples_per_second": 310.024, "eval_steps_per_second": 2.489, "step": 527 }, { "epoch": 2.0, "grad_norm": NaN, "learning_rate": 1.4233214981823416e-05, "loss": 1.2959, "step": 1054 }, { "epoch": 2.0, "eval_accuracy": 0.7924311926605505, "eval_loss": 1.2182828187942505, "eval_runtime": 2.8191, "eval_samples_per_second": 309.323, "eval_steps_per_second": 2.483, "step": 1054 }, { "epoch": 3.0, "grad_norm": 32.567195892333984, "learning_rate": 1.067659843586754e-05, "loss": 1.0858, "step": 1581 }, { "epoch": 3.0, "eval_accuracy": 0.7958715596330275, "eval_loss": 1.1549702882766724, "eval_runtime": 2.8186, "eval_samples_per_second": 309.375, "eval_steps_per_second": 2.484, "step": 1581 }, { "epoch": 4.0, "grad_norm": 19.192340850830078, "learning_rate": 7.119981889911666e-06, "loss": 0.9641, "step": 2108 }, { "epoch": 4.0, "eval_accuracy": 0.801605504587156, "eval_loss": 1.103049874305725, "eval_runtime": 2.81, "eval_samples_per_second": 310.323, "eval_steps_per_second": 2.491, "step": 2108 }, { "epoch": 5.0, "grad_norm": 11.936226844787598, "learning_rate": 3.563365343955791e-06, "loss": 0.9032, "step": 2635 }, { "epoch": 5.0, "eval_accuracy": 0.8038990825688074, "eval_loss": 1.0780220031738281, "eval_runtime": 2.8278, "eval_samples_per_second": 308.366, "eval_steps_per_second": 2.475, "step": 2635 }, { "epoch": 6.0, "grad_norm": 28.104488372802734, "learning_rate": 6.748797999916271e-09, "loss": 0.8596, "step": 3162 }, { "epoch": 6.0, "eval_accuracy": 0.8038990825688074, "eval_loss": 1.0743507146835327, "eval_runtime": 2.8183, "eval_samples_per_second": 309.402, "eval_steps_per_second": 2.484, "step": 3162 } ], "logging_steps": 500, "max_steps": 3162, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 500, "total_flos": 48527917525620.0, "train_batch_size": 128, "trial_name": null, "trial_params": { "alpha": 0.6704103438400755, "learning_rate": 2.1339699275735247e-05, "num_train_epochs": 6, "temperature": 15 } }