|
{
  "best_metric": 0.7733333333333333,
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-20/checkpoint-960",
  "epoch": 7.0,
  "eval_steps": 500,
  "global_step": 1120,
  "is_hyper_param_search": true,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 2.297672986984253,
      "learning_rate": 0.0004503757705353633,
      "loss": 0.5065,
      "step": 160
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.6909803921568628,
      "eval_f1": 0.286231884057971,
      "eval_loss": 0.46740272641181946,
      "eval_mcc": 0.20370649805068908,
      "eval_precision": 0.6220472440944882,
      "eval_recall": 0.18588235294117647,
      "eval_runtime": 1.8709,
      "eval_samples_per_second": 681.504,
      "eval_steps_per_second": 21.381,
      "step": 160
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.163440227508545,
      "learning_rate": 0.0003753131421128028,
      "loss": 0.472,
      "step": 320
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.7090196078431372,
      "eval_f1": 0.29601518026565465,
      "eval_loss": 0.447980672121048,
      "eval_mcc": 0.26984218289739087,
      "eval_precision": 0.7647058823529411,
      "eval_recall": 0.18352941176470589,
      "eval_runtime": 2.1203,
      "eval_samples_per_second": 601.317,
      "eval_steps_per_second": 18.865,
      "step": 320
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.110410690307617,
      "learning_rate": 0.0003002505136902422,
      "loss": 0.4616,
      "step": 480
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.7262745098039216,
      "eval_f1": 0.5146036161335187,
      "eval_loss": 0.445524126291275,
      "eval_mcc": 0.34365078524294146,
      "eval_precision": 0.6292517006802721,
      "eval_recall": 0.43529411764705883,
      "eval_runtime": 1.8755,
      "eval_samples_per_second": 679.828,
      "eval_steps_per_second": 21.328,
      "step": 480
    },
    {
      "epoch": 4.0,
      "grad_norm": 1.1434326171875,
      "learning_rate": 0.00022518788526768165,
      "loss": 0.452,
      "step": 640
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.7341176470588235,
      "eval_f1": 0.4593301435406698,
      "eval_loss": 0.44121384620666504,
      "eval_mcc": 0.3493311330281174,
      "eval_precision": 0.7128712871287128,
      "eval_recall": 0.3388235294117647,
      "eval_runtime": 1.8788,
      "eval_samples_per_second": 678.621,
      "eval_steps_per_second": 21.29,
      "step": 640
    },
    {
      "epoch": 5.0,
      "grad_norm": 1.2687256336212158,
      "learning_rate": 0.0001501252568451211,
      "loss": 0.4436,
      "step": 800
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.7607843137254902,
      "eval_f1": 0.5970937912813739,
      "eval_loss": 0.43906503915786743,
      "eval_mcc": 0.4372564381540906,
      "eval_precision": 0.6807228915662651,
      "eval_recall": 0.5317647058823529,
      "eval_runtime": 1.8778,
      "eval_samples_per_second": 678.984,
      "eval_steps_per_second": 21.301,
      "step": 800
    },
    {
      "epoch": 6.0,
      "grad_norm": 1.7441729307174683,
      "learning_rate": 7.506262842256055e-05,
      "loss": 0.4334,
      "step": 960
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.7733333333333333,
      "eval_f1": 0.611036339165545,
      "eval_loss": 0.4335384964942932,
      "eval_mcc": 0.46528826388603933,
      "eval_precision": 0.7138364779874213,
      "eval_recall": 0.5341176470588235,
      "eval_runtime": 1.8888,
      "eval_samples_per_second": 675.037,
      "eval_steps_per_second": 21.178,
      "step": 960
    },
    {
      "epoch": 7.0,
      "grad_norm": 1.9164094924926758,
      "learning_rate": 0.0,
      "loss": 0.4319,
      "step": 1120
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.7694117647058824,
      "eval_f1": 0.5870786516853933,
      "eval_loss": 0.429920494556427,
      "eval_mcc": 0.45148621894108926,
      "eval_precision": 0.7282229965156795,
      "eval_recall": 0.49176470588235294,
      "eval_runtime": 1.8741,
      "eval_samples_per_second": 680.328,
      "eval_steps_per_second": 21.344,
      "step": 1120
    }
  ],
  "logging_steps": 500,
  "max_steps": 1120,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 7,
  "save_steps": 500,
  "total_flos": 2042288065440.0,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": {
    "alpha": 0.796338716906447,
    "learning_rate": 0.0005254383989579239,
    "num_train_epochs": 7,
    "temperature": 7
  }
}
|
|