|
{ |
|
"best_metric": 0.6988037147804187, |
|
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-1/checkpoint-14292", |
|
"epoch": 9.0, |
|
"eval_steps": 500, |
|
"global_step": 14292, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.0594383478164673, |
|
"learning_rate": 8.902447569795288e-05, |
|
"loss": 0.5725, |
|
"step": 1588 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6804659216118369, |
|
"eval_f1": 0.3575949367088608, |
|
"eval_loss": 0.5597381591796875, |
|
"eval_mcc": 0.19612047304332003, |
|
"eval_precision": 0.5419664268585132, |
|
"eval_recall": 0.2668240850059032, |
|
"eval_runtime": 23.4624, |
|
"eval_samples_per_second": 541.547, |
|
"eval_steps_per_second": 16.963, |
|
"step": 1588 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.540066123008728, |
|
"learning_rate": 7.789641623570877e-05, |
|
"loss": 0.5555, |
|
"step": 3176 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6874704863843853, |
|
"eval_f1": 0.2603836841124977, |
|
"eval_loss": 0.5536655783653259, |
|
"eval_mcc": 0.18799631789309962, |
|
"eval_precision": 0.6164021164021164, |
|
"eval_recall": 0.16505312868949232, |
|
"eval_runtime": 23.2494, |
|
"eval_samples_per_second": 546.509, |
|
"eval_steps_per_second": 17.119, |
|
"step": 3176 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 2.06575608253479, |
|
"learning_rate": 6.676835677346465e-05, |
|
"loss": 0.5493, |
|
"step": 4764 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6873130804344404, |
|
"eval_f1": 0.267379679144385, |
|
"eval_loss": 0.5435917377471924, |
|
"eval_mcc": 0.18869269284566614, |
|
"eval_precision": 0.6102693602693603, |
|
"eval_recall": 0.17119244391971664, |
|
"eval_runtime": 23.3586, |
|
"eval_samples_per_second": 543.954, |
|
"eval_steps_per_second": 17.039, |
|
"step": 4764 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.0746259689331055, |
|
"learning_rate": 5.564029731122055e-05, |
|
"loss": 0.543, |
|
"step": 6352 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6910908232331182, |
|
"eval_f1": 0.36064505619807785, |
|
"eval_loss": 0.5372793674468994, |
|
"eval_mcc": 0.22096514058006989, |
|
"eval_precision": 0.5814075630252101, |
|
"eval_recall": 0.2613931523022432, |
|
"eval_runtime": 23.311, |
|
"eval_samples_per_second": 545.065, |
|
"eval_steps_per_second": 17.073, |
|
"step": 6352 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 2.557748794555664, |
|
"learning_rate": 4.451223784897644e-05, |
|
"loss": 0.5374, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6961278136313552, |
|
"eval_f1": 0.36839522329461794, |
|
"eval_loss": 0.5319377183914185, |
|
"eval_mcc": 0.23523702656150358, |
|
"eval_precision": 0.5995740149094781, |
|
"eval_recall": 0.26587957497048403, |
|
"eval_runtime": 23.5602, |
|
"eval_samples_per_second": 539.299, |
|
"eval_steps_per_second": 16.893, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.728196620941162, |
|
"learning_rate": 3.3384178386732324e-05, |
|
"loss": 0.5341, |
|
"step": 9528 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6946324571068786, |
|
"eval_f1": 0.42279083606069623, |
|
"eval_loss": 0.5333712100982666, |
|
"eval_mcc": 0.2491393222028609, |
|
"eval_precision": 0.5713711298753519, |
|
"eval_recall": 0.33553719008264465, |
|
"eval_runtime": 23.3995, |
|
"eval_samples_per_second": 543.002, |
|
"eval_steps_per_second": 17.009, |
|
"step": 9528 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 2.6901352405548096, |
|
"learning_rate": 2.225611892448822e-05, |
|
"loss": 0.5312, |
|
"step": 11116 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6938454273571542, |
|
"eval_f1": 0.4832624867162594, |
|
"eval_loss": 0.5355043411254883, |
|
"eval_mcc": 0.27487985905109846, |
|
"eval_precision": 0.552383844518676, |
|
"eval_recall": 0.4295159386068477, |
|
"eval_runtime": 23.5503, |
|
"eval_samples_per_second": 539.525, |
|
"eval_steps_per_second": 16.9, |
|
"step": 11116 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 1.672201156616211, |
|
"learning_rate": 1.112805946224411e-05, |
|
"loss": 0.5288, |
|
"step": 12704 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6979379820557217, |
|
"eval_f1": 0.42613636363636365, |
|
"eval_loss": 0.5302781462669373, |
|
"eval_mcc": 0.2569293292783036, |
|
"eval_precision": 0.5809213208316347, |
|
"eval_recall": 0.3364817001180638, |
|
"eval_runtime": 23.502, |
|
"eval_samples_per_second": 540.634, |
|
"eval_steps_per_second": 16.935, |
|
"step": 12704 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 1.6605579853057861, |
|
"learning_rate": 0.0, |
|
"loss": 0.5269, |
|
"step": 14292 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6988037147804187, |
|
"eval_f1": 0.4386093589555523, |
|
"eval_loss": 0.5291692018508911, |
|
"eval_mcc": 0.2632234170463615, |
|
"eval_precision": 0.5790085205267235, |
|
"eval_recall": 0.3530106257378985, |
|
"eval_runtime": 23.2847, |
|
"eval_samples_per_second": 545.68, |
|
"eval_steps_per_second": 17.093, |
|
"step": 14292 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 14292, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9, |
|
"save_steps": 500, |
|
"total_flos": 35180046686700.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.9165014388792823, |
|
"learning_rate": 0.00010015253516019699, |
|
"num_train_epochs": 9, |
|
"temperature": 13 |
|
} |
|
} |
|
|