{
  "best_metric": 0.9028693291973633,
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-27/checkpoint-3870",
  "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 3870,
  "is_hyper_param_search": true,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 2.374743938446045,
      "learning_rate": 8.115439310915276e-05,
      "loss": 0.333,
      "step": 645
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.8912369135323769,
      "eval_f1": 0.16888888888888887,
      "eval_loss": 0.30435287952423096,
      "eval_mcc": 0.19762599967287012,
      "eval_precision": 0.5377358490566038,
      "eval_recall": 0.10017574692442882,
      "eval_runtime": 9.9164,
      "eval_samples_per_second": 520.146,
      "eval_steps_per_second": 16.337,
      "step": 645
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.9690736532211304,
      "learning_rate": 6.95609083792738e-05,
      "loss": 0.2975,
      "step": 1290
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.8953082590151221,
      "eval_f1": 0.19161676646706588,
      "eval_loss": 0.2960352897644043,
      "eval_mcc": 0.2394077507232092,
      "eval_precision": 0.6464646464646465,
      "eval_recall": 0.11247803163444639,
      "eval_runtime": 9.9096,
      "eval_samples_per_second": 520.505,
      "eval_steps_per_second": 16.348,
      "step": 1290
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.378129005432129,
      "learning_rate": 5.796742364939483e-05,
      "loss": 0.2881,
      "step": 1935
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.8968592477704537,
      "eval_f1": 0.25280898876404495,
      "eval_loss": 0.28235557675361633,
      "eval_mcc": 0.27977789937011266,
      "eval_precision": 0.6293706293706294,
      "eval_recall": 0.15817223198594024,
      "eval_runtime": 9.9079,
      "eval_samples_per_second": 520.597,
      "eval_steps_per_second": 16.351,
      "step": 1935
    },
    {
      "epoch": 4.0,
      "grad_norm": 2.622187852859497,
      "learning_rate": 4.6373938919515864e-05,
      "loss": 0.2834,
      "step": 2580
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.9003489724699496,
      "eval_f1": 0.3622828784119107,
      "eval_loss": 0.2773243486881256,
      "eval_mcc": 0.354261144004657,
      "eval_precision": 0.6160337552742616,
      "eval_recall": 0.2565905096660808,
      "eval_runtime": 9.9154,
      "eval_samples_per_second": 520.202,
      "eval_steps_per_second": 16.338,
      "step": 2580
    },
    {
      "epoch": 5.0,
      "grad_norm": 2.0789802074432373,
      "learning_rate": 3.47804541896369e-05,
      "loss": 0.2776,
      "step": 3225
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.8984102365257852,
      "eval_f1": 0.3498759305210918,
      "eval_loss": 0.2715882658958435,
      "eval_mcc": 0.33948247492694117,
      "eval_precision": 0.5949367088607594,
      "eval_recall": 0.2478031634446397,
      "eval_runtime": 9.9239,
      "eval_samples_per_second": 519.755,
      "eval_steps_per_second": 16.324,
      "step": 3225
    },
    {
      "epoch": 6.0,
      "grad_norm": 1.8614028692245483,
      "learning_rate": 2.3186969459757932e-05,
      "loss": 0.2734,
      "step": 3870
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.9028693291973633,
      "eval_f1": 0.33817701453104354,
      "eval_loss": 0.26717719435691833,
      "eval_mcc": 0.35420175730444564,
      "eval_precision": 0.6808510638297872,
      "eval_recall": 0.22495606326889278,
      "eval_runtime": 9.9134,
      "eval_samples_per_second": 520.306,
      "eval_steps_per_second": 16.342,
      "step": 3870
    }
  ],
  "logging_steps": 500,
  "max_steps": 5160,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 8,
  "save_steps": 500,
  "total_flos": 9521655195600.0,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": {
    "alpha": 0.9885260340274179,
    "learning_rate": 9.274787783903173e-05,
    "num_train_epochs": 8,
    "temperature": 24
  }
}