|
{ |
|
"best_metric": 0.8346379647749511, |
|
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-20/checkpoint-480", |
|
"epoch": 7.0, |
|
"eval_steps": 500, |
|
"global_step": 672, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 3.3483004570007324, |
|
"learning_rate": 0.00013855533928571864, |
|
"loss": 0.5685, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7671232876712328, |
|
"eval_f1": 0.8019966722129783, |
|
"eval_loss": 0.477300226688385, |
|
"eval_precision": 0.6975397973950795, |
|
"eval_recall": 0.9432485322896281, |
|
"eval_runtime": 25.843, |
|
"eval_samples_per_second": 39.546, |
|
"eval_steps_per_second": 1.238, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 2.558192729949951, |
|
"learning_rate": 0.00011546278273809888, |
|
"loss": 0.4736, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8072407045009785, |
|
"eval_f1": 0.8255093002657219, |
|
"eval_loss": 0.43477192521095276, |
|
"eval_precision": 0.7540453074433657, |
|
"eval_recall": 0.9119373776908023, |
|
"eval_runtime": 25.942, |
|
"eval_samples_per_second": 39.396, |
|
"eval_steps_per_second": 1.234, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.4191761016845703, |
|
"learning_rate": 9.237022619047909e-05, |
|
"loss": 0.4377, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8258317025440313, |
|
"eval_f1": 0.8381818181818183, |
|
"eval_loss": 0.4183538258075714, |
|
"eval_precision": 0.7826825127334465, |
|
"eval_recall": 0.9021526418786693, |
|
"eval_runtime": 25.665, |
|
"eval_samples_per_second": 39.821, |
|
"eval_steps_per_second": 1.247, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 7.199775218963623, |
|
"learning_rate": 6.927766964285932e-05, |
|
"loss": 0.4231, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8199608610567515, |
|
"eval_f1": 0.8257575757575758, |
|
"eval_loss": 0.4184337258338928, |
|
"eval_precision": 0.8, |
|
"eval_recall": 0.8532289628180039, |
|
"eval_runtime": 25.0602, |
|
"eval_samples_per_second": 40.782, |
|
"eval_steps_per_second": 1.277, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 2.4152629375457764, |
|
"learning_rate": 4.6185113095239546e-05, |
|
"loss": 0.4131, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8346379647749511, |
|
"eval_f1": 0.8467815049864007, |
|
"eval_loss": 0.4067833125591278, |
|
"eval_precision": 0.7888513513513513, |
|
"eval_recall": 0.913894324853229, |
|
"eval_runtime": 24.9745, |
|
"eval_samples_per_second": 40.922, |
|
"eval_steps_per_second": 1.281, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 3.8709616661071777, |
|
"learning_rate": 2.3092556547619773e-05, |
|
"loss": 0.4025, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8307240704500979, |
|
"eval_f1": 0.8442844284428443, |
|
"eval_loss": 0.4088129699230194, |
|
"eval_precision": 0.7816666666666666, |
|
"eval_recall": 0.9178082191780822, |
|
"eval_runtime": 25.2849, |
|
"eval_samples_per_second": 40.419, |
|
"eval_steps_per_second": 1.266, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 3.1289896965026855, |
|
"learning_rate": 0.0, |
|
"loss": 0.398, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8287671232876712, |
|
"eval_f1": 0.839596700274977, |
|
"eval_loss": 0.40621256828308105, |
|
"eval_precision": 0.7896551724137931, |
|
"eval_recall": 0.8962818003913894, |
|
"eval_runtime": 25.662, |
|
"eval_samples_per_second": 39.825, |
|
"eval_steps_per_second": 1.247, |
|
"step": 672 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 672, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 7, |
|
"save_steps": 500, |
|
"total_flos": 1649866380960.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.8907547983808969, |
|
"learning_rate": 0.00016164789583333842, |
|
"num_train_epochs": 7, |
|
"temperature": 2 |
|
} |
|
} |
|
|