{
  "best_metric": 0.8346379647749511,
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-38/checkpoint-864",
  "epoch": 9.0,
  "eval_steps": 500,
  "global_step": 864,
  "is_hyper_param_search": true,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 1.8633686304092407,
      "learning_rate": 0.00011164310281576651,
      "loss": 0.584,
      "step": 96
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.7690802348336595,
      "eval_f1": 0.7993197278911565,
      "eval_loss": 0.48832422494888306,
      "eval_precision": 0.706766917293233,
      "eval_recall": 0.9197651663405088,
      "eval_runtime": 28.3969,
      "eval_samples_per_second": 35.99,
      "eval_steps_per_second": 1.127,
      "step": 96
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.9557557106018066,
      "learning_rate": 9.76877149637957e-05,
      "loss": 0.4827,
      "step": 192
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.8003913894324853,
      "eval_f1": 0.8222996515679443,
      "eval_loss": 0.44451940059661865,
      "eval_precision": 0.7409733124018838,
      "eval_recall": 0.923679060665362,
      "eval_runtime": 28.5294,
      "eval_samples_per_second": 35.823,
      "eval_steps_per_second": 1.122,
      "step": 192
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.0629470348358154,
      "learning_rate": 8.373232711182488e-05,
      "loss": 0.448,
      "step": 288
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.824853228962818,
      "eval_f1": 0.8353265869365226,
      "eval_loss": 0.4216720759868622,
      "eval_precision": 0.7881944444444444,
      "eval_recall": 0.8884540117416829,
      "eval_runtime": 28.7944,
      "eval_samples_per_second": 35.493,
      "eval_steps_per_second": 1.111,
      "step": 288
    },
    {
      "epoch": 4.0,
      "grad_norm": 5.688210964202881,
      "learning_rate": 6.977693925985407e-05,
      "loss": 0.4341,
      "step": 384
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.8209393346379648,
      "eval_f1": 0.825214899713467,
      "eval_loss": 0.42298367619514465,
      "eval_precision": 0.8059701492537313,
      "eval_recall": 0.8454011741682974,
      "eval_runtime": 28.2399,
      "eval_samples_per_second": 36.19,
      "eval_steps_per_second": 1.133,
      "step": 384
    },
    {
      "epoch": 5.0,
      "grad_norm": 2.334526300430298,
      "learning_rate": 5.5821551407883254e-05,
      "loss": 0.4202,
      "step": 480
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.8287671232876712,
      "eval_f1": 0.8436103663985702,
      "eval_loss": 0.4076910614967346,
      "eval_precision": 0.7763157894736842,
      "eval_recall": 0.923679060665362,
      "eval_runtime": 28.534,
      "eval_samples_per_second": 35.817,
      "eval_steps_per_second": 1.121,
      "step": 480
    },
    {
      "epoch": 6.0,
      "grad_norm": 4.195997714996338,
      "learning_rate": 4.186616355591244e-05,
      "loss": 0.4109,
      "step": 576
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.8258317025440313,
      "eval_f1": 0.842756183745583,
      "eval_loss": 0.4115942418575287,
      "eval_precision": 0.7681159420289855,
      "eval_recall": 0.9334637964774951,
      "eval_runtime": 28.3073,
      "eval_samples_per_second": 36.104,
      "eval_steps_per_second": 1.13,
      "step": 576
    },
    {
      "epoch": 7.0,
      "grad_norm": 2.727288007736206,
      "learning_rate": 2.7910775703941627e-05,
      "loss": 0.4017,
      "step": 672
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.8307240704500979,
      "eval_f1": 0.8467670504871568,
      "eval_loss": 0.41019657254219055,
      "eval_precision": 0.7734627831715211,
      "eval_recall": 0.9354207436399217,
      "eval_runtime": 28.5087,
      "eval_samples_per_second": 35.849,
      "eval_steps_per_second": 1.122,
      "step": 672
    },
    {
      "epoch": 8.0,
      "grad_norm": 3.721353530883789,
      "learning_rate": 1.3955387851970814e-05,
      "loss": 0.4014,
      "step": 768
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.8258317025440313,
      "eval_f1": 0.8405017921146953,
      "eval_loss": 0.40405774116516113,
      "eval_precision": 0.775206611570248,
      "eval_recall": 0.9178082191780822,
      "eval_runtime": 28.5925,
      "eval_samples_per_second": 35.744,
      "eval_steps_per_second": 1.119,
      "step": 768
    },
    {
      "epoch": 9.0,
      "grad_norm": 2.3781871795654297,
      "learning_rate": 0.0,
      "loss": 0.3968,
      "step": 864
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.8346379647749511,
      "eval_f1": 0.8456621004566209,
      "eval_loss": 0.4000749886035919,
      "eval_precision": 0.7928082191780822,
      "eval_recall": 0.9060665362035225,
      "eval_runtime": 28.358,
      "eval_samples_per_second": 36.039,
      "eval_steps_per_second": 1.128,
      "step": 864
    }
  ],
  "logging_steps": 500,
  "max_steps": 864,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 9,
  "save_steps": 500,
  "total_flos": 2121256775520.0,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": {
    "alpha": 0.8898885181069172,
    "learning_rate": 0.00012559849066773733,
    "num_train_epochs": 9,
    "temperature": 3
  }
}