|
{ |
|
"best_metric": 0.541095890410959, |
|
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-5/checkpoint-402", |
|
"epoch": 6.0, |
|
"eval_steps": 500, |
|
"global_step": 402, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.22246789932250977, |
|
"learning_rate": 1.5679734473172616e-05, |
|
"loss": 0.2458, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5, |
|
"eval_f1": 0.0, |
|
"eval_loss": 0.1889893114566803, |
|
"eval_precision": 0.0, |
|
"eval_recall": 0.0, |
|
"eval_runtime": 28.3905, |
|
"eval_samples_per_second": 35.998, |
|
"eval_steps_per_second": 1.127, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.2654605507850647, |
|
"learning_rate": 1.3937541753931215e-05, |
|
"loss": 0.1861, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5, |
|
"eval_f1": 0.0, |
|
"eval_loss": 0.17826829850673676, |
|
"eval_precision": 0.0, |
|
"eval_recall": 0.0, |
|
"eval_runtime": 29.9866, |
|
"eval_samples_per_second": 34.082, |
|
"eval_steps_per_second": 1.067, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.32397571206092834, |
|
"learning_rate": 1.2195349034689811e-05, |
|
"loss": 0.1759, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5107632093933464, |
|
"eval_f1": 0.07063197026022304, |
|
"eval_loss": 0.16691070795059204, |
|
"eval_precision": 0.7037037037037037, |
|
"eval_recall": 0.03718199608610567, |
|
"eval_runtime": 28.5351, |
|
"eval_samples_per_second": 35.816, |
|
"eval_steps_per_second": 1.121, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.6136437058448792, |
|
"learning_rate": 1.045315631544841e-05, |
|
"loss": 0.168, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5362035225048923, |
|
"eval_f1": 0.18556701030927833, |
|
"eval_loss": 0.160459965467453, |
|
"eval_precision": 0.7605633802816901, |
|
"eval_recall": 0.10567514677103718, |
|
"eval_runtime": 28.6402, |
|
"eval_samples_per_second": 35.684, |
|
"eval_steps_per_second": 1.117, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.3216162919998169, |
|
"learning_rate": 8.710963596207009e-06, |
|
"loss": 0.1647, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5401174168297456, |
|
"eval_f1": 0.20068027210884357, |
|
"eval_loss": 0.1576094627380371, |
|
"eval_precision": 0.7662337662337663, |
|
"eval_recall": 0.11545988258317025, |
|
"eval_runtime": 28.3765, |
|
"eval_samples_per_second": 36.016, |
|
"eval_steps_per_second": 1.128, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.45282211899757385, |
|
"learning_rate": 6.968770876965607e-06, |
|
"loss": 0.1625, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.541095890410959, |
|
"eval_f1": 0.20373514431239387, |
|
"eval_loss": 0.15556302666664124, |
|
"eval_precision": 0.7692307692307693, |
|
"eval_recall": 0.11741682974559686, |
|
"eval_runtime": 28.8112, |
|
"eval_samples_per_second": 35.472, |
|
"eval_steps_per_second": 1.111, |
|
"step": 402 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 670, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 1414171183680.0, |
|
"train_batch_size": 46, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.19075569878013487, |
|
"learning_rate": 1.7421927192414017e-05, |
|
"num_train_epochs": 10, |
|
"per_device_train_batch_size": 46, |
|
"temperature": 24 |
|
} |
|
} |
|
|