|
{ |
|
"best_metric": 0.541095890410959, |
|
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-4/checkpoint-96", |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 480, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.4191925525665283, |
|
"learning_rate": 0.00022738674133918383, |
|
"loss": 0.098, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.541095890410959, |
|
"eval_f1": 0.20642978003384094, |
|
"eval_loss": 0.08223184198141098, |
|
"eval_precision": 0.7625, |
|
"eval_recall": 0.11937377690802348, |
|
"eval_runtime": 28.0826, |
|
"eval_samples_per_second": 36.393, |
|
"eval_steps_per_second": 1.139, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.35244929790496826, |
|
"learning_rate": 0.00017054005600438787, |
|
"loss": 0.0833, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5371819960861057, |
|
"eval_f1": 0.19145299145299144, |
|
"eval_loss": 0.07941487431526184, |
|
"eval_precision": 0.7567567567567568, |
|
"eval_recall": 0.1095890410958904, |
|
"eval_runtime": 28.0455, |
|
"eval_samples_per_second": 36.441, |
|
"eval_steps_per_second": 1.141, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.2497691810131073, |
|
"learning_rate": 0.00011369337066959191, |
|
"loss": 0.0813, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5342465753424658, |
|
"eval_f1": 0.18213058419243985, |
|
"eval_loss": 0.07839526236057281, |
|
"eval_precision": 0.7464788732394366, |
|
"eval_recall": 0.10371819960861056, |
|
"eval_runtime": 28.0524, |
|
"eval_samples_per_second": 36.432, |
|
"eval_steps_per_second": 1.141, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.2048983871936798, |
|
"learning_rate": 5.684668533479596e-05, |
|
"loss": 0.0805, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5332681017612525, |
|
"eval_f1": 0.17331022530329288, |
|
"eval_loss": 0.07847526669502258, |
|
"eval_precision": 0.7575757575757576, |
|
"eval_recall": 0.09784735812133072, |
|
"eval_runtime": 28.003, |
|
"eval_samples_per_second": 36.496, |
|
"eval_steps_per_second": 1.143, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.3579064607620239, |
|
"learning_rate": 0.0, |
|
"loss": 0.0801, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.538160469667319, |
|
"eval_f1": 0.1945392491467577, |
|
"eval_loss": 0.07800330221652985, |
|
"eval_precision": 0.76, |
|
"eval_recall": 0.11154598825831702, |
|
"eval_runtime": 27.9144, |
|
"eval_samples_per_second": 36.612, |
|
"eval_steps_per_second": 1.146, |
|
"step": 480 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 480, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 1178475986400.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.09292461434001364, |
|
"learning_rate": 0.0002842334266739798, |
|
"num_train_epochs": 5, |
|
"temperature": 2 |
|
} |
|
} |
|
|