{
  "best_metric": 0.6369415521621704,
  "best_model_checkpoint": "BERT-WMM/run-0/checkpoint-533",
  "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 3198,
  "is_hyper_param_search": true,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.94,
      "grad_norm": 11.681629180908203,
      "learning_rate": 2.5842410540171958e-05,
      "loss": 0.7181,
      "step": 500
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.7525821596244131,
      "eval_loss": 0.6369415521621704,
      "eval_runtime": 1.9836,
      "eval_samples_per_second": 1073.792,
      "eval_steps_per_second": 67.553,
      "step": 533
    },
    {
      "epoch": 1.88,
      "grad_norm": 6.492458820343018,
      "learning_rate": 2.1053231418568556e-05,
      "loss": 0.4571,
      "step": 1000
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.7497652582159624,
      "eval_loss": 0.6769182682037354,
      "eval_runtime": 2.0259,
      "eval_samples_per_second": 1051.385,
      "eval_steps_per_second": 66.144,
      "step": 1066
    },
    {
      "epoch": 2.81,
      "grad_norm": 13.743987083435059,
      "learning_rate": 1.626405229696515e-05,
      "loss": 0.2889,
      "step": 1500
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.7596244131455399,
      "eval_loss": 0.8726277351379395,
      "eval_runtime": 2.0636,
      "eval_samples_per_second": 1032.174,
      "eval_steps_per_second": 64.935,
      "step": 1599
    },
    {
      "epoch": 3.75,
      "grad_norm": 31.317466735839844,
      "learning_rate": 1.147487317536175e-05,
      "loss": 0.1815,
      "step": 2000
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.7572769953051643,
      "eval_loss": 1.311642050743103,
      "eval_runtime": 2.081,
      "eval_samples_per_second": 1023.549,
      "eval_steps_per_second": 64.392,
      "step": 2132
    },
    {
      "epoch": 4.69,
      "grad_norm": 0.05295910686254501,
      "learning_rate": 6.685694053758349e-06,
      "loss": 0.1073,
      "step": 2500
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.7610328638497652,
      "eval_loss": 1.4908134937286377,
      "eval_runtime": 2.0562,
      "eval_samples_per_second": 1035.88,
      "eval_steps_per_second": 65.168,
      "step": 2665
    },
    {
      "epoch": 5.63,
      "grad_norm": 0.06595510989427567,
      "learning_rate": 1.896514932154947e-06,
      "loss": 0.045,
      "step": 3000
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.7591549295774648,
      "eval_loss": 1.5977472066879272,
      "eval_runtime": 2.0716,
      "eval_samples_per_second": 1028.204,
      "eval_steps_per_second": 64.685,
      "step": 3198
    }
  ],
  "logging_steps": 500,
  "max_steps": 3198,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 500,
  "total_flos": 1014949731017880.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": {
    "learning_rate": 3.063158966177536e-05,
    "num_train_epochs": 6,
    "per_device_train_batch_size": 16,
    "weight_decay": 2.6426532465921284e-05
  }
}