|
{ |
|
"best_metric": 0.8326810176125244, |
|
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-32/checkpoint-672", |
|
"epoch": 7.0, |
|
"eval_steps": 500, |
|
"global_step": 672, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.7263739109039307, |
|
"learning_rate": 0.00011867778606566929, |
|
"loss": 0.5622, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7710371819960861, |
|
"eval_f1": 0.8006814310051107, |
|
"eval_loss": 0.4762067198753357, |
|
"eval_precision": 0.7088989441930619, |
|
"eval_recall": 0.9197651663405088, |
|
"eval_runtime": 25.5548, |
|
"eval_samples_per_second": 39.993, |
|
"eval_steps_per_second": 1.252, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.8613439798355103, |
|
"learning_rate": 0.00010384306280746064, |
|
"loss": 0.4724, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8072407045009785, |
|
"eval_f1": 0.8273444347063978, |
|
"eval_loss": 0.43931350111961365, |
|
"eval_precision": 0.7492063492063492, |
|
"eval_recall": 0.923679060665362, |
|
"eval_runtime": 25.0105, |
|
"eval_samples_per_second": 40.863, |
|
"eval_steps_per_second": 1.279, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 2.853325366973877, |
|
"learning_rate": 8.900833954925197e-05, |
|
"loss": 0.4412, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8258317025440313, |
|
"eval_f1": 0.8363970588235294, |
|
"eval_loss": 0.42110058665275574, |
|
"eval_precision": 0.7885615251299827, |
|
"eval_recall": 0.8904109589041096, |
|
"eval_runtime": 25.7114, |
|
"eval_samples_per_second": 39.749, |
|
"eval_steps_per_second": 1.245, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 5.9698286056518555, |
|
"learning_rate": 7.417361629104331e-05, |
|
"loss": 0.4294, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8160469667318982, |
|
"eval_f1": 0.8192307692307692, |
|
"eval_loss": 0.41999757289886475, |
|
"eval_precision": 0.8052930056710775, |
|
"eval_recall": 0.8336594911937377, |
|
"eval_runtime": 25.0795, |
|
"eval_samples_per_second": 40.75, |
|
"eval_steps_per_second": 1.276, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.9139764308929443, |
|
"learning_rate": 5.9338893032834646e-05, |
|
"loss": 0.4153, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8297455968688845, |
|
"eval_f1": 0.843806104129264, |
|
"eval_loss": 0.4074234664440155, |
|
"eval_precision": 0.7794361525704809, |
|
"eval_recall": 0.9197651663405088, |
|
"eval_runtime": 25.2807, |
|
"eval_samples_per_second": 40.426, |
|
"eval_steps_per_second": 1.266, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 3.3249385356903076, |
|
"learning_rate": 4.4504169774625984e-05, |
|
"loss": 0.4067, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.824853228962818, |
|
"eval_f1": 0.8397493285586393, |
|
"eval_loss": 0.4091223180294037, |
|
"eval_precision": 0.7739273927392739, |
|
"eval_recall": 0.9178082191780822, |
|
"eval_runtime": 25.2618, |
|
"eval_samples_per_second": 40.456, |
|
"eval_steps_per_second": 1.267, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 2.5627498626708984, |
|
"learning_rate": 2.9669446516417323e-05, |
|
"loss": 0.3993, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8326810176125244, |
|
"eval_f1": 0.848, |
|
"eval_loss": 0.4094063341617584, |
|
"eval_precision": 0.7768729641693811, |
|
"eval_recall": 0.9334637964774951, |
|
"eval_runtime": 26.0578, |
|
"eval_samples_per_second": 39.221, |
|
"eval_steps_per_second": 1.228, |
|
"step": 672 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 864, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9, |
|
"save_steps": 500, |
|
"total_flos": 1649866380960.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.8475925399213161, |
|
"learning_rate": 0.00013351250932387796, |
|
"num_train_epochs": 9, |
|
"temperature": 10 |
|
} |
|
} |
|
|