|
{ |
|
"best_metric": 0.7750439367311072, |
|
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-11/checkpoint-2140", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 2140, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 8.918547630310059, |
|
"learning_rate": 0.00018354791279082705, |
|
"loss": 0.5995, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7053309900410076, |
|
"eval_f1": 0.4944723618090453, |
|
"eval_loss": 0.5527746677398682, |
|
"eval_mcc": 0.29864842708933803, |
|
"eval_precision": 0.5774647887323944, |
|
"eval_recall": 0.43233743409490333, |
|
"eval_runtime": 3.1509, |
|
"eval_samples_per_second": 541.75, |
|
"eval_steps_per_second": 17.138, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.0788278579711914, |
|
"learning_rate": 0.00016519312151174438, |
|
"loss": 0.5413, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7258347978910369, |
|
"eval_f1": 0.4845814977973568, |
|
"eval_loss": 0.5296629071235657, |
|
"eval_mcc": 0.33330907379505725, |
|
"eval_precision": 0.6489675516224189, |
|
"eval_recall": 0.3866432337434095, |
|
"eval_runtime": 3.2724, |
|
"eval_samples_per_second": 521.631, |
|
"eval_steps_per_second": 16.502, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.197441339492798, |
|
"learning_rate": 0.00014683833023266166, |
|
"loss": 0.5154, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7258347978910369, |
|
"eval_f1": 0.38421052631578945, |
|
"eval_loss": 0.5396824479103088, |
|
"eval_mcc": 0.32457545244467173, |
|
"eval_precision": 0.7643979057591623, |
|
"eval_recall": 0.2565905096660808, |
|
"eval_runtime": 3.1857, |
|
"eval_samples_per_second": 535.83, |
|
"eval_steps_per_second": 16.951, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 3.9606778621673584, |
|
"learning_rate": 0.00012848353895357893, |
|
"loss": 0.504, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7264206209724663, |
|
"eval_f1": 0.3942931258106355, |
|
"eval_loss": 0.5155606269836426, |
|
"eval_mcc": 0.32574276921975914, |
|
"eval_precision": 0.7524752475247525, |
|
"eval_recall": 0.2671353251318102, |
|
"eval_runtime": 3.3181, |
|
"eval_samples_per_second": 514.447, |
|
"eval_steps_per_second": 16.274, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 4.5369954109191895, |
|
"learning_rate": 0.00011012874767449624, |
|
"loss": 0.4946, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7504393673110721, |
|
"eval_f1": 0.5590062111801243, |
|
"eval_loss": 0.5006516575813293, |
|
"eval_mcc": 0.4049525635550248, |
|
"eval_precision": 0.6801007556675063, |
|
"eval_recall": 0.47451669595782076, |
|
"eval_runtime": 3.155, |
|
"eval_samples_per_second": 541.038, |
|
"eval_steps_per_second": 17.115, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 5.3806962966918945, |
|
"learning_rate": 9.177395639541353e-05, |
|
"loss": 0.4829, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7586408904510837, |
|
"eval_f1": 0.603082851637765, |
|
"eval_loss": 0.486370325088501, |
|
"eval_mcc": 0.43615024061600965, |
|
"eval_precision": 0.6673773987206824, |
|
"eval_recall": 0.5500878734622144, |
|
"eval_runtime": 3.9493, |
|
"eval_samples_per_second": 432.226, |
|
"eval_steps_per_second": 13.673, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 6.009945392608643, |
|
"learning_rate": 7.341916511633083e-05, |
|
"loss": 0.4704, |
|
"step": 1498 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7580550673696543, |
|
"eval_f1": 0.5100830367734281, |
|
"eval_loss": 0.4908476173877716, |
|
"eval_mcc": 0.41865909968733495, |
|
"eval_precision": 0.7846715328467153, |
|
"eval_recall": 0.37785588752196836, |
|
"eval_runtime": 3.1648, |
|
"eval_samples_per_second": 539.371, |
|
"eval_steps_per_second": 17.063, |
|
"step": 1498 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 4.42345666885376, |
|
"learning_rate": 5.506437383724812e-05, |
|
"loss": 0.4564, |
|
"step": 1712 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7662565905096661, |
|
"eval_f1": 0.5973763874873865, |
|
"eval_loss": 0.47338271141052246, |
|
"eval_mcc": 0.4474691814812865, |
|
"eval_precision": 0.7014218009478673, |
|
"eval_recall": 0.5202108963093146, |
|
"eval_runtime": 3.2515, |
|
"eval_samples_per_second": 524.99, |
|
"eval_steps_per_second": 16.608, |
|
"step": 1712 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 7.744714260101318, |
|
"learning_rate": 3.6709582558165414e-05, |
|
"loss": 0.4528, |
|
"step": 1926 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7656707674282367, |
|
"eval_f1": 0.5975855130784709, |
|
"eval_loss": 0.4673592448234558, |
|
"eval_mcc": 0.4464084855010257, |
|
"eval_precision": 0.6988235294117647, |
|
"eval_recall": 0.5219683655536028, |
|
"eval_runtime": 3.1615, |
|
"eval_samples_per_second": 539.929, |
|
"eval_steps_per_second": 17.08, |
|
"step": 1926 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 5.320211887359619, |
|
"learning_rate": 1.8354791279082707e-05, |
|
"loss": 0.4449, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7750439367311072, |
|
"eval_f1": 0.6335877862595419, |
|
"eval_loss": 0.4649231731891632, |
|
"eval_mcc": 0.47665988640122975, |
|
"eval_precision": 0.6931106471816284, |
|
"eval_recall": 0.5834797891036907, |
|
"eval_runtime": 3.2751, |
|
"eval_samples_per_second": 521.205, |
|
"eval_steps_per_second": 16.488, |
|
"step": 2140 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2354, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 11, |
|
"save_steps": 500, |
|
"total_flos": 5247756644400.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.9896635045471636, |
|
"learning_rate": 0.00020190270406990977, |
|
"num_train_epochs": 11, |
|
"temperature": 11 |
|
} |
|
} |
|
|