{
  "best_metric": 0.7301960784313726,
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-10/checkpoint-800",
  "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 960,
  "is_hyper_param_search": true,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 2.012742280960083,
      "learning_rate": 0.0007591954920690624,
      "loss": 0.5869,
      "step": 160
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.6666666666666666,
      "eval_f1": 0.0,
      "eval_loss": 0.5785399675369263,
      "eval_mcc": 0.0,
      "eval_precision": 0.0,
      "eval_recall": 0.0,
      "eval_runtime": 1.8697,
      "eval_samples_per_second": 681.924,
      "eval_steps_per_second": 21.394,
      "step": 160
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.057482957839966,
      "learning_rate": 0.0006642960555604296,
      "loss": 0.5715,
      "step": 320
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.676078431372549,
      "eval_f1": 0.4411366711772666,
      "eval_loss": 0.5550761222839355,
      "eval_mcc": 0.22526645932553852,
      "eval_precision": 0.5191082802547771,
      "eval_recall": 0.3835294117647059,
      "eval_runtime": 1.8672,
      "eval_samples_per_second": 682.827,
      "eval_steps_per_second": 21.422,
      "step": 320
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.645397663116455,
      "learning_rate": 0.0005693966190517968,
      "loss": 0.5736,
      "step": 480
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.7043137254901961,
      "eval_f1": 0.28733459357277885,
      "eval_loss": 0.5566152334213257,
      "eval_mcc": 0.25125318174069416,
      "eval_precision": 0.7307692307692307,
      "eval_recall": 0.17882352941176471,
      "eval_runtime": 1.8663,
      "eval_samples_per_second": 683.154,
      "eval_steps_per_second": 21.432,
      "step": 480
    },
    {
      "epoch": 4.0,
      "grad_norm": 1.3315376043319702,
      "learning_rate": 0.000474497182543164,
      "loss": 0.5569,
      "step": 640
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.7113725490196079,
      "eval_f1": 0.4121405750798722,
      "eval_loss": 0.5367588400840759,
      "eval_mcc": 0.2830725629191131,
      "eval_precision": 0.6417910447761194,
      "eval_recall": 0.3035294117647059,
      "eval_runtime": 1.8722,
      "eval_samples_per_second": 681.014,
      "eval_steps_per_second": 21.365,
      "step": 640
    },
    {
      "epoch": 5.0,
      "grad_norm": 1.5584187507629395,
      "learning_rate": 0.0003795977460345312,
      "loss": 0.5469,
      "step": 800
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.7301960784313726,
      "eval_f1": 0.50997150997151,
      "eval_loss": 0.5246202945709229,
      "eval_mcc": 0.3496658305237892,
      "eval_precision": 0.6462093862815884,
      "eval_recall": 0.4211764705882353,
      "eval_runtime": 1.8851,
      "eval_samples_per_second": 676.35,
      "eval_steps_per_second": 21.219,
      "step": 800
    },
    {
      "epoch": 6.0,
      "grad_norm": 3.2840497493743896,
      "learning_rate": 0.0002846983095258984,
      "loss": 0.5382,
      "step": 960
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.7168627450980393,
      "eval_f1": 0.5305591677503251,
      "eval_loss": 0.5253874659538269,
      "eval_mcc": 0.3348618979447744,
      "eval_precision": 0.5930232558139535,
      "eval_recall": 0.48,
      "eval_runtime": 1.8686,
      "eval_samples_per_second": 682.333,
      "eval_steps_per_second": 21.407,
      "step": 960
    }
  ],
  "logging_steps": 500,
  "max_steps": 1440,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 9,
  "save_steps": 500,
  "total_flos": 1750532627520.0,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": {
    "alpha": 0.9116955099903541,
    "learning_rate": 0.0008540949285776952,
    "num_train_epochs": 9,
    "temperature": 21
  }
}