|
{ |
|
"best_metric": 0.7694117647058824, |
|
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-17/checkpoint-1440", |
|
"epoch": 9.0, |
|
"eval_steps": 500, |
|
"global_step": 1440, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.2461354732513428, |
|
"learning_rate": 0.0004470795307643556, |
|
"loss": 0.5079, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_f1": 0.0, |
|
"eval_loss": 0.48573797941207886, |
|
"eval_mcc": 0.0, |
|
"eval_precision": 0.0, |
|
"eval_recall": 0.0, |
|
"eval_runtime": 1.875, |
|
"eval_samples_per_second": 679.994, |
|
"eval_steps_per_second": 21.333, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.359627366065979, |
|
"learning_rate": 0.00039119458941881116, |
|
"loss": 0.4809, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.691764705882353, |
|
"eval_f1": 0.18295218295218293, |
|
"eval_loss": 0.46239688992500305, |
|
"eval_mcc": 0.20568505072418322, |
|
"eval_precision": 0.7857142857142857, |
|
"eval_recall": 0.10352941176470588, |
|
"eval_runtime": 2.0666, |
|
"eval_samples_per_second": 616.947, |
|
"eval_steps_per_second": 19.355, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 2.3378326892852783, |
|
"learning_rate": 0.0003353096480732667, |
|
"loss": 0.4671, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7223529411764706, |
|
"eval_f1": 0.470059880239521, |
|
"eval_loss": 0.45456141233444214, |
|
"eval_mcc": 0.321941426257721, |
|
"eval_precision": 0.6460905349794238, |
|
"eval_recall": 0.36941176470588233, |
|
"eval_runtime": 1.8782, |
|
"eval_samples_per_second": 678.832, |
|
"eval_steps_per_second": 21.297, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.1498600244522095, |
|
"learning_rate": 0.00027942470672772226, |
|
"loss": 0.4542, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7286274509803922, |
|
"eval_f1": 0.41156462585034015, |
|
"eval_loss": 0.4409485161304474, |
|
"eval_mcc": 0.33217642136796893, |
|
"eval_precision": 0.7423312883435583, |
|
"eval_recall": 0.2847058823529412, |
|
"eval_runtime": 1.8716, |
|
"eval_samples_per_second": 681.245, |
|
"eval_steps_per_second": 21.372, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.308472752571106, |
|
"learning_rate": 0.0002235397653821778, |
|
"loss": 0.4461, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.735686274509804, |
|
"eval_f1": 0.587515299877601, |
|
"eval_loss": 0.4530380070209503, |
|
"eval_mcc": 0.3942171016225521, |
|
"eval_precision": 0.6122448979591837, |
|
"eval_recall": 0.5647058823529412, |
|
"eval_runtime": 1.8727, |
|
"eval_samples_per_second": 680.848, |
|
"eval_steps_per_second": 21.36, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.423608422279358, |
|
"learning_rate": 0.00016765482403663335, |
|
"loss": 0.4408, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7474509803921568, |
|
"eval_f1": 0.48562300319488827, |
|
"eval_loss": 0.43300551176071167, |
|
"eval_mcc": 0.3880833523891067, |
|
"eval_precision": 0.7562189054726368, |
|
"eval_recall": 0.35764705882352943, |
|
"eval_runtime": 1.867, |
|
"eval_samples_per_second": 682.912, |
|
"eval_steps_per_second": 21.425, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.9654881954193115, |
|
"learning_rate": 0.0001117698826910889, |
|
"loss": 0.4337, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.756078431372549, |
|
"eval_f1": 0.5997425997425998, |
|
"eval_loss": 0.4409618675708771, |
|
"eval_mcc": 0.4304694096409861, |
|
"eval_precision": 0.6619318181818182, |
|
"eval_recall": 0.548235294117647, |
|
"eval_runtime": 1.8658, |
|
"eval_samples_per_second": 683.346, |
|
"eval_steps_per_second": 21.438, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 1.4317703247070312, |
|
"learning_rate": 5.588494134554445e-05, |
|
"loss": 0.4256, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7647058823529411, |
|
"eval_f1": 0.5945945945945945, |
|
"eval_loss": 0.4346446692943573, |
|
"eval_mcc": 0.44362213114101423, |
|
"eval_precision": 0.6984126984126984, |
|
"eval_recall": 0.5176470588235295, |
|
"eval_runtime": 1.8629, |
|
"eval_samples_per_second": 684.431, |
|
"eval_steps_per_second": 21.472, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 1.2457499504089355, |
|
"learning_rate": 0.0, |
|
"loss": 0.421, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7694117647058824, |
|
"eval_f1": 0.5939226519337016, |
|
"eval_loss": 0.4335412383079529, |
|
"eval_mcc": 0.452898203483245, |
|
"eval_precision": 0.7190635451505016, |
|
"eval_recall": 0.5058823529411764, |
|
"eval_runtime": 1.8642, |
|
"eval_samples_per_second": 683.942, |
|
"eval_steps_per_second": 21.457, |
|
"step": 1440 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 1440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9, |
|
"save_steps": 500, |
|
"total_flos": 2625798941280.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.7930879892159086, |
|
"learning_rate": 0.0005029644721099001, |
|
"num_train_epochs": 9, |
|
"temperature": 30 |
|
} |
|
} |
|
|