File size: 4,806 Bytes
c6afd6e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 |
{
"best_metric": 0.7544031311154599,
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-6/checkpoint-744",
"epoch": 8.0,
"eval_steps": 500,
"global_step": 744,
"is_hyper_param_search": true,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 0.4286664128303528,
"learning_rate": 1.945409831472016e-05,
"loss": 0.4806,
"step": 93
},
{
"epoch": 1.0,
"eval_accuracy": 0.5,
"eval_f1": 0.0,
"eval_loss": 0.4703535735607147,
"eval_precision": 0.0,
"eval_recall": 0.0,
"eval_runtime": 28.42,
"eval_samples_per_second": 35.961,
"eval_steps_per_second": 1.126,
"step": 93
},
{
"epoch": 2.0,
"grad_norm": 0.94158935546875,
"learning_rate": 1.7022336025380143e-05,
"loss": 0.4648,
"step": 186
},
{
"epoch": 2.0,
"eval_accuracy": 0.5176125244618396,
"eval_f1": 0.10849909584086799,
"eval_loss": 0.4538751244544983,
"eval_precision": 0.7142857142857143,
"eval_recall": 0.05870841487279843,
"eval_runtime": 28.3351,
"eval_samples_per_second": 36.068,
"eval_steps_per_second": 1.129,
"step": 186
},
{
"epoch": 3.0,
"grad_norm": 1.0048439502716064,
"learning_rate": 1.459057373604012e-05,
"loss": 0.4406,
"step": 279
},
{
"epoch": 3.0,
"eval_accuracy": 0.5929549902152642,
"eval_f1": 0.3677811550151976,
"eval_loss": 0.4149659276008606,
"eval_precision": 0.8231292517006803,
"eval_recall": 0.23679060665362034,
"eval_runtime": 28.3925,
"eval_samples_per_second": 35.995,
"eval_steps_per_second": 1.127,
"step": 279
},
{
"epoch": 4.0,
"grad_norm": 2.1001150608062744,
"learning_rate": 1.2158811446700102e-05,
"loss": 0.4126,
"step": 372
},
{
"epoch": 4.0,
"eval_accuracy": 0.7025440313111546,
"eval_f1": 0.6456876456876457,
"eval_loss": 0.39196181297302246,
"eval_precision": 0.7982708933717579,
"eval_recall": 0.5420743639921722,
"eval_runtime": 28.4943,
"eval_samples_per_second": 35.867,
"eval_steps_per_second": 1.123,
"step": 372
},
{
"epoch": 5.0,
"grad_norm": 1.2996047735214233,
"learning_rate": 9.72704915736008e-06,
"loss": 0.4021,
"step": 465
},
{
"epoch": 5.0,
"eval_accuracy": 0.735812133072407,
"eval_f1": 0.7133757961783439,
"eval_loss": 0.3851335346698761,
"eval_precision": 0.7795823665893271,
"eval_recall": 0.6575342465753424,
"eval_runtime": 28.8344,
"eval_samples_per_second": 35.444,
"eval_steps_per_second": 1.11,
"step": 465
},
{
"epoch": 6.0,
"grad_norm": 1.8318911790847778,
"learning_rate": 7.29528686802006e-06,
"loss": 0.3976,
"step": 558
},
{
"epoch": 6.0,
"eval_accuracy": 0.7524461839530333,
"eval_f1": 0.7394438722966015,
"eval_loss": 0.3816056251525879,
"eval_precision": 0.7804347826086957,
"eval_recall": 0.7025440313111546,
"eval_runtime": 28.629,
"eval_samples_per_second": 35.698,
"eval_steps_per_second": 1.118,
"step": 558
},
{
"epoch": 7.0,
"grad_norm": 1.5164391994476318,
"learning_rate": 4.86352457868004e-06,
"loss": 0.3934,
"step": 651
},
{
"epoch": 7.0,
"eval_accuracy": 0.7504892367906066,
"eval_f1": 0.7357512953367875,
"eval_loss": 0.37981584668159485,
"eval_precision": 0.7819383259911894,
"eval_recall": 0.6947162426614482,
"eval_runtime": 29.4404,
"eval_samples_per_second": 34.714,
"eval_steps_per_second": 1.087,
"step": 651
},
{
"epoch": 8.0,
"grad_norm": 1.8273214101791382,
"learning_rate": 2.43176228934002e-06,
"loss": 0.3903,
"step": 744
},
{
"epoch": 8.0,
"eval_accuracy": 0.7544031311154599,
"eval_f1": 0.7462082912032356,
"eval_loss": 0.3790663480758667,
"eval_precision": 0.7719665271966527,
"eval_recall": 0.7221135029354208,
"eval_runtime": 28.4026,
"eval_samples_per_second": 35.983,
"eval_steps_per_second": 1.127,
"step": 744
}
],
"logging_steps": 500,
"max_steps": 837,
"num_input_tokens_seen": 0,
"num_train_epochs": 9,
"save_steps": 500,
"total_flos": 1885561578240.0,
"train_batch_size": 33,
"trial_name": null,
"trial_params": {
"alpha": 0.6122687021783514,
"learning_rate": 2.188586060406018e-05,
"num_train_epochs": 9,
"per_device_train_batch_size": 33,
"temperature": 14
}
}
|