File size: 5,851 Bytes
7c15122 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 |
{
"best_metric": 0.8754267260214113,
"best_model_checkpoint": "./output//roberta-large_ipc1_G_5_32_5e-6_0.01_0.06_07-08-22_06-40/checkpoint-24000",
"epoch": 0.14484881405033495,
"global_step": 24000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 2.0117890840324302e-07,
"loss": 0.4247,
"step": 2000
},
{
"epoch": 0.01,
"eval_accuracy": 0.03380152270937254,
"eval_f1": 0.0619012340183338,
"eval_loss": 0.20814624428749084,
"eval_roc_auc": 0.5159319239399485,
"eval_runtime": 8197.4985,
"eval_samples_per_second": 33.455,
"eval_steps_per_second": 2.091,
"step": 2000
},
{
"epoch": 0.02,
"learning_rate": 4.0235781680648604e-07,
"loss": 0.1716,
"step": 4000
},
{
"epoch": 0.02,
"eval_accuracy": 0.6045477086432718,
"eval_f1": 0.7149013381719211,
"eval_loss": 0.12680654227733612,
"eval_roc_auc": 0.7994218657046591,
"eval_runtime": 8196.5733,
"eval_samples_per_second": 33.459,
"eval_steps_per_second": 2.091,
"step": 4000
},
{
"epoch": 0.04,
"learning_rate": 6.03536725209729e-07,
"loss": 0.114,
"step": 6000
},
{
"epoch": 0.04,
"eval_accuracy": 0.7269478719990665,
"eval_f1": 0.8064969810911458,
"eval_loss": 0.09042555838823318,
"eval_roc_auc": 0.863152127136236,
"eval_runtime": 8195.1585,
"eval_samples_per_second": 33.465,
"eval_steps_per_second": 2.092,
"step": 6000
},
{
"epoch": 0.05,
"learning_rate": 8.047156336129721e-07,
"loss": 0.0865,
"step": 8000
},
{
"epoch": 0.05,
"eval_accuracy": 0.774762258977276,
"eval_f1": 0.8402673463726624,
"eval_loss": 0.07436466217041016,
"eval_roc_auc": 0.8920166546037435,
"eval_runtime": 8191.8248,
"eval_samples_per_second": 33.478,
"eval_steps_per_second": 2.092,
"step": 8000
},
{
"epoch": 0.06,
"learning_rate": 1.005894542016215e-06,
"loss": 0.0743,
"step": 10000
},
{
"epoch": 0.06,
"eval_accuracy": 0.7829883900703013,
"eval_f1": 0.8487029462020401,
"eval_loss": 0.06654931604862213,
"eval_roc_auc": 0.9001618402078589,
"eval_runtime": 8196.3415,
"eval_samples_per_second": 33.46,
"eval_steps_per_second": 2.091,
"step": 10000
},
{
"epoch": 0.07,
"learning_rate": 1.207073450419458e-06,
"loss": 0.0664,
"step": 12000
},
{
"epoch": 0.07,
"eval_accuracy": 0.794452466380794,
"eval_f1": 0.859163976123391,
"eval_loss": 0.061000920832157135,
"eval_roc_auc": 0.9083440937813021,
"eval_runtime": 8197.7585,
"eval_samples_per_second": 33.454,
"eval_steps_per_second": 2.091,
"step": 12000
},
{
"epoch": 0.08,
"learning_rate": 1.4082523588227012e-06,
"loss": 0.062,
"step": 14000
},
{
"epoch": 0.08,
"eval_accuracy": 0.7962172923777019,
"eval_f1": 0.8624403660379281,
"eval_loss": 0.05871045961976051,
"eval_roc_auc": 0.9133217347648154,
"eval_runtime": 8199.4678,
"eval_samples_per_second": 33.447,
"eval_steps_per_second": 2.091,
"step": 14000
},
{
"epoch": 0.1,
"learning_rate": 1.6094312672259442e-06,
"loss": 0.0599,
"step": 16000
},
{
"epoch": 0.1,
"eval_accuracy": 0.7979055453458184,
"eval_f1": 0.8656510165555269,
"eval_loss": 0.05596928298473358,
"eval_roc_auc": 0.9150237468241987,
"eval_runtime": 8200.3089,
"eval_samples_per_second": 33.444,
"eval_steps_per_second": 2.09,
"step": 16000
},
{
"epoch": 0.11,
"learning_rate": 1.8106101756291871e-06,
"loss": 0.0569,
"step": 18000
},
{
"epoch": 0.11,
"eval_accuracy": 0.8009939908404072,
"eval_f1": 0.8665697356924421,
"eval_loss": 0.05469416454434395,
"eval_roc_auc": 0.9135503362863645,
"eval_runtime": 8201.2258,
"eval_samples_per_second": 33.44,
"eval_steps_per_second": 2.09,
"step": 18000
},
{
"epoch": 0.12,
"learning_rate": 2.01178908403243e-06,
"loss": 0.0554,
"step": 20000
},
{
"epoch": 0.12,
"eval_accuracy": 0.8071891135031066,
"eval_f1": 0.8723540166082954,
"eval_loss": 0.052826616913080215,
"eval_roc_auc": 0.9192588608785235,
"eval_runtime": 8204.8057,
"eval_samples_per_second": 33.425,
"eval_steps_per_second": 2.089,
"step": 20000
},
{
"epoch": 0.13,
"learning_rate": 2.2129679924356733e-06,
"loss": 0.0541,
"step": 22000
},
{
"epoch": 0.13,
"eval_accuracy": 0.8054935678655816,
"eval_f1": 0.8706366827585739,
"eval_loss": 0.05298588052392006,
"eval_roc_auc": 0.9176729146649489,
"eval_runtime": 8206.5949,
"eval_samples_per_second": 33.418,
"eval_steps_per_second": 2.089,
"step": 22000
},
{
"epoch": 0.14,
"learning_rate": 2.414146900838916e-06,
"loss": 0.0522,
"step": 24000
},
{
"epoch": 0.14,
"eval_accuracy": 0.8137306379627198,
"eval_f1": 0.8754267260214113,
"eval_loss": 0.05081520974636078,
"eval_roc_auc": 0.9180724092422241,
"eval_runtime": 8275.6912,
"eval_samples_per_second": 33.139,
"eval_steps_per_second": 2.071,
"step": 24000
}
],
"max_steps": 828450,
"num_train_epochs": 5,
"total_flos": 7.15754724655104e+17,
"trial_name": null,
"trial_params": null
}
|