File size: 5,352 Bytes
6bb1803 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 |
{
"best_metric": 0.6946045049764275,
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-1088",
"epoch": 9.967845659163988,
"eval_steps": 500,
"global_step": 1550,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.9967845659163987,
"eval_accuracy": 0.9435978055118868,
"eval_f1": 0.593056643174106,
"eval_loss": 0.14849668741226196,
"eval_precision": 0.5668662674650699,
"eval_recall": 0.6217843459222769,
"eval_runtime": 5.8633,
"eval_samples_per_second": 429.618,
"eval_steps_per_second": 53.724,
"step": 155
},
{
"epoch": 2.0,
"eval_accuracy": 0.9408546953704001,
"eval_f1": 0.6299060919817,
"eval_loss": 0.16094118356704712,
"eval_precision": 0.5623387790197765,
"eval_recall": 0.715927750410509,
"eval_runtime": 5.9155,
"eval_samples_per_second": 425.831,
"eval_steps_per_second": 53.25,
"step": 311
},
{
"epoch": 2.996784565916399,
"eval_accuracy": 0.9486669447207161,
"eval_f1": 0.6676790685902303,
"eval_loss": 0.1634686440229416,
"eval_precision": 0.6209981167608286,
"eval_recall": 0.7219485495347564,
"eval_runtime": 5.9009,
"eval_samples_per_second": 426.884,
"eval_steps_per_second": 53.382,
"step": 466
},
{
"epoch": 3.215434083601286,
"grad_norm": 0.9288749098777771,
"learning_rate": 3.387096774193548e-05,
"loss": 0.1246,
"step": 500
},
{
"epoch": 4.0,
"eval_accuracy": 0.9492604831723828,
"eval_f1": 0.6711919630735812,
"eval_loss": 0.20466655492782593,
"eval_precision": 0.665948275862069,
"eval_recall": 0.6765188834154351,
"eval_runtime": 5.9155,
"eval_samples_per_second": 425.831,
"eval_steps_per_second": 53.25,
"step": 622
},
{
"epoch": 4.996784565916399,
"eval_accuracy": 0.9479771567904007,
"eval_f1": 0.6827731092436975,
"eval_loss": 0.2133806049823761,
"eval_precision": 0.6562342251388188,
"eval_recall": 0.7115489874110563,
"eval_runtime": 5.8759,
"eval_samples_per_second": 428.701,
"eval_steps_per_second": 53.609,
"step": 777
},
{
"epoch": 6.0,
"eval_accuracy": 0.949388815810581,
"eval_f1": 0.6795913020696882,
"eval_loss": 0.2258971482515335,
"eval_precision": 0.6517587939698493,
"eval_recall": 0.7099069512862616,
"eval_runtime": 5.9106,
"eval_samples_per_second": 426.187,
"eval_steps_per_second": 53.294,
"step": 933
},
{
"epoch": 6.430868167202572,
"grad_norm": 0.18494442105293274,
"learning_rate": 1.774193548387097e-05,
"loss": 0.0242,
"step": 1000
},
{
"epoch": 6.996784565916399,
"eval_accuracy": 0.9496615226667522,
"eval_f1": 0.6946045049764275,
"eval_loss": 0.24502409994602203,
"eval_precision": 0.6659969864389754,
"eval_recall": 0.7257799671592775,
"eval_runtime": 5.8918,
"eval_samples_per_second": 427.542,
"eval_steps_per_second": 53.464,
"step": 1088
},
{
"epoch": 8.0,
"eval_accuracy": 0.9490679842150855,
"eval_f1": 0.6841015018125324,
"eval_loss": 0.26501980423927307,
"eval_precision": 0.6491400491400492,
"eval_recall": 0.7230432402846196,
"eval_runtime": 5.9184,
"eval_samples_per_second": 425.624,
"eval_steps_per_second": 53.224,
"step": 1244
},
{
"epoch": 8.996784565916398,
"eval_accuracy": 0.9497577721454009,
"eval_f1": 0.687797147385103,
"eval_loss": 0.27453720569610596,
"eval_precision": 0.664624808575804,
"eval_recall": 0.7126436781609196,
"eval_runtime": 5.9283,
"eval_samples_per_second": 424.913,
"eval_steps_per_second": 53.135,
"step": 1399
},
{
"epoch": 9.646302250803858,
"grad_norm": 0.35425594449043274,
"learning_rate": 1.6129032258064516e-06,
"loss": 0.0083,
"step": 1500
},
{
"epoch": 9.967845659163988,
"eval_accuracy": 0.950255061118419,
"eval_f1": 0.6896008403361344,
"eval_loss": 0.27744925022125244,
"eval_precision": 0.662796567390207,
"eval_recall": 0.7186644772851669,
"eval_runtime": 6.1493,
"eval_samples_per_second": 409.643,
"eval_steps_per_second": 51.226,
"step": 1550
},
{
"epoch": 9.967845659163988,
"step": 1550,
"total_flos": 4644619911314910.0,
"train_loss": 0.050868147861573,
"train_runtime": 855.1929,
"train_samples_per_second": 116.102,
"train_steps_per_second": 1.812
}
],
"logging_steps": 500,
"max_steps": 1550,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4644619911314910.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}
|