Geofin5 / checkpoint-204 / trainer_state.json
{
"best_metric": 0.9263564944267273,
"best_model_checkpoint": "Geofin5/checkpoint-204",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 204,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.029411764705882353,
"grad_norm": 7.102517604827881,
"learning_rate": 1.2195121951219514e-06,
"loss": 1.3725,
"step": 2
},
{
"epoch": 0.058823529411764705,
"grad_norm": 5.622996807098389,
"learning_rate": 2.4390243902439027e-06,
"loss": 1.3922,
"step": 4
},
{
"epoch": 0.08823529411764706,
"grad_norm": 7.666197299957275,
"learning_rate": 3.6585365853658537e-06,
"loss": 1.3371,
"step": 6
},
{
"epoch": 0.11764705882352941,
"grad_norm": 5.953295707702637,
"learning_rate": 4.8780487804878055e-06,
"loss": 1.322,
"step": 8
},
{
"epoch": 0.14705882352941177,
"grad_norm": 4.027065277099609,
"learning_rate": 6.0975609756097564e-06,
"loss": 1.2841,
"step": 10
},
{
"epoch": 0.17647058823529413,
"grad_norm": 6.355094909667969,
"learning_rate": 7.317073170731707e-06,
"loss": 1.2458,
"step": 12
},
{
"epoch": 0.20588235294117646,
"grad_norm": 4.935754776000977,
"learning_rate": 8.53658536585366e-06,
"loss": 1.2534,
"step": 14
},
{
"epoch": 0.23529411764705882,
"grad_norm": 3.921065330505371,
"learning_rate": 9.756097560975611e-06,
"loss": 1.2224,
"step": 16
},
{
"epoch": 0.2647058823529412,
"grad_norm": 4.3277482986450195,
"learning_rate": 1.0975609756097562e-05,
"loss": 1.125,
"step": 18
},
{
"epoch": 0.29411764705882354,
"grad_norm": 4.808041572570801,
"learning_rate": 1.2195121951219513e-05,
"loss": 1.0728,
"step": 20
},
{
"epoch": 0.3235294117647059,
"grad_norm": 2.6956660747528076,
"learning_rate": 1.3414634146341466e-05,
"loss": 1.2269,
"step": 22
},
{
"epoch": 0.35294117647058826,
"grad_norm": 2.8759944438934326,
"learning_rate": 1.4634146341463415e-05,
"loss": 1.094,
"step": 24
},
{
"epoch": 0.38235294117647056,
"grad_norm": 5.900161266326904,
"learning_rate": 1.5853658536585366e-05,
"loss": 1.1418,
"step": 26
},
{
"epoch": 0.4117647058823529,
"grad_norm": 8.949163436889648,
"learning_rate": 1.707317073170732e-05,
"loss": 1.1931,
"step": 28
},
{
"epoch": 0.4411764705882353,
"grad_norm": 4.940980911254883,
"learning_rate": 1.8292682926829268e-05,
"loss": 1.0575,
"step": 30
},
{
"epoch": 0.47058823529411764,
"grad_norm": 4.716662406921387,
"learning_rate": 1.9512195121951222e-05,
"loss": 1.0049,
"step": 32
},
{
"epoch": 0.5,
"grad_norm": 9.422857284545898,
"learning_rate": 2.073170731707317e-05,
"loss": 1.1533,
"step": 34
},
{
"epoch": 0.5294117647058824,
"grad_norm": 7.63829231262207,
"learning_rate": 2.1951219512195124e-05,
"loss": 1.1386,
"step": 36
},
{
"epoch": 0.5588235294117647,
"grad_norm": 4.745357036590576,
"learning_rate": 2.3170731707317075e-05,
"loss": 0.9919,
"step": 38
},
{
"epoch": 0.5882352941176471,
"grad_norm": 4.796962261199951,
"learning_rate": 2.4390243902439026e-05,
"loss": 0.9971,
"step": 40
},
{
"epoch": 0.6176470588235294,
"grad_norm": 4.623069763183594,
"learning_rate": 2.5609756097560977e-05,
"loss": 1.016,
"step": 42
},
{
"epoch": 0.6470588235294118,
"grad_norm": 7.526188373565674,
"learning_rate": 2.682926829268293e-05,
"loss": 1.1257,
"step": 44
},
{
"epoch": 0.6764705882352942,
"grad_norm": 6.750401973724365,
"learning_rate": 2.8048780487804882e-05,
"loss": 1.0988,
"step": 46
},
{
"epoch": 0.7058823529411765,
"grad_norm": 5.956785678863525,
"learning_rate": 2.926829268292683e-05,
"loss": 1.0436,
"step": 48
},
{
"epoch": 0.7352941176470589,
"grad_norm": 6.079898834228516,
"learning_rate": 3.048780487804878e-05,
"loss": 1.0277,
"step": 50
},
{
"epoch": 0.7647058823529411,
"grad_norm": 4.464956760406494,
"learning_rate": 3.170731707317073e-05,
"loss": 0.8688,
"step": 52
},
{
"epoch": 0.7941176470588235,
"grad_norm": 6.561495304107666,
"learning_rate": 3.292682926829269e-05,
"loss": 0.8962,
"step": 54
},
{
"epoch": 0.8235294117647058,
"grad_norm": 6.373690128326416,
"learning_rate": 3.353658536585366e-05,
"loss": 1.0343,
"step": 56
},
{
"epoch": 0.8529411764705882,
"grad_norm": 8.000486373901367,
"learning_rate": 3.475609756097561e-05,
"loss": 1.0543,
"step": 58
},
{
"epoch": 0.8823529411764706,
"grad_norm": 5.830896854400635,
"learning_rate": 3.597560975609756e-05,
"loss": 1.0761,
"step": 60
},
{
"epoch": 0.9117647058823529,
"grad_norm": 10.151026725769043,
"learning_rate": 3.7195121951219514e-05,
"loss": 1.0737,
"step": 62
},
{
"epoch": 0.9411764705882353,
"grad_norm": 9.73624038696289,
"learning_rate": 3.8414634146341465e-05,
"loss": 0.9112,
"step": 64
},
{
"epoch": 0.9705882352941176,
"grad_norm": 9.713314056396484,
"learning_rate": 3.9634146341463416e-05,
"loss": 0.8498,
"step": 66
},
{
"epoch": 1.0,
"grad_norm": 10.631162643432617,
"learning_rate": 4.085365853658537e-05,
"loss": 0.9937,
"step": 68
},
{
"epoch": 1.0,
"eval_accuracy": 0.5564853556485355,
"eval_f1_macro": 0.39598376878805575,
"eval_f1_micro": 0.5564853556485355,
"eval_f1_weighted": 0.4741897095372058,
"eval_loss": 1.0355333089828491,
"eval_precision_macro": 0.5140887485587238,
"eval_precision_micro": 0.5564853556485355,
"eval_precision_weighted": 0.5576172702264479,
"eval_recall_macro": 0.4827948905153814,
"eval_recall_micro": 0.5564853556485355,
"eval_recall_weighted": 0.5564853556485355,
"eval_runtime": 0.6549,
"eval_samples_per_second": 729.907,
"eval_steps_per_second": 12.216,
"step": 68
},
{
"epoch": 1.0294117647058822,
"grad_norm": 7.002508163452148,
"learning_rate": 4.207317073170732e-05,
"loss": 0.9379,
"step": 70
},
{
"epoch": 1.0588235294117647,
"grad_norm": 9.145544052124023,
"learning_rate": 4.329268292682927e-05,
"loss": 0.8374,
"step": 72
},
{
"epoch": 1.088235294117647,
"grad_norm": 10.859468460083008,
"learning_rate": 4.451219512195122e-05,
"loss": 1.0609,
"step": 74
},
{
"epoch": 1.1176470588235294,
"grad_norm": 5.649864196777344,
"learning_rate": 4.573170731707318e-05,
"loss": 0.8415,
"step": 76
},
{
"epoch": 1.1470588235294117,
"grad_norm": 7.1180877685546875,
"learning_rate": 4.695121951219512e-05,
"loss": 0.9504,
"step": 78
},
{
"epoch": 1.1764705882352942,
"grad_norm": 11.018819808959961,
"learning_rate": 4.817073170731707e-05,
"loss": 0.9917,
"step": 80
},
{
"epoch": 1.2058823529411764,
"grad_norm": 9.096946716308594,
"learning_rate": 4.9390243902439024e-05,
"loss": 0.7748,
"step": 82
},
{
"epoch": 1.2352941176470589,
"grad_norm": 14.755735397338867,
"learning_rate": 4.993188010899183e-05,
"loss": 0.8427,
"step": 84
},
{
"epoch": 1.2647058823529411,
"grad_norm": 9.15238094329834,
"learning_rate": 4.979564032697548e-05,
"loss": 0.8695,
"step": 86
},
{
"epoch": 1.2941176470588236,
"grad_norm": 10.265277862548828,
"learning_rate": 4.9659400544959125e-05,
"loss": 0.6385,
"step": 88
},
{
"epoch": 1.3235294117647058,
"grad_norm": 9.704151153564453,
"learning_rate": 4.952316076294278e-05,
"loss": 0.756,
"step": 90
},
{
"epoch": 1.3529411764705883,
"grad_norm": 9.141586303710938,
"learning_rate": 4.9386920980926435e-05,
"loss": 0.7567,
"step": 92
},
{
"epoch": 1.3823529411764706,
"grad_norm": 9.520133018493652,
"learning_rate": 4.925068119891008e-05,
"loss": 0.9081,
"step": 94
},
{
"epoch": 1.4117647058823528,
"grad_norm": 17.20820426940918,
"learning_rate": 4.911444141689374e-05,
"loss": 0.9661,
"step": 96
},
{
"epoch": 1.4411764705882353,
"grad_norm": 9.26347827911377,
"learning_rate": 4.8978201634877385e-05,
"loss": 0.8661,
"step": 98
},
{
"epoch": 1.4705882352941178,
"grad_norm": 12.740453720092773,
"learning_rate": 4.884196185286104e-05,
"loss": 0.8978,
"step": 100
},
{
"epoch": 1.5,
"grad_norm": 8.899398803710938,
"learning_rate": 4.870572207084469e-05,
"loss": 0.6809,
"step": 102
},
{
"epoch": 1.5294117647058822,
"grad_norm": 13.515447616577148,
"learning_rate": 4.8569482288828335e-05,
"loss": 0.7378,
"step": 104
},
{
"epoch": 1.5588235294117647,
"grad_norm": 6.7167439460754395,
"learning_rate": 4.843324250681199e-05,
"loss": 0.7376,
"step": 106
},
{
"epoch": 1.5882352941176472,
"grad_norm": 19.798791885375977,
"learning_rate": 4.8297002724795645e-05,
"loss": 0.7125,
"step": 108
},
{
"epoch": 1.6176470588235294,
"grad_norm": Infinity,
"learning_rate": 4.822888283378747e-05,
"loss": 0.7425,
"step": 110
},
{
"epoch": 1.6470588235294117,
"grad_norm": 8.12073040008545,
"learning_rate": 4.809264305177112e-05,
"loss": 0.672,
"step": 112
},
{
"epoch": 1.6764705882352942,
"grad_norm": 7.961780071258545,
"learning_rate": 4.795640326975477e-05,
"loss": 0.7023,
"step": 114
},
{
"epoch": 1.7058823529411766,
"grad_norm": 15.105574607849121,
"learning_rate": 4.782016348773842e-05,
"loss": 0.6107,
"step": 116
},
{
"epoch": 1.7352941176470589,
"grad_norm": 15.375845909118652,
"learning_rate": 4.768392370572207e-05,
"loss": 0.648,
"step": 118
},
{
"epoch": 1.7647058823529411,
"grad_norm": 11.371450424194336,
"learning_rate": 4.7547683923705725e-05,
"loss": 0.7948,
"step": 120
},
{
"epoch": 1.7941176470588234,
"grad_norm": 13.418404579162598,
"learning_rate": 4.741144414168938e-05,
"loss": 0.5621,
"step": 122
},
{
"epoch": 1.8235294117647058,
"grad_norm": 14.923059463500977,
"learning_rate": 4.727520435967303e-05,
"loss": 0.7301,
"step": 124
},
{
"epoch": 1.8529411764705883,
"grad_norm": 9.319025993347168,
"learning_rate": 4.713896457765668e-05,
"loss": 0.8735,
"step": 126
},
{
"epoch": 1.8823529411764706,
"grad_norm": 7.903226375579834,
"learning_rate": 4.700272479564033e-05,
"loss": 0.7913,
"step": 128
},
{
"epoch": 1.9117647058823528,
"grad_norm": 13.043506622314453,
"learning_rate": 4.686648501362398e-05,
"loss": 0.8099,
"step": 130
},
{
"epoch": 1.9411764705882353,
"grad_norm": 12.253973007202148,
"learning_rate": 4.673024523160763e-05,
"loss": 0.7651,
"step": 132
},
{
"epoch": 1.9705882352941178,
"grad_norm": 9.918290138244629,
"learning_rate": 4.659400544959128e-05,
"loss": 0.7537,
"step": 134
},
{
"epoch": 2.0,
"grad_norm": 17.887121200561523,
"learning_rate": 4.6457765667574935e-05,
"loss": 0.8837,
"step": 136
},
{
"epoch": 2.0,
"eval_accuracy": 0.6401673640167364,
"eval_f1_macro": 0.5112056699141869,
"eval_f1_micro": 0.6401673640167364,
"eval_f1_weighted": 0.623119203970463,
"eval_loss": 0.9266994595527649,
"eval_precision_macro": 0.6451808449222778,
"eval_precision_micro": 0.6401673640167364,
"eval_precision_weighted": 0.6880713224888448,
"eval_recall_macro": 0.5646276187019543,
"eval_recall_micro": 0.6401673640167364,
"eval_recall_weighted": 0.6401673640167364,
"eval_runtime": 0.6577,
"eval_samples_per_second": 726.807,
"eval_steps_per_second": 12.164,
"step": 136
},
{
"epoch": 2.0294117647058822,
"grad_norm": 12.88015365600586,
"learning_rate": 4.632152588555859e-05,
"loss": 0.6675,
"step": 138
},
{
"epoch": 2.0588235294117645,
"grad_norm": 7.740164756774902,
"learning_rate": 4.618528610354224e-05,
"loss": 0.726,
"step": 140
},
{
"epoch": 2.088235294117647,
"grad_norm": 6.169182777404785,
"learning_rate": 4.604904632152589e-05,
"loss": 0.5834,
"step": 142
},
{
"epoch": 2.1176470588235294,
"grad_norm": 5.821132659912109,
"learning_rate": 4.591280653950954e-05,
"loss": 0.5636,
"step": 144
},
{
"epoch": 2.1470588235294117,
"grad_norm": 7.872137069702148,
"learning_rate": 4.577656675749319e-05,
"loss": 0.566,
"step": 146
},
{
"epoch": 2.176470588235294,
"grad_norm": 7.407035827636719,
"learning_rate": 4.564032697547684e-05,
"loss": 0.5023,
"step": 148
},
{
"epoch": 2.2058823529411766,
"grad_norm": 8.054471015930176,
"learning_rate": 4.550408719346049e-05,
"loss": 0.4304,
"step": 150
},
{
"epoch": 2.235294117647059,
"grad_norm": 20.935813903808594,
"learning_rate": 4.5367847411444145e-05,
"loss": 0.5301,
"step": 152
},
{
"epoch": 2.264705882352941,
"grad_norm": 15.824753761291504,
"learning_rate": 4.52316076294278e-05,
"loss": 0.5815,
"step": 154
},
{
"epoch": 2.2941176470588234,
"grad_norm": 11.326836585998535,
"learning_rate": 4.509536784741145e-05,
"loss": 0.7101,
"step": 156
},
{
"epoch": 2.323529411764706,
"grad_norm": 27.99034309387207,
"learning_rate": 4.4959128065395095e-05,
"loss": 0.6138,
"step": 158
},
{
"epoch": 2.3529411764705883,
"grad_norm": 7.2306623458862305,
"learning_rate": 4.482288828337875e-05,
"loss": 0.5515,
"step": 160
},
{
"epoch": 2.3823529411764706,
"grad_norm": 15.374252319335938,
"learning_rate": 4.46866485013624e-05,
"loss": 0.7274,
"step": 162
},
{
"epoch": 2.411764705882353,
"grad_norm": 11.15127182006836,
"learning_rate": 4.4550408719346046e-05,
"loss": 0.685,
"step": 164
},
{
"epoch": 2.4411764705882355,
"grad_norm": 18.0255069732666,
"learning_rate": 4.44141689373297e-05,
"loss": 0.6403,
"step": 166
},
{
"epoch": 2.4705882352941178,
"grad_norm": 15.681634902954102,
"learning_rate": 4.4277929155313355e-05,
"loss": 0.5377,
"step": 168
},
{
"epoch": 2.5,
"grad_norm": 13.26118278503418,
"learning_rate": 4.414168937329701e-05,
"loss": 0.6531,
"step": 170
},
{
"epoch": 2.5294117647058822,
"grad_norm": 5.877330780029297,
"learning_rate": 4.400544959128066e-05,
"loss": 0.4888,
"step": 172
},
{
"epoch": 2.5588235294117645,
"grad_norm": 17.445058822631836,
"learning_rate": 4.3869209809264305e-05,
"loss": 0.5194,
"step": 174
},
{
"epoch": 2.588235294117647,
"grad_norm": 11.56258487701416,
"learning_rate": 4.373297002724796e-05,
"loss": 0.3939,
"step": 176
},
{
"epoch": 2.6176470588235294,
"grad_norm": 14.838981628417969,
"learning_rate": 4.359673024523161e-05,
"loss": 0.4646,
"step": 178
},
{
"epoch": 2.6470588235294117,
"grad_norm": 16.686960220336914,
"learning_rate": 4.3460490463215255e-05,
"loss": 0.4943,
"step": 180
},
{
"epoch": 2.6764705882352944,
"grad_norm": 13.008139610290527,
"learning_rate": 4.332425068119891e-05,
"loss": 0.6352,
"step": 182
},
{
"epoch": 2.7058823529411766,
"grad_norm": 18.054786682128906,
"learning_rate": 4.3188010899182565e-05,
"loss": 0.4034,
"step": 184
},
{
"epoch": 2.735294117647059,
"grad_norm": 9.771907806396484,
"learning_rate": 4.305177111716621e-05,
"loss": 0.3427,
"step": 186
},
{
"epoch": 2.764705882352941,
"grad_norm": 8.055870056152344,
"learning_rate": 4.291553133514987e-05,
"loss": 0.3218,
"step": 188
},
{
"epoch": 2.7941176470588234,
"grad_norm": 14.025898933410645,
"learning_rate": 4.2779291553133515e-05,
"loss": 0.5226,
"step": 190
},
{
"epoch": 2.8235294117647056,
"grad_norm": 7.227923393249512,
"learning_rate": 4.264305177111717e-05,
"loss": 0.3266,
"step": 192
},
{
"epoch": 2.8529411764705883,
"grad_norm": 15.04366397857666,
"learning_rate": 4.250681198910082e-05,
"loss": 0.6136,
"step": 194
},
{
"epoch": 2.8823529411764706,
"grad_norm": 15.145903587341309,
"learning_rate": 4.237057220708447e-05,
"loss": 0.4222,
"step": 196
},
{
"epoch": 2.911764705882353,
"grad_norm": 14.604679107666016,
"learning_rate": 4.223433242506813e-05,
"loss": 0.6989,
"step": 198
},
{
"epoch": 2.9411764705882355,
"grad_norm": 10.518308639526367,
"learning_rate": 4.2098092643051775e-05,
"loss": 0.4435,
"step": 200
},
{
"epoch": 2.9705882352941178,
"grad_norm": 16.961441040039062,
"learning_rate": 4.196185286103542e-05,
"loss": 0.6116,
"step": 202
},
{
"epoch": 3.0,
"grad_norm": 14.586898803710938,
"learning_rate": 4.182561307901908e-05,
"loss": 0.4291,
"step": 204
},
{
"epoch": 3.0,
"eval_accuracy": 0.700836820083682,
"eval_f1_macro": 0.6193851541399454,
"eval_f1_micro": 0.700836820083682,
"eval_f1_weighted": 0.6928921325883705,
"eval_loss": 0.9263564944267273,
"eval_precision_macro": 0.6728486680867634,
"eval_precision_micro": 0.700836820083682,
"eval_precision_weighted": 0.7079874543114232,
"eval_recall_macro": 0.6147070748686357,
"eval_recall_micro": 0.700836820083682,
"eval_recall_weighted": 0.700836820083682,
"eval_runtime": 0.6574,
"eval_samples_per_second": 727.055,
"eval_steps_per_second": 12.168,
"step": 204
}
],
"logging_steps": 2,
"max_steps": 816,
"num_input_tokens_seen": 0,
"num_train_epochs": 12,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 428613555234816.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}
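
For reference, a trainer_state.json like the one above can be inspected directly with Python's standard library; the sketch below is a minimal example, assuming the file is saved locally as "trainer_state.json" (the path is illustrative, not part of the checkpoint). It splits log_history into training-loss entries and per-epoch evaluation entries, then prints the recorded best checkpoint and the eval metrics per epoch. Note that Python's json module accepts the non-standard Infinity token that appears in the grad_norm field at step 110, so the file loads as-is.

import json

# Load the trainer state written by transformers.Trainer alongside each checkpoint.
# The local path is an assumption; point it at wherever this file is saved.
with open("trainer_state.json") as f:
    state = json.load(f)

# Entries with a "loss" key are training logs (written every `logging_steps` steps);
# entries with "eval_loss" are the per-epoch evaluation summaries.
train_log = [entry for entry in state["log_history"] if "loss" in entry]
eval_log = [entry for entry in state["log_history"] if "eval_loss" in entry]

print(f"best checkpoint: {state['best_model_checkpoint']} "
      f"(best_metric={state['best_metric']:.4f})")

for entry in eval_log:
    print(f"epoch {entry['epoch']:.0f}: "
          f"eval_loss={entry['eval_loss']:.4f}  "
          f"accuracy={entry['eval_accuracy']:.4f}  "
          f"f1_macro={entry['eval_f1_macro']:.4f}")

# Quick sanity check on the last logged training loss.
last = train_log[-1]
print(f"last logged training loss (step {last['step']}): {last['loss']:.4f}")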