Melo1512's picture
End of training
bf95d4c verified
{
"best_metric": 0.32666629552841187,
"best_model_checkpoint": "vit-msn-small-beta-fia-manually-enhanced-HSV_test_5/checkpoint-44",
"epoch": 71.42857142857143,
"eval_steps": 500,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.7142857142857143,
"eval_accuracy": 0.22916666666666666,
"eval_loss": 1.1105738878250122,
"eval_runtime": 0.6997,
"eval_samples_per_second": 205.812,
"eval_steps_per_second": 4.288,
"step": 1
},
{
"epoch": 1.4285714285714286,
"eval_accuracy": 0.2569444444444444,
"eval_loss": 1.098416805267334,
"eval_runtime": 0.6273,
"eval_samples_per_second": 229.555,
"eval_steps_per_second": 4.782,
"step": 2
},
{
"epoch": 2.857142857142857,
"eval_accuracy": 0.4097222222222222,
"eval_loss": 1.039996862411499,
"eval_runtime": 0.6547,
"eval_samples_per_second": 219.941,
"eval_steps_per_second": 4.582,
"step": 4
},
{
"epoch": 3.571428571428571,
"eval_accuracy": 0.5486111111111112,
"eval_loss": 0.99596107006073,
"eval_runtime": 0.6487,
"eval_samples_per_second": 221.995,
"eval_steps_per_second": 4.625,
"step": 5
},
{
"epoch": 5.0,
"eval_accuracy": 0.7291666666666666,
"eval_loss": 0.8868050575256348,
"eval_runtime": 0.7036,
"eval_samples_per_second": 204.658,
"eval_steps_per_second": 4.264,
"step": 7
},
{
"epoch": 5.714285714285714,
"eval_accuracy": 0.7777777777777778,
"eval_loss": 0.8263272047042847,
"eval_runtime": 0.7254,
"eval_samples_per_second": 198.509,
"eval_steps_per_second": 4.136,
"step": 8
},
{
"epoch": 6.428571428571429,
"eval_accuracy": 0.8055555555555556,
"eval_loss": 0.7650943994522095,
"eval_runtime": 0.6223,
"eval_samples_per_second": 231.4,
"eval_steps_per_second": 4.821,
"step": 9
},
{
"epoch": 7.142857142857143,
"grad_norm": 13.31224536895752,
"learning_rate": 4.000000000000001e-06,
"loss": 0.9808,
"step": 10
},
{
"epoch": 7.857142857142857,
"eval_accuracy": 0.8125,
"eval_loss": 0.6520677208900452,
"eval_runtime": 0.6795,
"eval_samples_per_second": 211.923,
"eval_steps_per_second": 4.415,
"step": 11
},
{
"epoch": 8.571428571428571,
"eval_accuracy": 0.8125,
"eval_loss": 0.605199933052063,
"eval_runtime": 0.6616,
"eval_samples_per_second": 217.658,
"eval_steps_per_second": 4.535,
"step": 12
},
{
"epoch": 10.0,
"eval_accuracy": 0.8125,
"eval_loss": 0.538772702217102,
"eval_runtime": 0.6326,
"eval_samples_per_second": 227.627,
"eval_steps_per_second": 4.742,
"step": 14
},
{
"epoch": 10.714285714285714,
"eval_accuracy": 0.8125,
"eval_loss": 0.5174447894096375,
"eval_runtime": 0.6152,
"eval_samples_per_second": 234.055,
"eval_steps_per_second": 4.876,
"step": 15
},
{
"epoch": 11.428571428571429,
"eval_accuracy": 0.8125,
"eval_loss": 0.503182590007782,
"eval_runtime": 0.6377,
"eval_samples_per_second": 225.804,
"eval_steps_per_second": 4.704,
"step": 16
},
{
"epoch": 12.857142857142858,
"eval_accuracy": 0.8125,
"eval_loss": 0.5022291541099548,
"eval_runtime": 0.6473,
"eval_samples_per_second": 222.471,
"eval_steps_per_second": 4.635,
"step": 18
},
{
"epoch": 13.571428571428571,
"eval_accuracy": 0.8194444444444444,
"eval_loss": 0.5044423341751099,
"eval_runtime": 0.6293,
"eval_samples_per_second": 228.838,
"eval_steps_per_second": 4.767,
"step": 19
},
{
"epoch": 14.285714285714286,
"grad_norm": 3.2047011852264404,
"learning_rate": 8.000000000000001e-06,
"loss": 0.5431,
"step": 20
},
{
"epoch": 15.0,
"eval_accuracy": 0.8263888888888888,
"eval_loss": 0.47730180621147156,
"eval_runtime": 0.7212,
"eval_samples_per_second": 199.678,
"eval_steps_per_second": 4.16,
"step": 21
},
{
"epoch": 15.714285714285714,
"eval_accuracy": 0.8333333333333334,
"eval_loss": 0.4439104497432709,
"eval_runtime": 0.6519,
"eval_samples_per_second": 220.904,
"eval_steps_per_second": 4.602,
"step": 22
},
{
"epoch": 16.428571428571427,
"eval_accuracy": 0.8402777777777778,
"eval_loss": 0.41979074478149414,
"eval_runtime": 0.6167,
"eval_samples_per_second": 233.497,
"eval_steps_per_second": 4.865,
"step": 23
},
{
"epoch": 17.857142857142858,
"eval_accuracy": 0.8819444444444444,
"eval_loss": 0.38726139068603516,
"eval_runtime": 0.626,
"eval_samples_per_second": 230.043,
"eval_steps_per_second": 4.793,
"step": 25
},
{
"epoch": 18.571428571428573,
"eval_accuracy": 0.8888888888888888,
"eval_loss": 0.37299442291259766,
"eval_runtime": 0.6291,
"eval_samples_per_second": 228.912,
"eval_steps_per_second": 4.769,
"step": 26
},
{
"epoch": 20.0,
"eval_accuracy": 0.9027777777777778,
"eval_loss": 0.3773989677429199,
"eval_runtime": 0.63,
"eval_samples_per_second": 228.574,
"eval_steps_per_second": 4.762,
"step": 28
},
{
"epoch": 20.714285714285715,
"eval_accuracy": 0.9097222222222222,
"eval_loss": 0.37053972482681274,
"eval_runtime": 0.6212,
"eval_samples_per_second": 231.817,
"eval_steps_per_second": 4.83,
"step": 29
},
{
"epoch": 21.428571428571427,
"grad_norm": 5.860249996185303,
"learning_rate": 9.333333333333334e-06,
"loss": 0.4028,
"step": 30
},
{
"epoch": 21.428571428571427,
"eval_accuracy": 0.9097222222222222,
"eval_loss": 0.3586524724960327,
"eval_runtime": 0.6377,
"eval_samples_per_second": 225.813,
"eval_steps_per_second": 4.704,
"step": 30
},
{
"epoch": 22.857142857142858,
"eval_accuracy": 0.8958333333333334,
"eval_loss": 0.36622118949890137,
"eval_runtime": 0.6654,
"eval_samples_per_second": 216.406,
"eval_steps_per_second": 4.508,
"step": 32
},
{
"epoch": 23.571428571428573,
"eval_accuracy": 0.8680555555555556,
"eval_loss": 0.37790825963020325,
"eval_runtime": 0.6384,
"eval_samples_per_second": 225.558,
"eval_steps_per_second": 4.699,
"step": 33
},
{
"epoch": 25.0,
"eval_accuracy": 0.8263888888888888,
"eval_loss": 0.43221160769462585,
"eval_runtime": 0.6264,
"eval_samples_per_second": 229.872,
"eval_steps_per_second": 4.789,
"step": 35
},
{
"epoch": 25.714285714285715,
"eval_accuracy": 0.8333333333333334,
"eval_loss": 0.39439037442207336,
"eval_runtime": 0.6534,
"eval_samples_per_second": 220.396,
"eval_steps_per_second": 4.592,
"step": 36
},
{
"epoch": 26.428571428571427,
"eval_accuracy": 0.8888888888888888,
"eval_loss": 0.3585418462753296,
"eval_runtime": 0.6327,
"eval_samples_per_second": 227.579,
"eval_steps_per_second": 4.741,
"step": 37
},
{
"epoch": 27.857142857142858,
"eval_accuracy": 0.8888888888888888,
"eval_loss": 0.3607942461967468,
"eval_runtime": 0.6624,
"eval_samples_per_second": 217.391,
"eval_steps_per_second": 4.529,
"step": 39
},
{
"epoch": 28.571428571428573,
"grad_norm": 3.4132988452911377,
"learning_rate": 8.000000000000001e-06,
"loss": 0.3497,
"step": 40
},
{
"epoch": 28.571428571428573,
"eval_accuracy": 0.8472222222222222,
"eval_loss": 0.39719662070274353,
"eval_runtime": 0.643,
"eval_samples_per_second": 223.955,
"eval_steps_per_second": 4.666,
"step": 40
},
{
"epoch": 30.0,
"eval_accuracy": 0.8611111111111112,
"eval_loss": 0.3804582953453064,
"eval_runtime": 0.6301,
"eval_samples_per_second": 228.526,
"eval_steps_per_second": 4.761,
"step": 42
},
{
"epoch": 30.714285714285715,
"eval_accuracy": 0.8819444444444444,
"eval_loss": 0.3610667586326599,
"eval_runtime": 0.6248,
"eval_samples_per_second": 230.471,
"eval_steps_per_second": 4.801,
"step": 43
},
{
"epoch": 31.428571428571427,
"eval_accuracy": 0.9166666666666666,
"eval_loss": 0.32666629552841187,
"eval_runtime": 0.6456,
"eval_samples_per_second": 223.058,
"eval_steps_per_second": 4.647,
"step": 44
},
{
"epoch": 32.857142857142854,
"eval_accuracy": 0.9027777777777778,
"eval_loss": 0.3402611017227173,
"eval_runtime": 0.6337,
"eval_samples_per_second": 227.247,
"eval_steps_per_second": 4.734,
"step": 46
},
{
"epoch": 33.57142857142857,
"eval_accuracy": 0.875,
"eval_loss": 0.37514248490333557,
"eval_runtime": 0.7032,
"eval_samples_per_second": 204.765,
"eval_steps_per_second": 4.266,
"step": 47
},
{
"epoch": 35.0,
"eval_accuracy": 0.8680555555555556,
"eval_loss": 0.3801332414150238,
"eval_runtime": 0.659,
"eval_samples_per_second": 218.502,
"eval_steps_per_second": 4.552,
"step": 49
},
{
"epoch": 35.714285714285715,
"grad_norm": 6.472348213195801,
"learning_rate": 6.666666666666667e-06,
"loss": 0.3278,
"step": 50
},
{
"epoch": 35.714285714285715,
"eval_accuracy": 0.8958333333333334,
"eval_loss": 0.34991347789764404,
"eval_runtime": 0.6712,
"eval_samples_per_second": 214.545,
"eval_steps_per_second": 4.47,
"step": 50
},
{
"epoch": 36.42857142857143,
"eval_accuracy": 0.8958333333333334,
"eval_loss": 0.33839675784111023,
"eval_runtime": 0.6365,
"eval_samples_per_second": 226.231,
"eval_steps_per_second": 4.713,
"step": 51
},
{
"epoch": 37.857142857142854,
"eval_accuracy": 0.8541666666666666,
"eval_loss": 0.3642105460166931,
"eval_runtime": 0.6301,
"eval_samples_per_second": 228.534,
"eval_steps_per_second": 4.761,
"step": 53
},
{
"epoch": 38.57142857142857,
"eval_accuracy": 0.8194444444444444,
"eval_loss": 0.3996630907058716,
"eval_runtime": 0.6609,
"eval_samples_per_second": 217.89,
"eval_steps_per_second": 4.539,
"step": 54
},
{
"epoch": 40.0,
"eval_accuracy": 0.8402777777777778,
"eval_loss": 0.3843066394329071,
"eval_runtime": 0.6636,
"eval_samples_per_second": 216.984,
"eval_steps_per_second": 4.521,
"step": 56
},
{
"epoch": 40.714285714285715,
"eval_accuracy": 0.8680555555555556,
"eval_loss": 0.3675690293312073,
"eval_runtime": 0.6194,
"eval_samples_per_second": 232.479,
"eval_steps_per_second": 4.843,
"step": 57
},
{
"epoch": 41.42857142857143,
"eval_accuracy": 0.9027777777777778,
"eval_loss": 0.3464236557483673,
"eval_runtime": 0.6267,
"eval_samples_per_second": 229.775,
"eval_steps_per_second": 4.787,
"step": 58
},
{
"epoch": 42.857142857142854,
"grad_norm": 6.519013404846191,
"learning_rate": 5.333333333333334e-06,
"loss": 0.3334,
"step": 60
},
{
"epoch": 42.857142857142854,
"eval_accuracy": 0.8819444444444444,
"eval_loss": 0.36175864934921265,
"eval_runtime": 0.7106,
"eval_samples_per_second": 202.636,
"eval_steps_per_second": 4.222,
"step": 60
},
{
"epoch": 43.57142857142857,
"eval_accuracy": 0.8194444444444444,
"eval_loss": 0.40056005120277405,
"eval_runtime": 0.6703,
"eval_samples_per_second": 214.814,
"eval_steps_per_second": 4.475,
"step": 61
},
{
"epoch": 45.0,
"eval_accuracy": 0.7638888888888888,
"eval_loss": 0.49312081933021545,
"eval_runtime": 0.7051,
"eval_samples_per_second": 204.23,
"eval_steps_per_second": 4.255,
"step": 63
},
{
"epoch": 45.714285714285715,
"eval_accuracy": 0.7708333333333334,
"eval_loss": 0.48445218801498413,
"eval_runtime": 0.71,
"eval_samples_per_second": 202.808,
"eval_steps_per_second": 4.225,
"step": 64
},
{
"epoch": 46.42857142857143,
"eval_accuracy": 0.7916666666666666,
"eval_loss": 0.4485209286212921,
"eval_runtime": 0.6303,
"eval_samples_per_second": 228.459,
"eval_steps_per_second": 4.76,
"step": 65
},
{
"epoch": 47.857142857142854,
"eval_accuracy": 0.8472222222222222,
"eval_loss": 0.378328800201416,
"eval_runtime": 0.6131,
"eval_samples_per_second": 234.855,
"eval_steps_per_second": 4.893,
"step": 67
},
{
"epoch": 48.57142857142857,
"eval_accuracy": 0.8472222222222222,
"eval_loss": 0.37234801054000854,
"eval_runtime": 0.6727,
"eval_samples_per_second": 214.05,
"eval_steps_per_second": 4.459,
"step": 68
},
{
"epoch": 50.0,
"grad_norm": 5.204492092132568,
"learning_rate": 4.000000000000001e-06,
"loss": 0.3334,
"step": 70
},
{
"epoch": 50.0,
"eval_accuracy": 0.8125,
"eval_loss": 0.407737672328949,
"eval_runtime": 0.6505,
"eval_samples_per_second": 221.382,
"eval_steps_per_second": 4.612,
"step": 70
},
{
"epoch": 50.714285714285715,
"eval_accuracy": 0.7986111111111112,
"eval_loss": 0.4380877912044525,
"eval_runtime": 0.7093,
"eval_samples_per_second": 203.024,
"eval_steps_per_second": 4.23,
"step": 71
},
{
"epoch": 51.42857142857143,
"eval_accuracy": 0.7847222222222222,
"eval_loss": 0.46269893646240234,
"eval_runtime": 0.6731,
"eval_samples_per_second": 213.937,
"eval_steps_per_second": 4.457,
"step": 72
},
{
"epoch": 52.857142857142854,
"eval_accuracy": 0.7986111111111112,
"eval_loss": 0.44445788860321045,
"eval_runtime": 0.6372,
"eval_samples_per_second": 225.995,
"eval_steps_per_second": 4.708,
"step": 74
},
{
"epoch": 53.57142857142857,
"eval_accuracy": 0.8125,
"eval_loss": 0.41410741209983826,
"eval_runtime": 0.6728,
"eval_samples_per_second": 214.026,
"eval_steps_per_second": 4.459,
"step": 75
},
{
"epoch": 55.0,
"eval_accuracy": 0.8680555555555556,
"eval_loss": 0.3488573431968689,
"eval_runtime": 0.6306,
"eval_samples_per_second": 228.368,
"eval_steps_per_second": 4.758,
"step": 77
},
{
"epoch": 55.714285714285715,
"eval_accuracy": 0.8958333333333334,
"eval_loss": 0.33705562353134155,
"eval_runtime": 0.6859,
"eval_samples_per_second": 209.945,
"eval_steps_per_second": 4.374,
"step": 78
},
{
"epoch": 56.42857142857143,
"eval_accuracy": 0.8888888888888888,
"eval_loss": 0.3358408510684967,
"eval_runtime": 0.6867,
"eval_samples_per_second": 209.701,
"eval_steps_per_second": 4.369,
"step": 79
},
{
"epoch": 57.142857142857146,
"grad_norm": 5.973431587219238,
"learning_rate": 2.666666666666667e-06,
"loss": 0.3105,
"step": 80
},
{
"epoch": 57.857142857142854,
"eval_accuracy": 0.8680555555555556,
"eval_loss": 0.3538711965084076,
"eval_runtime": 0.7307,
"eval_samples_per_second": 197.072,
"eval_steps_per_second": 4.106,
"step": 81
},
{
"epoch": 58.57142857142857,
"eval_accuracy": 0.8541666666666666,
"eval_loss": 0.3678491413593292,
"eval_runtime": 0.6126,
"eval_samples_per_second": 235.055,
"eval_steps_per_second": 4.897,
"step": 82
},
{
"epoch": 60.0,
"eval_accuracy": 0.8263888888888888,
"eval_loss": 0.3930552899837494,
"eval_runtime": 0.633,
"eval_samples_per_second": 227.478,
"eval_steps_per_second": 4.739,
"step": 84
},
{
"epoch": 60.714285714285715,
"eval_accuracy": 0.8263888888888888,
"eval_loss": 0.3938286006450653,
"eval_runtime": 0.6516,
"eval_samples_per_second": 220.996,
"eval_steps_per_second": 4.604,
"step": 85
},
{
"epoch": 61.42857142857143,
"eval_accuracy": 0.8472222222222222,
"eval_loss": 0.3896949589252472,
"eval_runtime": 0.6582,
"eval_samples_per_second": 218.793,
"eval_steps_per_second": 4.558,
"step": 86
},
{
"epoch": 62.857142857142854,
"eval_accuracy": 0.8611111111111112,
"eval_loss": 0.3637922406196594,
"eval_runtime": 0.6651,
"eval_samples_per_second": 216.505,
"eval_steps_per_second": 4.511,
"step": 88
},
{
"epoch": 63.57142857142857,
"eval_accuracy": 0.875,
"eval_loss": 0.34960028529167175,
"eval_runtime": 0.6331,
"eval_samples_per_second": 227.443,
"eval_steps_per_second": 4.738,
"step": 89
},
{
"epoch": 64.28571428571429,
"grad_norm": 6.167888164520264,
"learning_rate": 1.3333333333333334e-06,
"loss": 0.3061,
"step": 90
},
{
"epoch": 65.0,
"eval_accuracy": 0.8958333333333334,
"eval_loss": 0.3304632306098938,
"eval_runtime": 0.6686,
"eval_samples_per_second": 215.369,
"eval_steps_per_second": 4.487,
"step": 91
},
{
"epoch": 65.71428571428571,
"eval_accuracy": 0.9027777777777778,
"eval_loss": 0.3283728361129761,
"eval_runtime": 0.6659,
"eval_samples_per_second": 216.261,
"eval_steps_per_second": 4.505,
"step": 92
},
{
"epoch": 66.42857142857143,
"eval_accuracy": 0.8958333333333334,
"eval_loss": 0.3283740282058716,
"eval_runtime": 0.6699,
"eval_samples_per_second": 214.972,
"eval_steps_per_second": 4.479,
"step": 93
},
{
"epoch": 67.85714285714286,
"eval_accuracy": 0.8958333333333334,
"eval_loss": 0.33374664187431335,
"eval_runtime": 0.668,
"eval_samples_per_second": 215.572,
"eval_steps_per_second": 4.491,
"step": 95
},
{
"epoch": 68.57142857142857,
"eval_accuracy": 0.8888888888888888,
"eval_loss": 0.33741050958633423,
"eval_runtime": 0.6191,
"eval_samples_per_second": 232.594,
"eval_steps_per_second": 4.846,
"step": 96
},
{
"epoch": 70.0,
"eval_accuracy": 0.875,
"eval_loss": 0.34418821334838867,
"eval_runtime": 0.6959,
"eval_samples_per_second": 206.929,
"eval_steps_per_second": 4.311,
"step": 98
},
{
"epoch": 70.71428571428571,
"eval_accuracy": 0.875,
"eval_loss": 0.34521356225013733,
"eval_runtime": 0.6376,
"eval_samples_per_second": 225.852,
"eval_steps_per_second": 4.705,
"step": 99
},
{
"epoch": 71.42857142857143,
"grad_norm": 5.671431541442871,
"learning_rate": 0.0,
"loss": 0.3137,
"step": 100
},
{
"epoch": 71.42857142857143,
"eval_accuracy": 0.875,
"eval_loss": 0.3459942042827606,
"eval_runtime": 0.7141,
"eval_samples_per_second": 201.642,
"eval_steps_per_second": 4.201,
"step": 100
},
{
"epoch": 71.42857142857143,
"step": 100,
"total_flos": 5.956344520589353e+17,
"train_loss": 0.420122013092041,
"train_runtime": 326.0769,
"train_samples_per_second": 130.644,
"train_steps_per_second": 0.307
}
],
"logging_steps": 10,
"max_steps": 100,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.956344520589353e+17,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}