{
  "best_metric": 0.32666629552841187,
  "best_model_checkpoint": "vit-msn-small-beta-fia-manually-enhanced-HSV_test_5/checkpoint-44",
  "epoch": 71.42857142857143,
  "eval_steps": 500,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.7142857142857143,
      "eval_accuracy": 0.22916666666666666,
      "eval_loss": 1.1105738878250122,
      "eval_runtime": 0.6997,
      "eval_samples_per_second": 205.812,
      "eval_steps_per_second": 4.288,
      "step": 1
    },
    {
      "epoch": 1.4285714285714286,
      "eval_accuracy": 0.2569444444444444,
      "eval_loss": 1.098416805267334,
      "eval_runtime": 0.6273,
      "eval_samples_per_second": 229.555,
      "eval_steps_per_second": 4.782,
      "step": 2
    },
    {
      "epoch": 2.857142857142857,
      "eval_accuracy": 0.4097222222222222,
      "eval_loss": 1.039996862411499,
      "eval_runtime": 0.6547,
      "eval_samples_per_second": 219.941,
      "eval_steps_per_second": 4.582,
      "step": 4
    },
    {
      "epoch": 3.571428571428571,
      "eval_accuracy": 0.5486111111111112,
      "eval_loss": 0.99596107006073,
      "eval_runtime": 0.6487,
      "eval_samples_per_second": 221.995,
      "eval_steps_per_second": 4.625,
      "step": 5
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.7291666666666666,
      "eval_loss": 0.8868050575256348,
      "eval_runtime": 0.7036,
      "eval_samples_per_second": 204.658,
      "eval_steps_per_second": 4.264,
      "step": 7
    },
    {
      "epoch": 5.714285714285714,
      "eval_accuracy": 0.7777777777777778,
      "eval_loss": 0.8263272047042847,
      "eval_runtime": 0.7254,
      "eval_samples_per_second": 198.509,
      "eval_steps_per_second": 4.136,
      "step": 8
    },
    {
      "epoch": 6.428571428571429,
      "eval_accuracy": 0.8055555555555556,
      "eval_loss": 0.7650943994522095,
      "eval_runtime": 0.6223,
      "eval_samples_per_second": 231.4,
      "eval_steps_per_second": 4.821,
      "step": 9
    },
    {
      "epoch": 7.142857142857143,
      "grad_norm": 13.31224536895752,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.9808,
      "step": 10
    },
    {
      "epoch": 7.857142857142857,
      "eval_accuracy": 0.8125,
      "eval_loss": 0.6520677208900452,
      "eval_runtime": 0.6795,
      "eval_samples_per_second": 211.923,
      "eval_steps_per_second": 4.415,
      "step": 11
    },
    {
      "epoch": 8.571428571428571,
      "eval_accuracy": 0.8125,
      "eval_loss": 0.605199933052063,
      "eval_runtime": 0.6616,
      "eval_samples_per_second": 217.658,
      "eval_steps_per_second": 4.535,
      "step": 12
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.8125,
      "eval_loss": 0.538772702217102,
      "eval_runtime": 0.6326,
      "eval_samples_per_second": 227.627,
      "eval_steps_per_second": 4.742,
      "step": 14
    },
    {
      "epoch": 10.714285714285714,
      "eval_accuracy": 0.8125,
      "eval_loss": 0.5174447894096375,
      "eval_runtime": 0.6152,
      "eval_samples_per_second": 234.055,
      "eval_steps_per_second": 4.876,
      "step": 15
    },
    {
      "epoch": 11.428571428571429,
      "eval_accuracy": 0.8125,
      "eval_loss": 0.503182590007782,
      "eval_runtime": 0.6377,
      "eval_samples_per_second": 225.804,
      "eval_steps_per_second": 4.704,
      "step": 16
    },
    {
      "epoch": 12.857142857142858,
      "eval_accuracy": 0.8125,
      "eval_loss": 0.5022291541099548,
      "eval_runtime": 0.6473,
      "eval_samples_per_second": 222.471,
      "eval_steps_per_second": 4.635,
      "step": 18
    },
    {
      "epoch": 13.571428571428571,
      "eval_accuracy": 0.8194444444444444,
      "eval_loss": 0.5044423341751099,
      "eval_runtime": 0.6293,
      "eval_samples_per_second": 228.838,
      "eval_steps_per_second": 4.767,
      "step": 19
    },
    {
      "epoch": 14.285714285714286,
      "grad_norm": 3.2047011852264404,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.5431,
      "step": 20
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.8263888888888888,
      "eval_loss": 0.47730180621147156,
      "eval_runtime": 0.7212,
      "eval_samples_per_second": 199.678,
      "eval_steps_per_second": 4.16,
      "step": 21
    },
    {
      "epoch": 15.714285714285714,
      "eval_accuracy": 0.8333333333333334,
      "eval_loss": 0.4439104497432709,
      "eval_runtime": 0.6519,
      "eval_samples_per_second": 220.904,
      "eval_steps_per_second": 4.602,
      "step": 22
    },
    {
      "epoch": 16.428571428571427,
      "eval_accuracy": 0.8402777777777778,
      "eval_loss": 0.41979074478149414,
      "eval_runtime": 0.6167,
      "eval_samples_per_second": 233.497,
      "eval_steps_per_second": 4.865,
      "step": 23
    },
    {
      "epoch": 17.857142857142858,
      "eval_accuracy": 0.8819444444444444,
      "eval_loss": 0.38726139068603516,
      "eval_runtime": 0.626,
      "eval_samples_per_second": 230.043,
      "eval_steps_per_second": 4.793,
      "step": 25
    },
    {
      "epoch": 18.571428571428573,
      "eval_accuracy": 0.8888888888888888,
      "eval_loss": 0.37299442291259766,
      "eval_runtime": 0.6291,
      "eval_samples_per_second": 228.912,
      "eval_steps_per_second": 4.769,
      "step": 26
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.9027777777777778,
      "eval_loss": 0.3773989677429199,
      "eval_runtime": 0.63,
      "eval_samples_per_second": 228.574,
      "eval_steps_per_second": 4.762,
      "step": 28
    },
    {
      "epoch": 20.714285714285715,
      "eval_accuracy": 0.9097222222222222,
      "eval_loss": 0.37053972482681274,
      "eval_runtime": 0.6212,
      "eval_samples_per_second": 231.817,
      "eval_steps_per_second": 4.83,
      "step": 29
    },
    {
      "epoch": 21.428571428571427,
      "grad_norm": 5.860249996185303,
      "learning_rate": 9.333333333333334e-06,
      "loss": 0.4028,
      "step": 30
    },
    {
      "epoch": 21.428571428571427,
      "eval_accuracy": 0.9097222222222222,
      "eval_loss": 0.3586524724960327,
      "eval_runtime": 0.6377,
      "eval_samples_per_second": 225.813,
      "eval_steps_per_second": 4.704,
      "step": 30
    },
    {
      "epoch": 22.857142857142858,
      "eval_accuracy": 0.8958333333333334,
      "eval_loss": 0.36622118949890137,
      "eval_runtime": 0.6654,
      "eval_samples_per_second": 216.406,
      "eval_steps_per_second": 4.508,
      "step": 32
    },
    {
      "epoch": 23.571428571428573,
      "eval_accuracy": 0.8680555555555556,
      "eval_loss": 0.37790825963020325,
      "eval_runtime": 0.6384,
      "eval_samples_per_second": 225.558,
      "eval_steps_per_second": 4.699,
      "step": 33
    },
    {
      "epoch": 25.0,
      "eval_accuracy": 0.8263888888888888,
      "eval_loss": 0.43221160769462585,
      "eval_runtime": 0.6264,
      "eval_samples_per_second": 229.872,
      "eval_steps_per_second": 4.789,
      "step": 35
    },
    {
      "epoch": 25.714285714285715,
      "eval_accuracy": 0.8333333333333334,
      "eval_loss": 0.39439037442207336,
      "eval_runtime": 0.6534,
      "eval_samples_per_second": 220.396,
      "eval_steps_per_second": 4.592,
      "step": 36
    },
    {
      "epoch": 26.428571428571427,
      "eval_accuracy": 0.8888888888888888,
      "eval_loss": 0.3585418462753296,
      "eval_runtime": 0.6327,
      "eval_samples_per_second": 227.579,
      "eval_steps_per_second": 4.741,
      "step": 37
    },
    {
      "epoch": 27.857142857142858,
      "eval_accuracy": 0.8888888888888888,
      "eval_loss": 0.3607942461967468,
      "eval_runtime": 0.6624,
      "eval_samples_per_second": 217.391,
      "eval_steps_per_second": 4.529,
      "step": 39
    },
    {
      "epoch": 28.571428571428573,
      "grad_norm": 3.4132988452911377,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.3497,
      "step": 40
    },
    {
      "epoch": 28.571428571428573,
      "eval_accuracy": 0.8472222222222222,
      "eval_loss": 0.39719662070274353,
      "eval_runtime": 0.643,
      "eval_samples_per_second": 223.955,
      "eval_steps_per_second": 4.666,
      "step": 40
    },
    {
      "epoch": 30.0,
      "eval_accuracy": 0.8611111111111112,
      "eval_loss": 0.3804582953453064,
      "eval_runtime": 0.6301,
      "eval_samples_per_second": 228.526,
      "eval_steps_per_second": 4.761,
      "step": 42
    },
    {
      "epoch": 30.714285714285715,
      "eval_accuracy": 0.8819444444444444,
      "eval_loss": 0.3610667586326599,
      "eval_runtime": 0.6248,
      "eval_samples_per_second": 230.471,
      "eval_steps_per_second": 4.801,
      "step": 43
    },
    {
      "epoch": 31.428571428571427,
      "eval_accuracy": 0.9166666666666666,
      "eval_loss": 0.32666629552841187,
      "eval_runtime": 0.6456,
      "eval_samples_per_second": 223.058,
      "eval_steps_per_second": 4.647,
      "step": 44
    },
    {
      "epoch": 32.857142857142854,
      "eval_accuracy": 0.9027777777777778,
      "eval_loss": 0.3402611017227173,
      "eval_runtime": 0.6337,
      "eval_samples_per_second": 227.247,
      "eval_steps_per_second": 4.734,
      "step": 46
    },
    {
      "epoch": 33.57142857142857,
      "eval_accuracy": 0.875,
      "eval_loss": 0.37514248490333557,
      "eval_runtime": 0.7032,
      "eval_samples_per_second": 204.765,
      "eval_steps_per_second": 4.266,
      "step": 47
    },
    {
      "epoch": 35.0,
      "eval_accuracy": 0.8680555555555556,
      "eval_loss": 0.3801332414150238,
      "eval_runtime": 0.659,
      "eval_samples_per_second": 218.502,
      "eval_steps_per_second": 4.552,
      "step": 49
    },
    {
      "epoch": 35.714285714285715,
      "grad_norm": 6.472348213195801,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.3278,
      "step": 50
    },
    {
      "epoch": 35.714285714285715,
      "eval_accuracy": 0.8958333333333334,
      "eval_loss": 0.34991347789764404,
      "eval_runtime": 0.6712,
      "eval_samples_per_second": 214.545,
      "eval_steps_per_second": 4.47,
      "step": 50
    },
    {
      "epoch": 36.42857142857143,
      "eval_accuracy": 0.8958333333333334,
      "eval_loss": 0.33839675784111023,
      "eval_runtime": 0.6365,
      "eval_samples_per_second": 226.231,
      "eval_steps_per_second": 4.713,
      "step": 51
    },
    {
      "epoch": 37.857142857142854,
      "eval_accuracy": 0.8541666666666666,
      "eval_loss": 0.3642105460166931,
      "eval_runtime": 0.6301,
      "eval_samples_per_second": 228.534,
      "eval_steps_per_second": 4.761,
      "step": 53
    },
    {
      "epoch": 38.57142857142857,
      "eval_accuracy": 0.8194444444444444,
      "eval_loss": 0.3996630907058716,
      "eval_runtime": 0.6609,
      "eval_samples_per_second": 217.89,
      "eval_steps_per_second": 4.539,
      "step": 54
    },
    {
      "epoch": 40.0,
      "eval_accuracy": 0.8402777777777778,
      "eval_loss": 0.3843066394329071,
      "eval_runtime": 0.6636,
      "eval_samples_per_second": 216.984,
      "eval_steps_per_second": 4.521,
      "step": 56
    },
    {
      "epoch": 40.714285714285715,
      "eval_accuracy": 0.8680555555555556,
      "eval_loss": 0.3675690293312073,
      "eval_runtime": 0.6194,
      "eval_samples_per_second": 232.479,
      "eval_steps_per_second": 4.843,
      "step": 57
    },
    {
      "epoch": 41.42857142857143,
      "eval_accuracy": 0.9027777777777778,
      "eval_loss": 0.3464236557483673,
      "eval_runtime": 0.6267,
      "eval_samples_per_second": 229.775,
      "eval_steps_per_second": 4.787,
      "step": 58
    },
    {
      "epoch": 42.857142857142854,
      "grad_norm": 6.519013404846191,
      "learning_rate": 5.333333333333334e-06,
      "loss": 0.3334,
      "step": 60
    },
    {
      "epoch": 42.857142857142854,
      "eval_accuracy": 0.8819444444444444,
      "eval_loss": 0.36175864934921265,
      "eval_runtime": 0.7106,
      "eval_samples_per_second": 202.636,
      "eval_steps_per_second": 4.222,
      "step": 60
    },
    {
      "epoch": 43.57142857142857,
      "eval_accuracy": 0.8194444444444444,
      "eval_loss": 0.40056005120277405,
      "eval_runtime": 0.6703,
      "eval_samples_per_second": 214.814,
      "eval_steps_per_second": 4.475,
      "step": 61
    },
    {
      "epoch": 45.0,
      "eval_accuracy": 0.7638888888888888,
      "eval_loss": 0.49312081933021545,
      "eval_runtime": 0.7051,
      "eval_samples_per_second": 204.23,
      "eval_steps_per_second": 4.255,
      "step": 63
    },
    {
      "epoch": 45.714285714285715,
      "eval_accuracy": 0.7708333333333334,
      "eval_loss": 0.48445218801498413,
      "eval_runtime": 0.71,
      "eval_samples_per_second": 202.808,
      "eval_steps_per_second": 4.225,
      "step": 64
    },
    {
      "epoch": 46.42857142857143,
      "eval_accuracy": 0.7916666666666666,
      "eval_loss": 0.4485209286212921,
      "eval_runtime": 0.6303,
      "eval_samples_per_second": 228.459,
      "eval_steps_per_second": 4.76,
      "step": 65
    },
    {
      "epoch": 47.857142857142854,
      "eval_accuracy": 0.8472222222222222,
      "eval_loss": 0.378328800201416,
      "eval_runtime": 0.6131,
      "eval_samples_per_second": 234.855,
      "eval_steps_per_second": 4.893,
      "step": 67
    },
    {
      "epoch": 48.57142857142857,
      "eval_accuracy": 0.8472222222222222,
      "eval_loss": 0.37234801054000854,
      "eval_runtime": 0.6727,
      "eval_samples_per_second": 214.05,
      "eval_steps_per_second": 4.459,
      "step": 68
    },
    {
      "epoch": 50.0,
      "grad_norm": 5.204492092132568,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.3334,
      "step": 70
    },
    {
      "epoch": 50.0,
      "eval_accuracy": 0.8125,
      "eval_loss": 0.407737672328949,
      "eval_runtime": 0.6505,
      "eval_samples_per_second": 221.382,
      "eval_steps_per_second": 4.612,
      "step": 70
    },
    {
      "epoch": 50.714285714285715,
      "eval_accuracy": 0.7986111111111112,
      "eval_loss": 0.4380877912044525,
      "eval_runtime": 0.7093,
      "eval_samples_per_second": 203.024,
      "eval_steps_per_second": 4.23,
      "step": 71
    },
    {
      "epoch": 51.42857142857143,
      "eval_accuracy": 0.7847222222222222,
      "eval_loss": 0.46269893646240234,
      "eval_runtime": 0.6731,
      "eval_samples_per_second": 213.937,
      "eval_steps_per_second": 4.457,
      "step": 72
    },
    {
      "epoch": 52.857142857142854,
      "eval_accuracy": 0.7986111111111112,
      "eval_loss": 0.44445788860321045,
      "eval_runtime": 0.6372,
      "eval_samples_per_second": 225.995,
      "eval_steps_per_second": 4.708,
      "step": 74
    },
    {
      "epoch": 53.57142857142857,
      "eval_accuracy": 0.8125,
      "eval_loss": 0.41410741209983826,
      "eval_runtime": 0.6728,
      "eval_samples_per_second": 214.026,
      "eval_steps_per_second": 4.459,
      "step": 75
    },
    {
      "epoch": 55.0,
      "eval_accuracy": 0.8680555555555556,
      "eval_loss": 0.3488573431968689,
      "eval_runtime": 0.6306,
      "eval_samples_per_second": 228.368,
      "eval_steps_per_second": 4.758,
      "step": 77
    },
    {
      "epoch": 55.714285714285715,
      "eval_accuracy": 0.8958333333333334,
      "eval_loss": 0.33705562353134155,
      "eval_runtime": 0.6859,
      "eval_samples_per_second": 209.945,
      "eval_steps_per_second": 4.374,
      "step": 78
    },
    {
      "epoch": 56.42857142857143,
      "eval_accuracy": 0.8888888888888888,
      "eval_loss": 0.3358408510684967,
      "eval_runtime": 0.6867,
      "eval_samples_per_second": 209.701,
      "eval_steps_per_second": 4.369,
      "step": 79
    },
    {
      "epoch": 57.142857142857146,
      "grad_norm": 5.973431587219238,
      "learning_rate": 2.666666666666667e-06,
      "loss": 0.3105,
      "step": 80
    },
    {
      "epoch": 57.857142857142854,
      "eval_accuracy": 0.8680555555555556,
      "eval_loss": 0.3538711965084076,
      "eval_runtime": 0.7307,
      "eval_samples_per_second": 197.072,
      "eval_steps_per_second": 4.106,
      "step": 81
    },
    {
      "epoch": 58.57142857142857,
      "eval_accuracy": 0.8541666666666666,
      "eval_loss": 0.3678491413593292,
      "eval_runtime": 0.6126,
      "eval_samples_per_second": 235.055,
      "eval_steps_per_second": 4.897,
      "step": 82
    },
    {
      "epoch": 60.0,
      "eval_accuracy": 0.8263888888888888,
      "eval_loss": 0.3930552899837494,
      "eval_runtime": 0.633,
      "eval_samples_per_second": 227.478,
      "eval_steps_per_second": 4.739,
      "step": 84
    },
    {
      "epoch": 60.714285714285715,
      "eval_accuracy": 0.8263888888888888,
      "eval_loss": 0.3938286006450653,
      "eval_runtime": 0.6516,
      "eval_samples_per_second": 220.996,
      "eval_steps_per_second": 4.604,
      "step": 85
    },
    {
      "epoch": 61.42857142857143,
      "eval_accuracy": 0.8472222222222222,
      "eval_loss": 0.3896949589252472,
      "eval_runtime": 0.6582,
      "eval_samples_per_second": 218.793,
      "eval_steps_per_second": 4.558,
      "step": 86
    },
    {
      "epoch": 62.857142857142854,
      "eval_accuracy": 0.8611111111111112,
      "eval_loss": 0.3637922406196594,
      "eval_runtime": 0.6651,
      "eval_samples_per_second": 216.505,
      "eval_steps_per_second": 4.511,
      "step": 88
    },
    {
      "epoch": 63.57142857142857,
      "eval_accuracy": 0.875,
      "eval_loss": 0.34960028529167175,
      "eval_runtime": 0.6331,
      "eval_samples_per_second": 227.443,
      "eval_steps_per_second": 4.738,
      "step": 89
    },
    {
      "epoch": 64.28571428571429,
      "grad_norm": 6.167888164520264,
      "learning_rate": 1.3333333333333334e-06,
      "loss": 0.3061,
      "step": 90
    },
    {
      "epoch": 65.0,
      "eval_accuracy": 0.8958333333333334,
      "eval_loss": 0.3304632306098938,
      "eval_runtime": 0.6686,
      "eval_samples_per_second": 215.369,
      "eval_steps_per_second": 4.487,
      "step": 91
    },
    {
      "epoch": 65.71428571428571,
      "eval_accuracy": 0.9027777777777778,
      "eval_loss": 0.3283728361129761,
      "eval_runtime": 0.6659,
      "eval_samples_per_second": 216.261,
      "eval_steps_per_second": 4.505,
      "step": 92
    },
    {
      "epoch": 66.42857142857143,
      "eval_accuracy": 0.8958333333333334,
      "eval_loss": 0.3283740282058716,
      "eval_runtime": 0.6699,
      "eval_samples_per_second": 214.972,
      "eval_steps_per_second": 4.479,
      "step": 93
    },
    {
      "epoch": 67.85714285714286,
      "eval_accuracy": 0.8958333333333334,
      "eval_loss": 0.33374664187431335,
      "eval_runtime": 0.668,
      "eval_samples_per_second": 215.572,
      "eval_steps_per_second": 4.491,
      "step": 95
    },
    {
      "epoch": 68.57142857142857,
      "eval_accuracy": 0.8888888888888888,
      "eval_loss": 0.33741050958633423,
      "eval_runtime": 0.6191,
      "eval_samples_per_second": 232.594,
      "eval_steps_per_second": 4.846,
      "step": 96
    },
    {
      "epoch": 70.0,
      "eval_accuracy": 0.875,
      "eval_loss": 0.34418821334838867,
      "eval_runtime": 0.6959,
      "eval_samples_per_second": 206.929,
      "eval_steps_per_second": 4.311,
      "step": 98
    },
    {
      "epoch": 70.71428571428571,
      "eval_accuracy": 0.875,
      "eval_loss": 0.34521356225013733,
      "eval_runtime": 0.6376,
      "eval_samples_per_second": 225.852,
      "eval_steps_per_second": 4.705,
      "step": 99
    },
    {
      "epoch": 71.42857142857143,
      "grad_norm": 5.671431541442871,
      "learning_rate": 0.0,
      "loss": 0.3137,
      "step": 100
    },
    {
      "epoch": 71.42857142857143,
      "eval_accuracy": 0.875,
      "eval_loss": 0.3459942042827606,
      "eval_runtime": 0.7141,
      "eval_samples_per_second": 201.642,
      "eval_steps_per_second": 4.201,
      "step": 100
    },
    {
      "epoch": 71.42857142857143,
      "step": 100,
      "total_flos": 5.956344520589353e+17,
      "train_loss": 0.420122013092041,
      "train_runtime": 326.0769,
      "train_samples_per_second": 130.644,
      "train_steps_per_second": 0.307
    }
  ],
  "logging_steps": 10,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 100,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.956344520589353e+17,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}