{ "best_metric": 0.32666629552841187, "best_model_checkpoint": "vit-msn-small-beta-fia-manually-enhanced-HSV_test_5/checkpoint-44", "epoch": 71.42857142857143, "eval_steps": 500, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.7142857142857143, "eval_accuracy": 0.22916666666666666, "eval_loss": 1.1105738878250122, "eval_runtime": 0.6997, "eval_samples_per_second": 205.812, "eval_steps_per_second": 4.288, "step": 1 }, { "epoch": 1.4285714285714286, "eval_accuracy": 0.2569444444444444, "eval_loss": 1.098416805267334, "eval_runtime": 0.6273, "eval_samples_per_second": 229.555, "eval_steps_per_second": 4.782, "step": 2 }, { "epoch": 2.857142857142857, "eval_accuracy": 0.4097222222222222, "eval_loss": 1.039996862411499, "eval_runtime": 0.6547, "eval_samples_per_second": 219.941, "eval_steps_per_second": 4.582, "step": 4 }, { "epoch": 3.571428571428571, "eval_accuracy": 0.5486111111111112, "eval_loss": 0.99596107006073, "eval_runtime": 0.6487, "eval_samples_per_second": 221.995, "eval_steps_per_second": 4.625, "step": 5 }, { "epoch": 5.0, "eval_accuracy": 0.7291666666666666, "eval_loss": 0.8868050575256348, "eval_runtime": 0.7036, "eval_samples_per_second": 204.658, "eval_steps_per_second": 4.264, "step": 7 }, { "epoch": 5.714285714285714, "eval_accuracy": 0.7777777777777778, "eval_loss": 0.8263272047042847, "eval_runtime": 0.7254, "eval_samples_per_second": 198.509, "eval_steps_per_second": 4.136, "step": 8 }, { "epoch": 6.428571428571429, "eval_accuracy": 0.8055555555555556, "eval_loss": 0.7650943994522095, "eval_runtime": 0.6223, "eval_samples_per_second": 231.4, "eval_steps_per_second": 4.821, "step": 9 }, { "epoch": 7.142857142857143, "grad_norm": 13.31224536895752, "learning_rate": 4.000000000000001e-06, "loss": 0.9808, "step": 10 }, { "epoch": 7.857142857142857, "eval_accuracy": 0.8125, "eval_loss": 0.6520677208900452, "eval_runtime": 0.6795, "eval_samples_per_second": 211.923, "eval_steps_per_second": 4.415, "step": 11 }, { "epoch": 8.571428571428571, "eval_accuracy": 0.8125, "eval_loss": 0.605199933052063, "eval_runtime": 0.6616, "eval_samples_per_second": 217.658, "eval_steps_per_second": 4.535, "step": 12 }, { "epoch": 10.0, "eval_accuracy": 0.8125, "eval_loss": 0.538772702217102, "eval_runtime": 0.6326, "eval_samples_per_second": 227.627, "eval_steps_per_second": 4.742, "step": 14 }, { "epoch": 10.714285714285714, "eval_accuracy": 0.8125, "eval_loss": 0.5174447894096375, "eval_runtime": 0.6152, "eval_samples_per_second": 234.055, "eval_steps_per_second": 4.876, "step": 15 }, { "epoch": 11.428571428571429, "eval_accuracy": 0.8125, "eval_loss": 0.503182590007782, "eval_runtime": 0.6377, "eval_samples_per_second": 225.804, "eval_steps_per_second": 4.704, "step": 16 }, { "epoch": 12.857142857142858, "eval_accuracy": 0.8125, "eval_loss": 0.5022291541099548, "eval_runtime": 0.6473, "eval_samples_per_second": 222.471, "eval_steps_per_second": 4.635, "step": 18 }, { "epoch": 13.571428571428571, "eval_accuracy": 0.8194444444444444, "eval_loss": 0.5044423341751099, "eval_runtime": 0.6293, "eval_samples_per_second": 228.838, "eval_steps_per_second": 4.767, "step": 19 }, { "epoch": 14.285714285714286, "grad_norm": 3.2047011852264404, "learning_rate": 8.000000000000001e-06, "loss": 0.5431, "step": 20 }, { "epoch": 15.0, "eval_accuracy": 0.8263888888888888, "eval_loss": 0.47730180621147156, "eval_runtime": 0.7212, "eval_samples_per_second": 199.678, "eval_steps_per_second": 4.16, "step": 21 }, { "epoch": 15.714285714285714, "eval_accuracy": 0.8333333333333334, "eval_loss": 0.4439104497432709, "eval_runtime": 0.6519, "eval_samples_per_second": 220.904, "eval_steps_per_second": 4.602, "step": 22 }, { "epoch": 16.428571428571427, "eval_accuracy": 0.8402777777777778, "eval_loss": 0.41979074478149414, "eval_runtime": 0.6167, "eval_samples_per_second": 233.497, "eval_steps_per_second": 4.865, "step": 23 }, { "epoch": 17.857142857142858, "eval_accuracy": 0.8819444444444444, "eval_loss": 0.38726139068603516, "eval_runtime": 0.626, "eval_samples_per_second": 230.043, "eval_steps_per_second": 4.793, "step": 25 }, { "epoch": 18.571428571428573, "eval_accuracy": 0.8888888888888888, "eval_loss": 0.37299442291259766, "eval_runtime": 0.6291, "eval_samples_per_second": 228.912, "eval_steps_per_second": 4.769, "step": 26 }, { "epoch": 20.0, "eval_accuracy": 0.9027777777777778, "eval_loss": 0.3773989677429199, "eval_runtime": 0.63, "eval_samples_per_second": 228.574, "eval_steps_per_second": 4.762, "step": 28 }, { "epoch": 20.714285714285715, "eval_accuracy": 0.9097222222222222, "eval_loss": 0.37053972482681274, "eval_runtime": 0.6212, "eval_samples_per_second": 231.817, "eval_steps_per_second": 4.83, "step": 29 }, { "epoch": 21.428571428571427, "grad_norm": 5.860249996185303, "learning_rate": 9.333333333333334e-06, "loss": 0.4028, "step": 30 }, { "epoch": 21.428571428571427, "eval_accuracy": 0.9097222222222222, "eval_loss": 0.3586524724960327, "eval_runtime": 0.6377, "eval_samples_per_second": 225.813, "eval_steps_per_second": 4.704, "step": 30 }, { "epoch": 22.857142857142858, "eval_accuracy": 0.8958333333333334, "eval_loss": 0.36622118949890137, "eval_runtime": 0.6654, "eval_samples_per_second": 216.406, "eval_steps_per_second": 4.508, "step": 32 }, { "epoch": 23.571428571428573, "eval_accuracy": 0.8680555555555556, "eval_loss": 0.37790825963020325, "eval_runtime": 0.6384, "eval_samples_per_second": 225.558, "eval_steps_per_second": 4.699, "step": 33 }, { "epoch": 25.0, "eval_accuracy": 0.8263888888888888, "eval_loss": 0.43221160769462585, "eval_runtime": 0.6264, "eval_samples_per_second": 229.872, "eval_steps_per_second": 4.789, "step": 35 }, { "epoch": 25.714285714285715, "eval_accuracy": 0.8333333333333334, "eval_loss": 0.39439037442207336, "eval_runtime": 0.6534, "eval_samples_per_second": 220.396, "eval_steps_per_second": 4.592, "step": 36 }, { "epoch": 26.428571428571427, "eval_accuracy": 0.8888888888888888, "eval_loss": 0.3585418462753296, "eval_runtime": 0.6327, "eval_samples_per_second": 227.579, "eval_steps_per_second": 4.741, "step": 37 }, { "epoch": 27.857142857142858, "eval_accuracy": 0.8888888888888888, "eval_loss": 0.3607942461967468, "eval_runtime": 0.6624, "eval_samples_per_second": 217.391, "eval_steps_per_second": 4.529, "step": 39 }, { "epoch": 28.571428571428573, "grad_norm": 3.4132988452911377, "learning_rate": 8.000000000000001e-06, "loss": 0.3497, "step": 40 }, { "epoch": 28.571428571428573, "eval_accuracy": 0.8472222222222222, "eval_loss": 0.39719662070274353, "eval_runtime": 0.643, "eval_samples_per_second": 223.955, "eval_steps_per_second": 4.666, "step": 40 }, { "epoch": 30.0, "eval_accuracy": 0.8611111111111112, "eval_loss": 0.3804582953453064, "eval_runtime": 0.6301, "eval_samples_per_second": 228.526, "eval_steps_per_second": 4.761, "step": 42 }, { "epoch": 30.714285714285715, "eval_accuracy": 0.8819444444444444, "eval_loss": 0.3610667586326599, "eval_runtime": 0.6248, "eval_samples_per_second": 230.471, "eval_steps_per_second": 4.801, "step": 43 }, { "epoch": 31.428571428571427, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.32666629552841187, "eval_runtime": 0.6456, "eval_samples_per_second": 223.058, "eval_steps_per_second": 4.647, "step": 44 }, { "epoch": 32.857142857142854, "eval_accuracy": 0.9027777777777778, "eval_loss": 0.3402611017227173, "eval_runtime": 0.6337, "eval_samples_per_second": 227.247, "eval_steps_per_second": 4.734, "step": 46 }, { "epoch": 33.57142857142857, "eval_accuracy": 0.875, "eval_loss": 0.37514248490333557, "eval_runtime": 0.7032, "eval_samples_per_second": 204.765, "eval_steps_per_second": 4.266, "step": 47 }, { "epoch": 35.0, "eval_accuracy": 0.8680555555555556, "eval_loss": 0.3801332414150238, "eval_runtime": 0.659, "eval_samples_per_second": 218.502, "eval_steps_per_second": 4.552, "step": 49 }, { "epoch": 35.714285714285715, "grad_norm": 6.472348213195801, "learning_rate": 6.666666666666667e-06, "loss": 0.3278, "step": 50 }, { "epoch": 35.714285714285715, "eval_accuracy": 0.8958333333333334, "eval_loss": 0.34991347789764404, "eval_runtime": 0.6712, "eval_samples_per_second": 214.545, "eval_steps_per_second": 4.47, "step": 50 }, { "epoch": 36.42857142857143, "eval_accuracy": 0.8958333333333334, "eval_loss": 0.33839675784111023, "eval_runtime": 0.6365, "eval_samples_per_second": 226.231, "eval_steps_per_second": 4.713, "step": 51 }, { "epoch": 37.857142857142854, "eval_accuracy": 0.8541666666666666, "eval_loss": 0.3642105460166931, "eval_runtime": 0.6301, "eval_samples_per_second": 228.534, "eval_steps_per_second": 4.761, "step": 53 }, { "epoch": 38.57142857142857, "eval_accuracy": 0.8194444444444444, "eval_loss": 0.3996630907058716, "eval_runtime": 0.6609, "eval_samples_per_second": 217.89, "eval_steps_per_second": 4.539, "step": 54 }, { "epoch": 40.0, "eval_accuracy": 0.8402777777777778, "eval_loss": 0.3843066394329071, "eval_runtime": 0.6636, "eval_samples_per_second": 216.984, "eval_steps_per_second": 4.521, "step": 56 }, { "epoch": 40.714285714285715, "eval_accuracy": 0.8680555555555556, "eval_loss": 0.3675690293312073, "eval_runtime": 0.6194, "eval_samples_per_second": 232.479, "eval_steps_per_second": 4.843, "step": 57 }, { "epoch": 41.42857142857143, "eval_accuracy": 0.9027777777777778, "eval_loss": 0.3464236557483673, "eval_runtime": 0.6267, "eval_samples_per_second": 229.775, "eval_steps_per_second": 4.787, "step": 58 }, { "epoch": 42.857142857142854, "grad_norm": 6.519013404846191, "learning_rate": 5.333333333333334e-06, "loss": 0.3334, "step": 60 }, { "epoch": 42.857142857142854, "eval_accuracy": 0.8819444444444444, "eval_loss": 0.36175864934921265, "eval_runtime": 0.7106, "eval_samples_per_second": 202.636, "eval_steps_per_second": 4.222, "step": 60 }, { "epoch": 43.57142857142857, "eval_accuracy": 0.8194444444444444, "eval_loss": 0.40056005120277405, "eval_runtime": 0.6703, "eval_samples_per_second": 214.814, "eval_steps_per_second": 4.475, "step": 61 }, { "epoch": 45.0, "eval_accuracy": 0.7638888888888888, "eval_loss": 0.49312081933021545, "eval_runtime": 0.7051, "eval_samples_per_second": 204.23, "eval_steps_per_second": 4.255, "step": 63 }, { "epoch": 45.714285714285715, "eval_accuracy": 0.7708333333333334, "eval_loss": 0.48445218801498413, "eval_runtime": 0.71, "eval_samples_per_second": 202.808, "eval_steps_per_second": 4.225, "step": 64 }, { "epoch": 46.42857142857143, "eval_accuracy": 0.7916666666666666, "eval_loss": 0.4485209286212921, "eval_runtime": 0.6303, "eval_samples_per_second": 228.459, "eval_steps_per_second": 4.76, "step": 65 }, { "epoch": 47.857142857142854, "eval_accuracy": 0.8472222222222222, "eval_loss": 0.378328800201416, "eval_runtime": 0.6131, "eval_samples_per_second": 234.855, "eval_steps_per_second": 4.893, "step": 67 }, { "epoch": 48.57142857142857, "eval_accuracy": 0.8472222222222222, "eval_loss": 0.37234801054000854, "eval_runtime": 0.6727, "eval_samples_per_second": 214.05, "eval_steps_per_second": 4.459, "step": 68 }, { "epoch": 50.0, "grad_norm": 5.204492092132568, "learning_rate": 4.000000000000001e-06, "loss": 0.3334, "step": 70 }, { "epoch": 50.0, "eval_accuracy": 0.8125, "eval_loss": 0.407737672328949, "eval_runtime": 0.6505, "eval_samples_per_second": 221.382, "eval_steps_per_second": 4.612, "step": 70 }, { "epoch": 50.714285714285715, "eval_accuracy": 0.7986111111111112, "eval_loss": 0.4380877912044525, "eval_runtime": 0.7093, "eval_samples_per_second": 203.024, "eval_steps_per_second": 4.23, "step": 71 }, { "epoch": 51.42857142857143, "eval_accuracy": 0.7847222222222222, "eval_loss": 0.46269893646240234, "eval_runtime": 0.6731, "eval_samples_per_second": 213.937, "eval_steps_per_second": 4.457, "step": 72 }, { "epoch": 52.857142857142854, "eval_accuracy": 0.7986111111111112, "eval_loss": 0.44445788860321045, "eval_runtime": 0.6372, "eval_samples_per_second": 225.995, "eval_steps_per_second": 4.708, "step": 74 }, { "epoch": 53.57142857142857, "eval_accuracy": 0.8125, "eval_loss": 0.41410741209983826, "eval_runtime": 0.6728, "eval_samples_per_second": 214.026, "eval_steps_per_second": 4.459, "step": 75 }, { "epoch": 55.0, "eval_accuracy": 0.8680555555555556, "eval_loss": 0.3488573431968689, "eval_runtime": 0.6306, "eval_samples_per_second": 228.368, "eval_steps_per_second": 4.758, "step": 77 }, { "epoch": 55.714285714285715, "eval_accuracy": 0.8958333333333334, "eval_loss": 0.33705562353134155, "eval_runtime": 0.6859, "eval_samples_per_second": 209.945, "eval_steps_per_second": 4.374, "step": 78 }, { "epoch": 56.42857142857143, "eval_accuracy": 0.8888888888888888, "eval_loss": 0.3358408510684967, "eval_runtime": 0.6867, "eval_samples_per_second": 209.701, "eval_steps_per_second": 4.369, "step": 79 }, { "epoch": 57.142857142857146, "grad_norm": 5.973431587219238, "learning_rate": 2.666666666666667e-06, "loss": 0.3105, "step": 80 }, { "epoch": 57.857142857142854, "eval_accuracy": 0.8680555555555556, "eval_loss": 0.3538711965084076, "eval_runtime": 0.7307, "eval_samples_per_second": 197.072, "eval_steps_per_second": 4.106, "step": 81 }, { "epoch": 58.57142857142857, "eval_accuracy": 0.8541666666666666, "eval_loss": 0.3678491413593292, "eval_runtime": 0.6126, "eval_samples_per_second": 235.055, "eval_steps_per_second": 4.897, "step": 82 }, { "epoch": 60.0, "eval_accuracy": 0.8263888888888888, "eval_loss": 0.3930552899837494, "eval_runtime": 0.633, "eval_samples_per_second": 227.478, "eval_steps_per_second": 4.739, "step": 84 }, { "epoch": 60.714285714285715, "eval_accuracy": 0.8263888888888888, "eval_loss": 0.3938286006450653, "eval_runtime": 0.6516, "eval_samples_per_second": 220.996, "eval_steps_per_second": 4.604, "step": 85 }, { "epoch": 61.42857142857143, "eval_accuracy": 0.8472222222222222, "eval_loss": 0.3896949589252472, "eval_runtime": 0.6582, "eval_samples_per_second": 218.793, "eval_steps_per_second": 4.558, "step": 86 }, { "epoch": 62.857142857142854, "eval_accuracy": 0.8611111111111112, "eval_loss": 0.3637922406196594, "eval_runtime": 0.6651, "eval_samples_per_second": 216.505, "eval_steps_per_second": 4.511, "step": 88 }, { "epoch": 63.57142857142857, "eval_accuracy": 0.875, "eval_loss": 0.34960028529167175, "eval_runtime": 0.6331, "eval_samples_per_second": 227.443, "eval_steps_per_second": 4.738, "step": 89 }, { "epoch": 64.28571428571429, "grad_norm": 6.167888164520264, "learning_rate": 1.3333333333333334e-06, "loss": 0.3061, "step": 90 }, { "epoch": 65.0, "eval_accuracy": 0.8958333333333334, "eval_loss": 0.3304632306098938, "eval_runtime": 0.6686, "eval_samples_per_second": 215.369, "eval_steps_per_second": 4.487, "step": 91 }, { "epoch": 65.71428571428571, "eval_accuracy": 0.9027777777777778, "eval_loss": 0.3283728361129761, "eval_runtime": 0.6659, "eval_samples_per_second": 216.261, "eval_steps_per_second": 4.505, "step": 92 }, { "epoch": 66.42857142857143, "eval_accuracy": 0.8958333333333334, "eval_loss": 0.3283740282058716, "eval_runtime": 0.6699, "eval_samples_per_second": 214.972, "eval_steps_per_second": 4.479, "step": 93 }, { "epoch": 67.85714285714286, "eval_accuracy": 0.8958333333333334, "eval_loss": 0.33374664187431335, "eval_runtime": 0.668, "eval_samples_per_second": 215.572, "eval_steps_per_second": 4.491, "step": 95 }, { "epoch": 68.57142857142857, "eval_accuracy": 0.8888888888888888, "eval_loss": 0.33741050958633423, "eval_runtime": 0.6191, "eval_samples_per_second": 232.594, "eval_steps_per_second": 4.846, "step": 96 }, { "epoch": 70.0, "eval_accuracy": 0.875, "eval_loss": 0.34418821334838867, "eval_runtime": 0.6959, "eval_samples_per_second": 206.929, "eval_steps_per_second": 4.311, "step": 98 }, { "epoch": 70.71428571428571, "eval_accuracy": 0.875, "eval_loss": 0.34521356225013733, "eval_runtime": 0.6376, "eval_samples_per_second": 225.852, "eval_steps_per_second": 4.705, "step": 99 }, { "epoch": 71.42857142857143, "grad_norm": 5.671431541442871, "learning_rate": 0.0, "loss": 0.3137, "step": 100 }, { "epoch": 71.42857142857143, "eval_accuracy": 0.875, "eval_loss": 0.3459942042827606, "eval_runtime": 0.7141, "eval_samples_per_second": 201.642, "eval_steps_per_second": 4.201, "step": 100 }, { "epoch": 71.42857142857143, "step": 100, "total_flos": 5.956344520589353e+17, "train_loss": 0.420122013092041, "train_runtime": 326.0769, "train_samples_per_second": 130.644, "train_steps_per_second": 0.307 } ], "logging_steps": 10, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.956344520589353e+17, "train_batch_size": 64, "trial_name": null, "trial_params": null }