{ "best_metric": 0.5740740740740741, "best_model_checkpoint": "./results/Vit-CBIS/checkpoint-330", "epoch": 3.0, "eval_steps": 500, "global_step": 495, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06060606060606061, "grad_norm": 0.9134721755981445, "learning_rate": 2.9393939393939394e-05, "loss": 0.6735, "step": 10 }, { "epoch": 0.12121212121212122, "grad_norm": 0.41935786604881287, "learning_rate": 2.8787878787878788e-05, "loss": 0.7149, "step": 20 }, { "epoch": 0.18181818181818182, "grad_norm": 2.244313955307007, "learning_rate": 2.8181818181818185e-05, "loss": 0.6885, "step": 30 }, { "epoch": 0.24242424242424243, "grad_norm": 0.9235630035400391, "learning_rate": 2.7575757575757578e-05, "loss": 0.6889, "step": 40 }, { "epoch": 0.30303030303030304, "grad_norm": 0.14859230816364288, "learning_rate": 2.696969696969697e-05, "loss": 0.697, "step": 50 }, { "epoch": 0.36363636363636365, "grad_norm": 1.2009950876235962, "learning_rate": 2.6363636363636365e-05, "loss": 0.6882, "step": 60 }, { "epoch": 0.42424242424242425, "grad_norm": 0.5788372159004211, "learning_rate": 2.575757575757576e-05, "loss": 0.6862, "step": 70 }, { "epoch": 0.48484848484848486, "grad_norm": 0.324853777885437, "learning_rate": 2.5151515151515152e-05, "loss": 0.6898, "step": 80 }, { "epoch": 0.5454545454545454, "grad_norm": 0.8690597414970398, "learning_rate": 2.454545454545455e-05, "loss": 0.6813, "step": 90 }, { "epoch": 0.6060606060606061, "grad_norm": 0.9127840995788574, "learning_rate": 2.3939393939393942e-05, "loss": 0.7099, "step": 100 }, { "epoch": 0.6666666666666666, "grad_norm": 0.8866621851921082, "learning_rate": 2.3333333333333336e-05, "loss": 0.6999, "step": 110 }, { "epoch": 0.7272727272727273, "grad_norm": 0.705461323261261, "learning_rate": 2.272727272727273e-05, "loss": 0.6943, "step": 120 }, { "epoch": 0.7878787878787878, "grad_norm": 1.2404519319534302, "learning_rate": 2.212121212121212e-05, "loss": 0.6866, "step": 130 }, { "epoch": 0.8484848484848485, "grad_norm": 0.7197526693344116, "learning_rate": 2.1515151515151513e-05, "loss": 0.6921, "step": 140 }, { "epoch": 0.9090909090909091, "grad_norm": 0.5975974798202515, "learning_rate": 2.090909090909091e-05, "loss": 0.6876, "step": 150 }, { "epoch": 0.9696969696969697, "grad_norm": 0.814386248588562, "learning_rate": 2.0303030303030303e-05, "loss": 0.7022, "step": 160 }, { "epoch": 1.0, "eval_accuracy": 0.4470899470899471, "eval_loss": 0.6970731616020203, "eval_runtime": 35.3817, "eval_samples_per_second": 10.683, "eval_steps_per_second": 1.357, "step": 165 }, { "epoch": 1.0303030303030303, "grad_norm": 0.6936383843421936, "learning_rate": 1.9696969696969697e-05, "loss": 0.6918, "step": 170 }, { "epoch": 1.0909090909090908, "grad_norm": 0.5651612281799316, "learning_rate": 1.909090909090909e-05, "loss": 0.6903, "step": 180 }, { "epoch": 1.1515151515151516, "grad_norm": 0.1796492338180542, "learning_rate": 1.8484848484848484e-05, "loss": 0.7095, "step": 190 }, { "epoch": 1.2121212121212122, "grad_norm": 0.6916122436523438, "learning_rate": 1.7878787878787877e-05, "loss": 0.6983, "step": 200 }, { "epoch": 1.2727272727272727, "grad_norm": 1.9430723190307617, "learning_rate": 1.7272727272727274e-05, "loss": 0.6975, "step": 210 }, { "epoch": 1.3333333333333333, "grad_norm": 0.5348889827728271, "learning_rate": 1.6666666666666667e-05, "loss": 0.6886, "step": 220 }, { "epoch": 1.393939393939394, "grad_norm": 0.644706130027771, "learning_rate": 1.606060606060606e-05, "loss": 0.6841, "step": 230 }, { "epoch": 1.4545454545454546, "grad_norm": 2.1170523166656494, "learning_rate": 1.5454545454545454e-05, "loss": 0.7109, "step": 240 }, { "epoch": 1.5151515151515151, "grad_norm": 0.6115465760231018, "learning_rate": 1.484848484848485e-05, "loss": 0.6882, "step": 250 }, { "epoch": 1.5757575757575757, "grad_norm": 0.8241686820983887, "learning_rate": 1.4242424242424243e-05, "loss": 0.6981, "step": 260 }, { "epoch": 1.6363636363636362, "grad_norm": 1.836000680923462, "learning_rate": 1.3636363636363637e-05, "loss": 0.6897, "step": 270 }, { "epoch": 1.696969696969697, "grad_norm": 0.6261163949966431, "learning_rate": 1.3030303030303032e-05, "loss": 0.6932, "step": 280 }, { "epoch": 1.7575757575757576, "grad_norm": 0.7731136679649353, "learning_rate": 1.2424242424242425e-05, "loss": 0.6859, "step": 290 }, { "epoch": 1.8181818181818183, "grad_norm": 0.28496983647346497, "learning_rate": 1.1818181818181819e-05, "loss": 0.6845, "step": 300 }, { "epoch": 1.878787878787879, "grad_norm": 0.30313462018966675, "learning_rate": 1.1212121212121212e-05, "loss": 0.6861, "step": 310 }, { "epoch": 1.9393939393939394, "grad_norm": 0.7996814846992493, "learning_rate": 1.0606060606060606e-05, "loss": 0.6988, "step": 320 }, { "epoch": 2.0, "grad_norm": 1.020075798034668, "learning_rate": 9.999999999999999e-06, "loss": 0.6895, "step": 330 }, { "epoch": 2.0, "eval_accuracy": 0.5740740740740741, "eval_loss": 0.6877079606056213, "eval_runtime": 35.5495, "eval_samples_per_second": 10.633, "eval_steps_per_second": 1.35, "step": 330 }, { "epoch": 2.0606060606060606, "grad_norm": 0.3215593695640564, "learning_rate": 9.393939393939394e-06, "loss": 0.7026, "step": 340 }, { "epoch": 2.121212121212121, "grad_norm": 0.6477654576301575, "learning_rate": 8.787878787878788e-06, "loss": 0.6873, "step": 350 }, { "epoch": 2.1818181818181817, "grad_norm": 0.27149632573127747, "learning_rate": 8.181818181818181e-06, "loss": 0.6823, "step": 360 }, { "epoch": 2.242424242424242, "grad_norm": 0.7159335017204285, "learning_rate": 7.5757575757575764e-06, "loss": 0.7014, "step": 370 }, { "epoch": 2.303030303030303, "grad_norm": 0.2240850031375885, "learning_rate": 6.96969696969697e-06, "loss": 0.6903, "step": 380 }, { "epoch": 2.3636363636363638, "grad_norm": 1.4085216522216797, "learning_rate": 6.363636363636364e-06, "loss": 0.7011, "step": 390 }, { "epoch": 2.4242424242424243, "grad_norm": 0.6638109087944031, "learning_rate": 5.757575757575758e-06, "loss": 0.6842, "step": 400 }, { "epoch": 2.484848484848485, "grad_norm": 0.7225008606910706, "learning_rate": 5.151515151515151e-06, "loss": 0.6887, "step": 410 }, { "epoch": 2.5454545454545454, "grad_norm": 0.23257039487361908, "learning_rate": 4.5454545454545455e-06, "loss": 0.6993, "step": 420 }, { "epoch": 2.606060606060606, "grad_norm": 0.1906505525112152, "learning_rate": 3.93939393939394e-06, "loss": 0.6972, "step": 430 }, { "epoch": 2.6666666666666665, "grad_norm": 0.487804651260376, "learning_rate": 3.3333333333333333e-06, "loss": 0.6879, "step": 440 }, { "epoch": 2.7272727272727275, "grad_norm": 0.8791880011558533, "learning_rate": 2.7272727272727272e-06, "loss": 0.6917, "step": 450 }, { "epoch": 2.787878787878788, "grad_norm": 0.1857946664094925, "learning_rate": 2.121212121212121e-06, "loss": 0.6911, "step": 460 }, { "epoch": 2.8484848484848486, "grad_norm": 1.3687998056411743, "learning_rate": 1.5151515151515152e-06, "loss": 0.6889, "step": 470 }, { "epoch": 2.909090909090909, "grad_norm": 0.6968662738800049, "learning_rate": 9.090909090909091e-07, "loss": 0.6969, "step": 480 }, { "epoch": 2.9696969696969697, "grad_norm": 0.19741572439670563, "learning_rate": 3.0303030303030305e-07, "loss": 0.6969, "step": 490 }, { "epoch": 3.0, "eval_accuracy": 0.5264550264550265, "eval_loss": 0.6918376684188843, "eval_runtime": 35.4925, "eval_samples_per_second": 10.65, "eval_steps_per_second": 1.352, "step": 495 }, { "epoch": 3.0, "step": 495, "total_flos": 3.064033269360968e+17, "train_loss": 0.6929814497629802, "train_runtime": 561.8993, "train_samples_per_second": 7.037, "train_steps_per_second": 0.881 } ], "logging_steps": 10, "max_steps": 495, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.064033269360968e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }