{ "best_metric": 0.7613636363636364, "best_model_checkpoint": "swinv2-tiny-patch4-window8-256-dmae-humeda-DAV40/checkpoint-100", "epoch": 30.0, "eval_steps": 500, "global_step": 120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.3409090909090909, "eval_loss": 1.5349395275115967, "eval_runtime": 2.5542, "eval_samples_per_second": 34.453, "eval_steps_per_second": 1.175, "step": 4 }, { "epoch": 2.0, "eval_accuracy": 0.4431818181818182, "eval_loss": 1.321252703666687, "eval_runtime": 2.354, "eval_samples_per_second": 37.383, "eval_steps_per_second": 1.274, "step": 8 }, { "epoch": 3.0, "grad_norm": 8.033442497253418, "learning_rate": 5e-05, "loss": 4.7629, "step": 12 }, { "epoch": 3.0, "eval_accuracy": 0.4431818181818182, "eval_loss": 1.2540555000305176, "eval_runtime": 2.642, "eval_samples_per_second": 33.308, "eval_steps_per_second": 1.136, "step": 12 }, { "epoch": 4.0, "eval_accuracy": 0.6022727272727273, "eval_loss": 1.2072031497955322, "eval_runtime": 1.7756, "eval_samples_per_second": 49.56, "eval_steps_per_second": 1.69, "step": 16 }, { "epoch": 5.0, "eval_accuracy": 0.6363636363636364, "eval_loss": 1.1313027143478394, "eval_runtime": 1.6972, "eval_samples_per_second": 51.852, "eval_steps_per_second": 1.768, "step": 20 }, { "epoch": 6.0, "grad_norm": 8.113847732543945, "learning_rate": 4.849231551964771e-05, "loss": 3.7987, "step": 24 }, { "epoch": 6.0, "eval_accuracy": 0.6477272727272727, "eval_loss": 1.0712097883224487, "eval_runtime": 1.7097, "eval_samples_per_second": 51.471, "eval_steps_per_second": 1.755, "step": 24 }, { "epoch": 7.0, "eval_accuracy": 0.6590909090909091, "eval_loss": 0.967653214931488, "eval_runtime": 1.7586, "eval_samples_per_second": 50.041, "eval_steps_per_second": 1.706, "step": 28 }, { "epoch": 8.0, "eval_accuracy": 0.7159090909090909, "eval_loss": 0.8655213713645935, "eval_runtime": 1.6895, "eval_samples_per_second": 52.087, "eval_steps_per_second": 1.776, "step": 32 }, { "epoch": 9.0, "grad_norm": 13.333137512207031, "learning_rate": 4.415111107797445e-05, "loss": 3.0437, "step": 36 }, { "epoch": 9.0, "eval_accuracy": 0.6818181818181818, "eval_loss": 0.8563566207885742, "eval_runtime": 1.7069, "eval_samples_per_second": 51.555, "eval_steps_per_second": 1.758, "step": 36 }, { "epoch": 10.0, "eval_accuracy": 0.6818181818181818, "eval_loss": 0.8003210425376892, "eval_runtime": 1.7037, "eval_samples_per_second": 51.653, "eval_steps_per_second": 1.761, "step": 40 }, { "epoch": 11.0, "eval_accuracy": 0.7386363636363636, "eval_loss": 0.7986971139907837, "eval_runtime": 2.4009, "eval_samples_per_second": 36.653, "eval_steps_per_second": 1.25, "step": 44 }, { "epoch": 12.0, "grad_norm": 16.13611602783203, "learning_rate": 3.7500000000000003e-05, "loss": 2.4867, "step": 48 }, { "epoch": 12.0, "eval_accuracy": 0.7159090909090909, "eval_loss": 0.7619297504425049, "eval_runtime": 2.3634, "eval_samples_per_second": 37.235, "eval_steps_per_second": 1.269, "step": 48 }, { "epoch": 13.0, "eval_accuracy": 0.7386363636363636, "eval_loss": 0.7425692081451416, "eval_runtime": 1.785, "eval_samples_per_second": 49.3, "eval_steps_per_second": 1.681, "step": 52 }, { "epoch": 14.0, "eval_accuracy": 0.6931818181818182, "eval_loss": 0.7491652965545654, "eval_runtime": 1.6916, "eval_samples_per_second": 52.023, "eval_steps_per_second": 1.774, "step": 56 }, { "epoch": 15.0, "grad_norm": 18.206378936767578, "learning_rate": 2.9341204441673266e-05, "loss": 2.147, "step": 60 }, { "epoch": 15.0, "eval_accuracy": 0.7159090909090909, "eval_loss": 0.7827097177505493, "eval_runtime": 1.681, "eval_samples_per_second": 52.349, "eval_steps_per_second": 1.785, "step": 60 }, { "epoch": 16.0, "eval_accuracy": 0.7045454545454546, "eval_loss": 0.7509434223175049, "eval_runtime": 1.7041, "eval_samples_per_second": 51.641, "eval_steps_per_second": 1.76, "step": 64 }, { "epoch": 17.0, "eval_accuracy": 0.7386363636363636, "eval_loss": 0.7364481091499329, "eval_runtime": 1.6978, "eval_samples_per_second": 51.831, "eval_steps_per_second": 1.767, "step": 68 }, { "epoch": 18.0, "grad_norm": 16.78498649597168, "learning_rate": 2.0658795558326743e-05, "loss": 1.8443, "step": 72 }, { "epoch": 18.0, "eval_accuracy": 0.7159090909090909, "eval_loss": 0.770459771156311, "eval_runtime": 1.7059, "eval_samples_per_second": 51.585, "eval_steps_per_second": 1.759, "step": 72 }, { "epoch": 19.0, "eval_accuracy": 0.7272727272727273, "eval_loss": 0.7515316009521484, "eval_runtime": 1.7077, "eval_samples_per_second": 51.532, "eval_steps_per_second": 1.757, "step": 76 }, { "epoch": 20.0, "eval_accuracy": 0.7386363636363636, "eval_loss": 0.747029721736908, "eval_runtime": 1.8467, "eval_samples_per_second": 47.652, "eval_steps_per_second": 1.625, "step": 80 }, { "epoch": 21.0, "grad_norm": 14.945952415466309, "learning_rate": 1.2500000000000006e-05, "loss": 1.659, "step": 84 }, { "epoch": 21.0, "eval_accuracy": 0.75, "eval_loss": 0.7494531273841858, "eval_runtime": 2.3744, "eval_samples_per_second": 37.062, "eval_steps_per_second": 1.263, "step": 84 }, { "epoch": 22.0, "eval_accuracy": 0.75, "eval_loss": 0.7236538529396057, "eval_runtime": 2.3525, "eval_samples_per_second": 37.408, "eval_steps_per_second": 1.275, "step": 88 }, { "epoch": 23.0, "eval_accuracy": 0.75, "eval_loss": 0.744009256362915, "eval_runtime": 2.2107, "eval_samples_per_second": 39.807, "eval_steps_per_second": 1.357, "step": 92 }, { "epoch": 24.0, "grad_norm": 21.294570922851562, "learning_rate": 5.848888922025553e-06, "loss": 1.5303, "step": 96 }, { "epoch": 24.0, "eval_accuracy": 0.75, "eval_loss": 0.736692488193512, "eval_runtime": 1.7151, "eval_samples_per_second": 51.31, "eval_steps_per_second": 1.749, "step": 96 }, { "epoch": 25.0, "eval_accuracy": 0.7613636363636364, "eval_loss": 0.7427918910980225, "eval_runtime": 1.6602, "eval_samples_per_second": 53.006, "eval_steps_per_second": 1.807, "step": 100 }, { "epoch": 26.0, "eval_accuracy": 0.75, "eval_loss": 0.7406925559043884, "eval_runtime": 1.671, "eval_samples_per_second": 52.662, "eval_steps_per_second": 1.795, "step": 104 }, { "epoch": 27.0, "grad_norm": 18.939655303955078, "learning_rate": 1.5076844803522922e-06, "loss": 1.4305, "step": 108 }, { "epoch": 27.0, "eval_accuracy": 0.75, "eval_loss": 0.740644633769989, "eval_runtime": 1.6889, "eval_samples_per_second": 52.104, "eval_steps_per_second": 1.776, "step": 108 }, { "epoch": 28.0, "eval_accuracy": 0.75, "eval_loss": 0.7422773241996765, "eval_runtime": 1.6858, "eval_samples_per_second": 52.2, "eval_steps_per_second": 1.78, "step": 112 }, { "epoch": 29.0, "eval_accuracy": 0.75, "eval_loss": 0.7426608800888062, "eval_runtime": 1.7009, "eval_samples_per_second": 51.738, "eval_steps_per_second": 1.764, "step": 116 }, { "epoch": 30.0, "grad_norm": 13.808012962341309, "learning_rate": 0.0, "loss": 1.3529, "step": 120 }, { "epoch": 30.0, "eval_accuracy": 0.75, "eval_loss": 0.7427504062652588, "eval_runtime": 2.6552, "eval_samples_per_second": 33.142, "eval_steps_per_second": 1.13, "step": 120 }, { "epoch": 30.0, "step": 120, "total_flos": 3.982534914657485e+17, "train_loss": 2.4056013425191245, "train_runtime": 469.8187, "train_samples_per_second": 34.737, "train_steps_per_second": 0.255 } ], "logging_steps": 12, "max_steps": 120, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.982534914657485e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }