|
{
|
|
"best_metric": 0.8225806451612904,
|
|
"best_model_checkpoint": "swinv2-tiny-patch4-window8-256-OT\\checkpoint-176",
|
|
"epoch": 36.36363636363637,
|
|
"eval_steps": 500,
|
|
"global_step": 200,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.91,
|
|
"eval_accuracy": 0.08064516129032258,
|
|
"eval_loss": 8.843916893005371,
|
|
"eval_runtime": 2.8151,
|
|
"eval_samples_per_second": 22.024,
|
|
"eval_steps_per_second": 1.421,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 1.82,
|
|
"learning_rate": 7.5e-05,
|
|
"loss": 8.7922,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"eval_accuracy": 0.08064516129032258,
|
|
"eval_loss": 8.001582145690918,
|
|
"eval_runtime": 2.1045,
|
|
"eval_samples_per_second": 29.461,
|
|
"eval_steps_per_second": 1.901,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 2.91,
|
|
"eval_accuracy": 0.08064516129032258,
|
|
"eval_loss": 6.000851154327393,
|
|
"eval_runtime": 2.0735,
|
|
"eval_samples_per_second": 29.901,
|
|
"eval_steps_per_second": 1.929,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 3.64,
|
|
"learning_rate": 0.00015,
|
|
"loss": 6.5264,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"eval_accuracy": 0.08064516129032258,
|
|
"eval_loss": 2.7431113719940186,
|
|
"eval_runtime": 2.1035,
|
|
"eval_samples_per_second": 29.475,
|
|
"eval_steps_per_second": 1.902,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 4.91,
|
|
"eval_accuracy": 0.45161290322580644,
|
|
"eval_loss": 1.3018240928649902,
|
|
"eval_runtime": 2.0906,
|
|
"eval_samples_per_second": 29.656,
|
|
"eval_steps_per_second": 1.913,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 5.45,
|
|
"learning_rate": 0.00014166666666666665,
|
|
"loss": 2.16,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 6.0,
|
|
"eval_accuracy": 0.45161290322580644,
|
|
"eval_loss": 1.2696114778518677,
|
|
"eval_runtime": 2.0785,
|
|
"eval_samples_per_second": 29.829,
|
|
"eval_steps_per_second": 1.924,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 6.91,
|
|
"eval_accuracy": 0.45161290322580644,
|
|
"eval_loss": 1.2057440280914307,
|
|
"eval_runtime": 2.5671,
|
|
"eval_samples_per_second": 24.152,
|
|
"eval_steps_per_second": 1.558,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 7.27,
|
|
"learning_rate": 0.0001333333333333333,
|
|
"loss": 1.2876,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 8.0,
|
|
"eval_accuracy": 0.45161290322580644,
|
|
"eval_loss": 1.2157402038574219,
|
|
"eval_runtime": 2.1295,
|
|
"eval_samples_per_second": 29.115,
|
|
"eval_steps_per_second": 1.878,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 8.91,
|
|
"eval_accuracy": 0.45161290322580644,
|
|
"eval_loss": 1.245875597000122,
|
|
"eval_runtime": 2.141,
|
|
"eval_samples_per_second": 28.958,
|
|
"eval_steps_per_second": 1.868,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 9.09,
|
|
"learning_rate": 0.000125,
|
|
"loss": 1.2456,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 10.0,
|
|
"eval_accuracy": 0.45161290322580644,
|
|
"eval_loss": 1.210959792137146,
|
|
"eval_runtime": 2.181,
|
|
"eval_samples_per_second": 28.427,
|
|
"eval_steps_per_second": 1.834,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 10.91,
|
|
"learning_rate": 0.00011666666666666665,
|
|
"loss": 1.1901,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 10.91,
|
|
"eval_accuracy": 0.45161290322580644,
|
|
"eval_loss": 1.1861207485198975,
|
|
"eval_runtime": 2.094,
|
|
"eval_samples_per_second": 29.608,
|
|
"eval_steps_per_second": 1.91,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 12.0,
|
|
"eval_accuracy": 0.46774193548387094,
|
|
"eval_loss": 1.0847262144088745,
|
|
"eval_runtime": 2.212,
|
|
"eval_samples_per_second": 28.028,
|
|
"eval_steps_per_second": 1.808,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 12.73,
|
|
"learning_rate": 0.00010833333333333333,
|
|
"loss": 1.0665,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 12.91,
|
|
"eval_accuracy": 0.46774193548387094,
|
|
"eval_loss": 1.0943629741668701,
|
|
"eval_runtime": 2.08,
|
|
"eval_samples_per_second": 29.808,
|
|
"eval_steps_per_second": 1.923,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 14.0,
|
|
"eval_accuracy": 0.46774193548387094,
|
|
"eval_loss": 1.1853879690170288,
|
|
"eval_runtime": 2.093,
|
|
"eval_samples_per_second": 29.623,
|
|
"eval_steps_per_second": 1.911,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 14.55,
|
|
"learning_rate": 9.999999999999999e-05,
|
|
"loss": 1.033,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 14.91,
|
|
"eval_accuracy": 0.5,
|
|
"eval_loss": 1.025220274925232,
|
|
"eval_runtime": 2.4167,
|
|
"eval_samples_per_second": 25.655,
|
|
"eval_steps_per_second": 1.655,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 16.0,
|
|
"eval_accuracy": 0.5161290322580645,
|
|
"eval_loss": 1.216417908668518,
|
|
"eval_runtime": 2.2296,
|
|
"eval_samples_per_second": 27.808,
|
|
"eval_steps_per_second": 1.794,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 16.36,
|
|
"learning_rate": 9.166666666666667e-05,
|
|
"loss": 1.0323,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 16.91,
|
|
"eval_accuracy": 0.5,
|
|
"eval_loss": 1.0642980337142944,
|
|
"eval_runtime": 2.199,
|
|
"eval_samples_per_second": 28.194,
|
|
"eval_steps_per_second": 1.819,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 18.0,
|
|
"eval_accuracy": 0.6612903225806451,
|
|
"eval_loss": 0.9802310466766357,
|
|
"eval_runtime": 2.1395,
|
|
"eval_samples_per_second": 28.979,
|
|
"eval_steps_per_second": 1.87,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 18.18,
|
|
"learning_rate": 8.333333333333333e-05,
|
|
"loss": 0.9329,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 18.91,
|
|
"eval_accuracy": 0.5967741935483871,
|
|
"eval_loss": 0.9474769830703735,
|
|
"eval_runtime": 2.179,
|
|
"eval_samples_per_second": 28.453,
|
|
"eval_steps_per_second": 1.836,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 20.0,
|
|
"learning_rate": 7.5e-05,
|
|
"loss": 0.8619,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 20.0,
|
|
"eval_accuracy": 0.6451612903225806,
|
|
"eval_loss": 0.9114610552787781,
|
|
"eval_runtime": 2.125,
|
|
"eval_samples_per_second": 29.176,
|
|
"eval_steps_per_second": 1.882,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 20.91,
|
|
"eval_accuracy": 0.6451612903225806,
|
|
"eval_loss": 0.8893528580665588,
|
|
"eval_runtime": 2.2755,
|
|
"eval_samples_per_second": 27.246,
|
|
"eval_steps_per_second": 1.758,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 21.82,
|
|
"learning_rate": 6.666666666666666e-05,
|
|
"loss": 0.8019,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 22.0,
|
|
"eval_accuracy": 0.6935483870967742,
|
|
"eval_loss": 0.8276461958885193,
|
|
"eval_runtime": 2.182,
|
|
"eval_samples_per_second": 28.414,
|
|
"eval_steps_per_second": 1.833,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 22.91,
|
|
"eval_accuracy": 0.6774193548387096,
|
|
"eval_loss": 0.8156123757362366,
|
|
"eval_runtime": 2.1835,
|
|
"eval_samples_per_second": 28.394,
|
|
"eval_steps_per_second": 1.832,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 23.64,
|
|
"learning_rate": 5.8333333333333326e-05,
|
|
"loss": 0.7675,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 24.0,
|
|
"eval_accuracy": 0.6290322580645161,
|
|
"eval_loss": 0.7928251624107361,
|
|
"eval_runtime": 2.7367,
|
|
"eval_samples_per_second": 22.655,
|
|
"eval_steps_per_second": 1.462,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 24.91,
|
|
"eval_accuracy": 0.7419354838709677,
|
|
"eval_loss": 0.7163397669792175,
|
|
"eval_runtime": 2.3511,
|
|
"eval_samples_per_second": 26.371,
|
|
"eval_steps_per_second": 1.701,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 25.45,
|
|
"learning_rate": 4.9999999999999996e-05,
|
|
"loss": 0.6762,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 26.0,
|
|
"eval_accuracy": 0.6774193548387096,
|
|
"eval_loss": 0.7387820482254028,
|
|
"eval_runtime": 2.261,
|
|
"eval_samples_per_second": 27.421,
|
|
"eval_steps_per_second": 1.769,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 26.91,
|
|
"eval_accuracy": 0.7580645161290323,
|
|
"eval_loss": 0.6518718004226685,
|
|
"eval_runtime": 2.2715,
|
|
"eval_samples_per_second": 27.294,
|
|
"eval_steps_per_second": 1.761,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 27.27,
|
|
"learning_rate": 4.1666666666666665e-05,
|
|
"loss": 0.6771,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 28.0,
|
|
"eval_accuracy": 0.7419354838709677,
|
|
"eval_loss": 0.6709696054458618,
|
|
"eval_runtime": 2.6851,
|
|
"eval_samples_per_second": 23.09,
|
|
"eval_steps_per_second": 1.49,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 28.91,
|
|
"eval_accuracy": 0.7580645161290323,
|
|
"eval_loss": 0.6073653697967529,
|
|
"eval_runtime": 2.3635,
|
|
"eval_samples_per_second": 26.232,
|
|
"eval_steps_per_second": 1.692,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 29.09,
|
|
"learning_rate": 3.333333333333333e-05,
|
|
"loss": 0.6424,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 30.0,
|
|
"eval_accuracy": 0.7258064516129032,
|
|
"eval_loss": 0.672946035861969,
|
|
"eval_runtime": 2.2598,
|
|
"eval_samples_per_second": 27.436,
|
|
"eval_steps_per_second": 1.77,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 30.91,
|
|
"learning_rate": 2.4999999999999998e-05,
|
|
"loss": 0.6139,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 30.91,
|
|
"eval_accuracy": 0.7903225806451613,
|
|
"eval_loss": 0.5744480490684509,
|
|
"eval_runtime": 2.189,
|
|
"eval_samples_per_second": 28.323,
|
|
"eval_steps_per_second": 1.827,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 32.0,
|
|
"eval_accuracy": 0.8225806451612904,
|
|
"eval_loss": 0.6192476749420166,
|
|
"eval_runtime": 2.142,
|
|
"eval_samples_per_second": 28.945,
|
|
"eval_steps_per_second": 1.867,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 32.73,
|
|
"learning_rate": 1.6666666666666664e-05,
|
|
"loss": 0.5713,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 32.91,
|
|
"eval_accuracy": 0.7903225806451613,
|
|
"eval_loss": 0.6452686190605164,
|
|
"eval_runtime": 2.1495,
|
|
"eval_samples_per_second": 28.844,
|
|
"eval_steps_per_second": 1.861,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 34.0,
|
|
"eval_accuracy": 0.7903225806451613,
|
|
"eval_loss": 0.6392035484313965,
|
|
"eval_runtime": 2.1965,
|
|
"eval_samples_per_second": 28.226,
|
|
"eval_steps_per_second": 1.821,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 34.55,
|
|
"learning_rate": 8.333333333333332e-06,
|
|
"loss": 0.5462,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 34.91,
|
|
"eval_accuracy": 0.8225806451612904,
|
|
"eval_loss": 0.5955818295478821,
|
|
"eval_runtime": 2.247,
|
|
"eval_samples_per_second": 27.592,
|
|
"eval_steps_per_second": 1.78,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 36.0,
|
|
"eval_accuracy": 0.8225806451612904,
|
|
"eval_loss": 0.5892814993858337,
|
|
"eval_runtime": 2.5326,
|
|
"eval_samples_per_second": 24.481,
|
|
"eval_steps_per_second": 1.579,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 36.36,
|
|
"learning_rate": 0.0,
|
|
"loss": 0.5393,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 36.36,
|
|
"eval_accuracy": 0.8225806451612904,
|
|
"eval_loss": 0.5898378491401672,
|
|
"eval_runtime": 2.2395,
|
|
"eval_samples_per_second": 27.685,
|
|
"eval_steps_per_second": 1.786,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 36.36,
|
|
"step": 200,
|
|
"total_flos": 4.141200256480051e+17,
|
|
"train_loss": 1.5982162952423096,
|
|
"train_runtime": 635.1405,
|
|
"train_samples_per_second": 22.042,
|
|
"train_steps_per_second": 0.315
|
|
}
|
|
],
|
|
"logging_steps": 10,
|
|
"max_steps": 200,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 40,
|
|
"save_steps": 500,
|
|
"total_flos": 4.141200256480051e+17,
|
|
"train_batch_size": 16,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|
|
|