|
{ |
|
"best_metric": 0.9417692129092176, |
|
"best_model_checkpoint": "Crosswalk/dinov2/checkpoint-924", |
|
"epoch": 22.0, |
|
"eval_steps": 500, |
|
"global_step": 924, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.24242424242424243, |
|
"grad_norm": 1809.6781005859375, |
|
"learning_rate": 9.70873786407767e-07, |
|
"loss": 4.7087, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.48484848484848486, |
|
"grad_norm": 190.5601806640625, |
|
"learning_rate": 1.941747572815534e-06, |
|
"loss": 3.034, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.7272727272727273, |
|
"grad_norm": 76.29146575927734, |
|
"learning_rate": 2.912621359223301e-06, |
|
"loss": 2.0024, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.9696969696969697, |
|
"grad_norm": 75.1009750366211, |
|
"learning_rate": 3.883495145631068e-06, |
|
"loss": 1.4019, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.2978227138519287, |
|
"eval_macro_f1": 0.8815037150933147, |
|
"eval_runtime": 7.5247, |
|
"eval_samples_per_second": 43.856, |
|
"eval_steps_per_second": 5.582, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 1.1939393939393939, |
|
"grad_norm": 86.90376281738281, |
|
"learning_rate": 4.854368932038836e-06, |
|
"loss": 0.7179, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.4363636363636363, |
|
"grad_norm": 118.144287109375, |
|
"learning_rate": 5.825242718446602e-06, |
|
"loss": 0.9737, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.6787878787878787, |
|
"grad_norm": 174.987548828125, |
|
"learning_rate": 6.79611650485437e-06, |
|
"loss": 1.3976, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.9212121212121214, |
|
"grad_norm": 35.86643981933594, |
|
"learning_rate": 7.766990291262136e-06, |
|
"loss": 0.967, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.22084859013557434, |
|
"eval_macro_f1": 0.9229339286881953, |
|
"eval_runtime": 5.8751, |
|
"eval_samples_per_second": 56.169, |
|
"eval_steps_per_second": 7.149, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 2.1454545454545455, |
|
"grad_norm": 232.57371520996094, |
|
"learning_rate": 8.737864077669904e-06, |
|
"loss": 1.323, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.3878787878787877, |
|
"grad_norm": 72.19599914550781, |
|
"learning_rate": 9.708737864077671e-06, |
|
"loss": 1.005, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.6303030303030304, |
|
"grad_norm": 55.63515853881836, |
|
"learning_rate": 9.924078091106291e-06, |
|
"loss": 0.6989, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.8727272727272726, |
|
"grad_norm": 56.631591796875, |
|
"learning_rate": 9.815618221258135e-06, |
|
"loss": 0.7527, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.3025018870830536, |
|
"eval_macro_f1": 0.9009343690194753, |
|
"eval_runtime": 6.0811, |
|
"eval_samples_per_second": 54.266, |
|
"eval_steps_per_second": 6.907, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 3.096969696969697, |
|
"grad_norm": 40.3350715637207, |
|
"learning_rate": 9.70715835140998e-06, |
|
"loss": 0.6221, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 3.3393939393939394, |
|
"grad_norm": 95.85454559326172, |
|
"learning_rate": 9.598698481561823e-06, |
|
"loss": 0.9071, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 3.581818181818182, |
|
"grad_norm": 94.18701934814453, |
|
"learning_rate": 9.490238611713667e-06, |
|
"loss": 0.7042, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 3.824242424242424, |
|
"grad_norm": 477.8069763183594, |
|
"learning_rate": 9.38177874186551e-06, |
|
"loss": 0.635, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.22365985810756683, |
|
"eval_macro_f1": 0.9049918736939866, |
|
"eval_runtime": 5.9294, |
|
"eval_samples_per_second": 55.655, |
|
"eval_steps_per_second": 7.083, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 4.048484848484849, |
|
"grad_norm": 110.27919006347656, |
|
"learning_rate": 9.273318872017354e-06, |
|
"loss": 1.1381, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 4.290909090909091, |
|
"grad_norm": 58.735958099365234, |
|
"learning_rate": 9.1648590021692e-06, |
|
"loss": 0.7225, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 4.533333333333333, |
|
"grad_norm": 75.20926666259766, |
|
"learning_rate": 9.056399132321042e-06, |
|
"loss": 0.4634, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 4.775757575757575, |
|
"grad_norm": 18.876136779785156, |
|
"learning_rate": 8.947939262472886e-06, |
|
"loss": 0.6293, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 8.281109809875488, |
|
"learning_rate": 8.83947939262473e-06, |
|
"loss": 0.6632, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.2299780696630478, |
|
"eval_macro_f1": 0.9176304185040354, |
|
"eval_runtime": 6.0281, |
|
"eval_samples_per_second": 54.744, |
|
"eval_steps_per_second": 6.967, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 5.242424242424242, |
|
"grad_norm": 13.756321907043457, |
|
"learning_rate": 8.731019522776574e-06, |
|
"loss": 0.4708, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 5.484848484848484, |
|
"grad_norm": 59.22605895996094, |
|
"learning_rate": 8.622559652928418e-06, |
|
"loss": 0.7127, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 5.7272727272727275, |
|
"grad_norm": 32.43043899536133, |
|
"learning_rate": 8.514099783080262e-06, |
|
"loss": 0.5682, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 5.96969696969697, |
|
"grad_norm": 54.722599029541016, |
|
"learning_rate": 8.405639913232104e-06, |
|
"loss": 0.8667, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.2767850160598755, |
|
"eval_macro_f1": 0.9210700618192522, |
|
"eval_runtime": 6.2004, |
|
"eval_samples_per_second": 53.223, |
|
"eval_steps_per_second": 6.774, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 6.193939393939394, |
|
"grad_norm": 18.175823211669922, |
|
"learning_rate": 8.29718004338395e-06, |
|
"loss": 0.6752, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 6.4363636363636365, |
|
"grad_norm": 56.679988861083984, |
|
"learning_rate": 8.188720173535792e-06, |
|
"loss": 0.3727, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 6.678787878787879, |
|
"grad_norm": 57.3917236328125, |
|
"learning_rate": 8.080260303687636e-06, |
|
"loss": 1.0167, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 6.921212121212121, |
|
"grad_norm": 42.186038970947266, |
|
"learning_rate": 7.97180043383948e-06, |
|
"loss": 0.9377, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.29274508357048035, |
|
"eval_macro_f1": 0.9138863000931967, |
|
"eval_runtime": 6.1213, |
|
"eval_samples_per_second": 53.91, |
|
"eval_steps_per_second": 6.861, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 7.1454545454545455, |
|
"grad_norm": 41.31782531738281, |
|
"learning_rate": 7.863340563991324e-06, |
|
"loss": 0.3818, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 7.387878787878788, |
|
"grad_norm": 4.223178863525391, |
|
"learning_rate": 7.754880694143168e-06, |
|
"loss": 0.4503, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 7.63030303030303, |
|
"grad_norm": 35.64258575439453, |
|
"learning_rate": 7.646420824295012e-06, |
|
"loss": 0.6038, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 7.872727272727273, |
|
"grad_norm": 37.91206359863281, |
|
"learning_rate": 7.537960954446856e-06, |
|
"loss": 0.5407, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.20143219828605652, |
|
"eval_macro_f1": 0.9357970705676355, |
|
"eval_runtime": 5.9715, |
|
"eval_samples_per_second": 55.263, |
|
"eval_steps_per_second": 7.033, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 8.096969696969698, |
|
"grad_norm": 9.571391105651855, |
|
"learning_rate": 7.429501084598699e-06, |
|
"loss": 0.3311, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 8.33939393939394, |
|
"grad_norm": 30.14655876159668, |
|
"learning_rate": 7.321041214750543e-06, |
|
"loss": 0.5367, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 8.581818181818182, |
|
"grad_norm": 125.38350677490234, |
|
"learning_rate": 7.212581344902386e-06, |
|
"loss": 0.4511, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 8.824242424242424, |
|
"grad_norm": 283.20819091796875, |
|
"learning_rate": 7.104121475054231e-06, |
|
"loss": 0.5474, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 0.329227477312088, |
|
"eval_macro_f1": 0.8817302125547928, |
|
"eval_runtime": 5.984, |
|
"eval_samples_per_second": 55.147, |
|
"eval_steps_per_second": 7.019, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 9.048484848484849, |
|
"grad_norm": 31.927379608154297, |
|
"learning_rate": 6.995661605206075e-06, |
|
"loss": 0.3963, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 9.290909090909091, |
|
"grad_norm": 10.10098934173584, |
|
"learning_rate": 6.887201735357918e-06, |
|
"loss": 0.445, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 9.533333333333333, |
|
"grad_norm": 1.947770118713379, |
|
"learning_rate": 6.778741865509761e-06, |
|
"loss": 0.5947, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 9.775757575757575, |
|
"grad_norm": 54.11802673339844, |
|
"learning_rate": 6.670281995661606e-06, |
|
"loss": 0.6001, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.004518165718764067, |
|
"learning_rate": 6.56182212581345e-06, |
|
"loss": 0.412, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.3594599962234497, |
|
"eval_macro_f1": 0.907735321528425, |
|
"eval_runtime": 6.0125, |
|
"eval_samples_per_second": 54.885, |
|
"eval_steps_per_second": 6.985, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 10.242424242424242, |
|
"grad_norm": 0.22337216138839722, |
|
"learning_rate": 6.453362255965293e-06, |
|
"loss": 0.2798, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 10.484848484848484, |
|
"grad_norm": 39.639984130859375, |
|
"learning_rate": 6.344902386117138e-06, |
|
"loss": 0.4377, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 10.727272727272727, |
|
"grad_norm": 53.85198211669922, |
|
"learning_rate": 6.236442516268981e-06, |
|
"loss": 0.2063, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 10.969696969696969, |
|
"grad_norm": 69.08942413330078, |
|
"learning_rate": 6.127982646420825e-06, |
|
"loss": 0.2884, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 0.2930862307548523, |
|
"eval_macro_f1": 0.9380839806371721, |
|
"eval_runtime": 6.0147, |
|
"eval_samples_per_second": 54.866, |
|
"eval_steps_per_second": 6.983, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 11.193939393939393, |
|
"grad_norm": 63.143218994140625, |
|
"learning_rate": 6.019522776572668e-06, |
|
"loss": 0.6075, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 11.436363636363636, |
|
"grad_norm": 7.950187683105469, |
|
"learning_rate": 5.911062906724513e-06, |
|
"loss": 0.2654, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 11.67878787878788, |
|
"grad_norm": 82.30758666992188, |
|
"learning_rate": 5.802603036876356e-06, |
|
"loss": 0.2474, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 11.921212121212122, |
|
"grad_norm": 7.340689182281494, |
|
"learning_rate": 5.6941431670282e-06, |
|
"loss": 0.2405, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 0.3316686451435089, |
|
"eval_macro_f1": 0.9209216589861751, |
|
"eval_runtime": 5.8916, |
|
"eval_samples_per_second": 56.012, |
|
"eval_steps_per_second": 7.129, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 12.145454545454545, |
|
"grad_norm": 37.141632080078125, |
|
"learning_rate": 5.585683297180043e-06, |
|
"loss": 0.3349, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 12.387878787878789, |
|
"grad_norm": 34.024383544921875, |
|
"learning_rate": 5.477223427331888e-06, |
|
"loss": 0.1742, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 12.63030303030303, |
|
"grad_norm": 46.10781478881836, |
|
"learning_rate": 5.368763557483731e-06, |
|
"loss": 0.2115, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 12.872727272727273, |
|
"grad_norm": 126.67015838623047, |
|
"learning_rate": 5.260303687635575e-06, |
|
"loss": 0.8788, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 0.37741926312446594, |
|
"eval_macro_f1": 0.9058106453305834, |
|
"eval_runtime": 6.6329, |
|
"eval_samples_per_second": 49.752, |
|
"eval_steps_per_second": 6.332, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 13.096969696969698, |
|
"grad_norm": 35.44662094116211, |
|
"learning_rate": 5.151843817787418e-06, |
|
"loss": 0.5591, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 13.33939393939394, |
|
"grad_norm": 57.34544372558594, |
|
"learning_rate": 5.043383947939263e-06, |
|
"loss": 0.213, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 13.581818181818182, |
|
"grad_norm": 15.285223960876465, |
|
"learning_rate": 4.934924078091107e-06, |
|
"loss": 0.203, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 13.824242424242424, |
|
"grad_norm": 28.99003028869629, |
|
"learning_rate": 4.82646420824295e-06, |
|
"loss": 0.4163, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 0.39865490794181824, |
|
"eval_macro_f1": 0.9196508840275697, |
|
"eval_runtime": 5.8701, |
|
"eval_samples_per_second": 56.217, |
|
"eval_steps_per_second": 7.155, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 14.048484848484849, |
|
"grad_norm": 346.8255310058594, |
|
"learning_rate": 4.718004338394794e-06, |
|
"loss": 0.26, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 14.290909090909091, |
|
"grad_norm": 38.04654312133789, |
|
"learning_rate": 4.609544468546638e-06, |
|
"loss": 0.3813, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 14.533333333333333, |
|
"grad_norm": 34.71643829345703, |
|
"learning_rate": 4.501084598698482e-06, |
|
"loss": 0.0974, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 14.775757575757575, |
|
"grad_norm": 34.031890869140625, |
|
"learning_rate": 4.392624728850326e-06, |
|
"loss": 0.4881, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 0.0002771662548184395, |
|
"learning_rate": 4.284164859002169e-06, |
|
"loss": 0.4126, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 0.35451531410217285, |
|
"eval_macro_f1": 0.9235679411519468, |
|
"eval_runtime": 6.0428, |
|
"eval_samples_per_second": 54.611, |
|
"eval_steps_per_second": 6.95, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 15.242424242424242, |
|
"grad_norm": 82.48748779296875, |
|
"learning_rate": 4.175704989154013e-06, |
|
"loss": 0.4444, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 15.484848484848484, |
|
"grad_norm": 0.2618753910064697, |
|
"learning_rate": 4.067245119305857e-06, |
|
"loss": 0.2083, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 15.727272727272727, |
|
"grad_norm": 98.34405517578125, |
|
"learning_rate": 3.958785249457701e-06, |
|
"loss": 0.4785, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 15.969696969696969, |
|
"grad_norm": 0.37142229080200195, |
|
"learning_rate": 3.8503253796095445e-06, |
|
"loss": 0.1583, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 0.38117873668670654, |
|
"eval_macro_f1": 0.9268860086407444, |
|
"eval_runtime": 6.9311, |
|
"eval_samples_per_second": 47.612, |
|
"eval_steps_per_second": 6.06, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 16.193939393939395, |
|
"grad_norm": 40.54330825805664, |
|
"learning_rate": 3.741865509761389e-06, |
|
"loss": 0.0774, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 16.436363636363637, |
|
"grad_norm": 0.23675695061683655, |
|
"learning_rate": 3.6334056399132324e-06, |
|
"loss": 0.1639, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 16.67878787878788, |
|
"grad_norm": 47.12529373168945, |
|
"learning_rate": 3.5249457700650764e-06, |
|
"loss": 0.306, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 16.921212121212122, |
|
"grad_norm": 0.3993530571460724, |
|
"learning_rate": 3.41648590021692e-06, |
|
"loss": 0.2376, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 0.4087267816066742, |
|
"eval_macro_f1": 0.9295990205081115, |
|
"eval_runtime": 6.1306, |
|
"eval_samples_per_second": 53.828, |
|
"eval_steps_per_second": 6.851, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 17.145454545454545, |
|
"grad_norm": 0.34205177426338196, |
|
"learning_rate": 3.308026030368764e-06, |
|
"loss": 0.0332, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 17.387878787878787, |
|
"grad_norm": 0.5112647414207458, |
|
"learning_rate": 3.1995661605206075e-06, |
|
"loss": 0.1332, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 17.63030303030303, |
|
"grad_norm": 120.2950439453125, |
|
"learning_rate": 3.0911062906724515e-06, |
|
"loss": 0.2503, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 17.87272727272727, |
|
"grad_norm": 223.04759216308594, |
|
"learning_rate": 2.982646420824295e-06, |
|
"loss": 0.2703, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 0.43362897634506226, |
|
"eval_macro_f1": 0.9264924264924266, |
|
"eval_runtime": 5.861, |
|
"eval_samples_per_second": 56.305, |
|
"eval_steps_per_second": 7.166, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 18.096969696969698, |
|
"grad_norm": 54.66193771362305, |
|
"learning_rate": 2.874186550976139e-06, |
|
"loss": 0.1274, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 18.33939393939394, |
|
"grad_norm": 54.846466064453125, |
|
"learning_rate": 2.765726681127983e-06, |
|
"loss": 0.2751, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 18.581818181818182, |
|
"grad_norm": 53.97863006591797, |
|
"learning_rate": 2.6572668112798266e-06, |
|
"loss": 0.359, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 18.824242424242424, |
|
"grad_norm": 81.63549041748047, |
|
"learning_rate": 2.5488069414316706e-06, |
|
"loss": 0.1819, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 0.3480012118816376, |
|
"eval_macro_f1": 0.9236528192931639, |
|
"eval_runtime": 7.0471, |
|
"eval_samples_per_second": 46.828, |
|
"eval_steps_per_second": 5.96, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 19.048484848484847, |
|
"grad_norm": 41.54087448120117, |
|
"learning_rate": 2.440347071583514e-06, |
|
"loss": 0.6373, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 19.29090909090909, |
|
"grad_norm": 9.001028060913086, |
|
"learning_rate": 2.331887201735358e-06, |
|
"loss": 0.2971, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 19.533333333333335, |
|
"grad_norm": 114.6279525756836, |
|
"learning_rate": 2.2234273318872017e-06, |
|
"loss": 0.1943, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 19.775757575757577, |
|
"grad_norm": 18.022676467895508, |
|
"learning_rate": 2.1149674620390457e-06, |
|
"loss": 0.1207, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 0.0001882202341221273, |
|
"learning_rate": 2.0065075921908892e-06, |
|
"loss": 0.1324, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 0.4493299424648285, |
|
"eval_macro_f1": 0.9384902143522833, |
|
"eval_runtime": 6.0147, |
|
"eval_samples_per_second": 54.865, |
|
"eval_steps_per_second": 6.983, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 20.242424242424242, |
|
"grad_norm": 13.740226745605469, |
|
"learning_rate": 1.8980477223427332e-06, |
|
"loss": 0.294, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 20.484848484848484, |
|
"grad_norm": 6.870513916015625, |
|
"learning_rate": 1.7895878524945772e-06, |
|
"loss": 0.1323, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 20.727272727272727, |
|
"grad_norm": 2.74729585647583, |
|
"learning_rate": 1.681127982646421e-06, |
|
"loss": 0.019, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 20.96969696969697, |
|
"grad_norm": 17.18338966369629, |
|
"learning_rate": 1.572668112798265e-06, |
|
"loss": 0.1312, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_loss": 0.40448498725891113, |
|
"eval_macro_f1": 0.9384902143522833, |
|
"eval_runtime": 6.133, |
|
"eval_samples_per_second": 53.808, |
|
"eval_steps_per_second": 6.848, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 21.193939393939395, |
|
"grad_norm": 108.9833755493164, |
|
"learning_rate": 1.4642082429501087e-06, |
|
"loss": 0.2499, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 21.436363636363637, |
|
"grad_norm": 0.291847825050354, |
|
"learning_rate": 1.3557483731019525e-06, |
|
"loss": 0.1708, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 21.67878787878788, |
|
"grad_norm": 35.08168029785156, |
|
"learning_rate": 1.2472885032537963e-06, |
|
"loss": 0.0802, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 21.921212121212122, |
|
"grad_norm": 0.05401836335659027, |
|
"learning_rate": 1.13882863340564e-06, |
|
"loss": 0.1662, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 0.3166828453540802, |
|
"eval_macro_f1": 0.9417692129092176, |
|
"eval_runtime": 6.0442, |
|
"eval_samples_per_second": 54.598, |
|
"eval_steps_per_second": 6.949, |
|
"step": 924 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1025, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 25, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.0371596050603966e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|