codebyzeb's picture
Model save
032e7bb
raw
history blame
45.1 kB
{
"best_metric": 0.7334500551223755,
"best_model_checkpoint": "../../checkpoints/baseline/default-baseline-uncleaned/lm_model/finetune/mnli-mm/checkpoint-28000",
"epoch": 7.389162561576355,
"global_step": 30000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"eval_accuracy": 0.3381603956222534,
"eval_loss": 1.0937632322311401,
"eval_runtime": 7.1539,
"eval_samples_per_second": 878.405,
"eval_steps_per_second": 109.87,
"step": 200
},
{
"epoch": 0.1,
"eval_accuracy": 0.5,
"eval_loss": 1.0056557655334473,
"eval_runtime": 7.136,
"eval_samples_per_second": 880.608,
"eval_steps_per_second": 110.146,
"step": 400
},
{
"epoch": 0.12,
"learning_rate": 4.938423645320197e-05,
"loss": 1.065,
"step": 500
},
{
"epoch": 0.15,
"eval_accuracy": 0.5747931003570557,
"eval_loss": 0.9154016971588135,
"eval_runtime": 7.1437,
"eval_samples_per_second": 879.654,
"eval_steps_per_second": 110.027,
"step": 600
},
{
"epoch": 0.2,
"eval_accuracy": 0.5988224148750305,
"eval_loss": 0.8770188689231873,
"eval_runtime": 7.1559,
"eval_samples_per_second": 878.161,
"eval_steps_per_second": 109.84,
"step": 800
},
{
"epoch": 0.25,
"learning_rate": 4.876847290640394e-05,
"loss": 0.9452,
"step": 1000
},
{
"epoch": 0.25,
"eval_accuracy": 0.6098026633262634,
"eval_loss": 0.863080620765686,
"eval_runtime": 7.1512,
"eval_samples_per_second": 878.732,
"eval_steps_per_second": 109.911,
"step": 1000
},
{
"epoch": 0.3,
"eval_accuracy": 0.619191586971283,
"eval_loss": 0.8449499011039734,
"eval_runtime": 7.1301,
"eval_samples_per_second": 881.33,
"eval_steps_per_second": 110.236,
"step": 1200
},
{
"epoch": 0.34,
"eval_accuracy": 0.630808413028717,
"eval_loss": 0.8298859596252441,
"eval_runtime": 7.1347,
"eval_samples_per_second": 880.768,
"eval_steps_per_second": 110.166,
"step": 1400
},
{
"epoch": 0.37,
"learning_rate": 4.8152709359605915e-05,
"loss": 0.9048,
"step": 1500
},
{
"epoch": 0.39,
"eval_accuracy": 0.6325588822364807,
"eval_loss": 0.8225136995315552,
"eval_runtime": 7.1209,
"eval_samples_per_second": 882.468,
"eval_steps_per_second": 110.379,
"step": 1600
},
{
"epoch": 0.44,
"eval_accuracy": 0.6378102898597717,
"eval_loss": 0.8134775161743164,
"eval_runtime": 7.1392,
"eval_samples_per_second": 880.211,
"eval_steps_per_second": 110.096,
"step": 1800
},
{
"epoch": 0.49,
"learning_rate": 4.753694581280788e-05,
"loss": 0.8715,
"step": 2000
},
{
"epoch": 0.49,
"eval_accuracy": 0.6421069502830505,
"eval_loss": 0.8081309795379639,
"eval_runtime": 7.115,
"eval_samples_per_second": 883.199,
"eval_steps_per_second": 110.47,
"step": 2000
},
{
"epoch": 0.54,
"eval_accuracy": 0.6468809843063354,
"eval_loss": 0.7986512184143066,
"eval_runtime": 7.1565,
"eval_samples_per_second": 878.086,
"eval_steps_per_second": 109.831,
"step": 2200
},
{
"epoch": 0.59,
"eval_accuracy": 0.6494271159172058,
"eval_loss": 0.7892561554908752,
"eval_runtime": 7.1252,
"eval_samples_per_second": 881.942,
"eval_steps_per_second": 110.313,
"step": 2400
},
{
"epoch": 0.62,
"learning_rate": 4.6921182266009855e-05,
"loss": 0.8542,
"step": 2500
},
{
"epoch": 0.64,
"eval_accuracy": 0.6537237167358398,
"eval_loss": 0.7885516881942749,
"eval_runtime": 7.1352,
"eval_samples_per_second": 880.701,
"eval_steps_per_second": 110.158,
"step": 2600
},
{
"epoch": 0.69,
"eval_accuracy": 0.6580203771591187,
"eval_loss": 0.7793189287185669,
"eval_runtime": 7.1229,
"eval_samples_per_second": 882.221,
"eval_steps_per_second": 110.348,
"step": 2800
},
{
"epoch": 0.74,
"learning_rate": 4.630541871921182e-05,
"loss": 0.8344,
"step": 3000
},
{
"epoch": 0.74,
"eval_accuracy": 0.660566508769989,
"eval_loss": 0.7755009531974792,
"eval_runtime": 7.1376,
"eval_samples_per_second": 880.411,
"eval_steps_per_second": 110.121,
"step": 3000
},
{
"epoch": 0.79,
"eval_accuracy": 0.6670910120010376,
"eval_loss": 0.769910454750061,
"eval_runtime": 7.1523,
"eval_samples_per_second": 878.594,
"eval_steps_per_second": 109.894,
"step": 3200
},
{
"epoch": 0.84,
"eval_accuracy": 0.6683641076087952,
"eval_loss": 0.7647598385810852,
"eval_runtime": 7.1504,
"eval_samples_per_second": 878.833,
"eval_steps_per_second": 109.924,
"step": 3400
},
{
"epoch": 0.86,
"learning_rate": 4.5689655172413794e-05,
"loss": 0.8204,
"step": 3500
},
{
"epoch": 0.89,
"eval_accuracy": 0.6705919504165649,
"eval_loss": 0.7597415447235107,
"eval_runtime": 7.1426,
"eval_samples_per_second": 879.79,
"eval_steps_per_second": 110.044,
"step": 3600
},
{
"epoch": 0.94,
"eval_accuracy": 0.6683641076087952,
"eval_loss": 0.7540149688720703,
"eval_runtime": 7.1581,
"eval_samples_per_second": 877.882,
"eval_steps_per_second": 109.805,
"step": 3800
},
{
"epoch": 0.99,
"learning_rate": 4.507389162561577e-05,
"loss": 0.8125,
"step": 4000
},
{
"epoch": 0.99,
"eval_accuracy": 0.6728198528289795,
"eval_loss": 0.7508071660995483,
"eval_runtime": 7.1283,
"eval_samples_per_second": 881.556,
"eval_steps_per_second": 110.265,
"step": 4000
},
{
"epoch": 1.03,
"eval_accuracy": 0.6731381416320801,
"eval_loss": 0.7491191029548645,
"eval_runtime": 7.1623,
"eval_samples_per_second": 877.371,
"eval_steps_per_second": 109.741,
"step": 4200
},
{
"epoch": 1.08,
"eval_accuracy": 0.6718650460243225,
"eval_loss": 0.7508663535118103,
"eval_runtime": 7.1429,
"eval_samples_per_second": 879.752,
"eval_steps_per_second": 110.039,
"step": 4400
},
{
"epoch": 1.11,
"learning_rate": 4.4458128078817734e-05,
"loss": 0.786,
"step": 4500
},
{
"epoch": 1.13,
"eval_accuracy": 0.6767982244491577,
"eval_loss": 0.7442670464515686,
"eval_runtime": 7.1445,
"eval_samples_per_second": 879.556,
"eval_steps_per_second": 110.015,
"step": 4600
},
{
"epoch": 1.18,
"eval_accuracy": 0.6767982244491577,
"eval_loss": 0.7406799793243408,
"eval_runtime": 7.1302,
"eval_samples_per_second": 881.316,
"eval_steps_per_second": 110.235,
"step": 4800
},
{
"epoch": 1.23,
"learning_rate": 4.384236453201971e-05,
"loss": 0.78,
"step": 5000
},
{
"epoch": 1.23,
"eval_accuracy": 0.6799809336662292,
"eval_loss": 0.7390637993812561,
"eval_runtime": 7.1525,
"eval_samples_per_second": 878.57,
"eval_steps_per_second": 109.891,
"step": 5000
},
{
"epoch": 1.28,
"eval_accuracy": 0.681253969669342,
"eval_loss": 0.7361044883728027,
"eval_runtime": 7.1388,
"eval_samples_per_second": 880.261,
"eval_steps_per_second": 110.103,
"step": 5200
},
{
"epoch": 1.33,
"eval_accuracy": 0.6806174516677856,
"eval_loss": 0.732113242149353,
"eval_runtime": 7.1437,
"eval_samples_per_second": 879.653,
"eval_steps_per_second": 110.027,
"step": 5400
},
{
"epoch": 1.35,
"learning_rate": 4.3226600985221674e-05,
"loss": 0.7614,
"step": 5500
},
{
"epoch": 1.38,
"eval_accuracy": 0.6817314028739929,
"eval_loss": 0.7297388315200806,
"eval_runtime": 7.133,
"eval_samples_per_second": 880.97,
"eval_steps_per_second": 110.191,
"step": 5600
},
{
"epoch": 1.43,
"eval_accuracy": 0.6847549080848694,
"eval_loss": 0.730475902557373,
"eval_runtime": 7.1451,
"eval_samples_per_second": 879.487,
"eval_steps_per_second": 110.006,
"step": 5800
},
{
"epoch": 1.48,
"learning_rate": 4.261083743842365e-05,
"loss": 0.7592,
"step": 6000
},
{
"epoch": 1.48,
"eval_accuracy": 0.6876193284988403,
"eval_loss": 0.7237519025802612,
"eval_runtime": 7.1475,
"eval_samples_per_second": 879.19,
"eval_steps_per_second": 109.969,
"step": 6000
},
{
"epoch": 1.53,
"eval_accuracy": 0.6947804093360901,
"eval_loss": 0.7222017049789429,
"eval_runtime": 7.1471,
"eval_samples_per_second": 879.234,
"eval_steps_per_second": 109.974,
"step": 6200
},
{
"epoch": 1.58,
"eval_accuracy": 0.6931890249252319,
"eval_loss": 0.7157117128372192,
"eval_runtime": 7.1272,
"eval_samples_per_second": 881.687,
"eval_steps_per_second": 110.281,
"step": 6400
},
{
"epoch": 1.6,
"learning_rate": 4.199507389162562e-05,
"loss": 0.7596,
"step": 6500
},
{
"epoch": 1.63,
"eval_accuracy": 0.6908020377159119,
"eval_loss": 0.7141013741493225,
"eval_runtime": 7.1497,
"eval_samples_per_second": 878.921,
"eval_steps_per_second": 109.935,
"step": 6600
},
{
"epoch": 1.67,
"eval_accuracy": 0.6901655197143555,
"eval_loss": 0.7196224331855774,
"eval_runtime": 7.1397,
"eval_samples_per_second": 880.145,
"eval_steps_per_second": 110.088,
"step": 6800
},
{
"epoch": 1.72,
"learning_rate": 4.1379310344827587e-05,
"loss": 0.7502,
"step": 7000
},
{
"epoch": 1.72,
"eval_accuracy": 0.6928707957267761,
"eval_loss": 0.7143052816390991,
"eval_runtime": 7.1641,
"eval_samples_per_second": 877.149,
"eval_steps_per_second": 109.713,
"step": 7000
},
{
"epoch": 1.77,
"eval_accuracy": 0.6971673965454102,
"eval_loss": 0.7072923183441162,
"eval_runtime": 7.1434,
"eval_samples_per_second": 879.692,
"eval_steps_per_second": 110.032,
"step": 7200
},
{
"epoch": 1.82,
"eval_accuracy": 0.7013049125671387,
"eval_loss": 0.7065105438232422,
"eval_runtime": 7.1601,
"eval_samples_per_second": 877.644,
"eval_steps_per_second": 109.775,
"step": 7400
},
{
"epoch": 1.85,
"learning_rate": 4.076354679802955e-05,
"loss": 0.7496,
"step": 7500
},
{
"epoch": 1.87,
"eval_accuracy": 0.6970082521438599,
"eval_loss": 0.7033880949020386,
"eval_runtime": 7.1435,
"eval_samples_per_second": 879.677,
"eval_steps_per_second": 110.03,
"step": 7600
},
{
"epoch": 1.92,
"eval_accuracy": 0.7046467065811157,
"eval_loss": 0.7007125020027161,
"eval_runtime": 7.1609,
"eval_samples_per_second": 877.54,
"eval_steps_per_second": 109.762,
"step": 7800
},
{
"epoch": 1.97,
"learning_rate": 4.014778325123153e-05,
"loss": 0.746,
"step": 8000
},
{
"epoch": 1.97,
"eval_accuracy": 0.7022597193717957,
"eval_loss": 0.6992939710617065,
"eval_runtime": 7.1492,
"eval_samples_per_second": 878.974,
"eval_steps_per_second": 109.942,
"step": 8000
},
{
"epoch": 2.02,
"eval_accuracy": 0.6990770101547241,
"eval_loss": 0.704833447933197,
"eval_runtime": 7.1594,
"eval_samples_per_second": 877.725,
"eval_steps_per_second": 109.785,
"step": 8200
},
{
"epoch": 2.07,
"eval_accuracy": 0.705760657787323,
"eval_loss": 0.6999921202659607,
"eval_runtime": 7.1391,
"eval_samples_per_second": 880.219,
"eval_steps_per_second": 110.097,
"step": 8400
},
{
"epoch": 2.09,
"learning_rate": 3.95320197044335e-05,
"loss": 0.7242,
"step": 8500
},
{
"epoch": 2.12,
"eval_accuracy": 0.7013049125671387,
"eval_loss": 0.700331449508667,
"eval_runtime": 7.1723,
"eval_samples_per_second": 876.149,
"eval_steps_per_second": 109.588,
"step": 8600
},
{
"epoch": 2.17,
"eval_accuracy": 0.7043284773826599,
"eval_loss": 0.7018357515335083,
"eval_runtime": 7.1315,
"eval_samples_per_second": 881.159,
"eval_steps_per_second": 110.215,
"step": 8800
},
{
"epoch": 2.22,
"learning_rate": 3.891625615763547e-05,
"loss": 0.7127,
"step": 9000
},
{
"epoch": 2.22,
"eval_accuracy": 0.7008274793624878,
"eval_loss": 0.7000855803489685,
"eval_runtime": 7.1461,
"eval_samples_per_second": 879.36,
"eval_steps_per_second": 109.99,
"step": 9000
},
{
"epoch": 2.27,
"eval_accuracy": 0.7009866237640381,
"eval_loss": 0.6994293928146362,
"eval_runtime": 7.1376,
"eval_samples_per_second": 880.41,
"eval_steps_per_second": 110.121,
"step": 9200
},
{
"epoch": 2.32,
"eval_accuracy": 0.7036918997764587,
"eval_loss": 0.6989775896072388,
"eval_runtime": 7.1543,
"eval_samples_per_second": 878.359,
"eval_steps_per_second": 109.865,
"step": 9400
},
{
"epoch": 2.34,
"learning_rate": 3.830049261083744e-05,
"loss": 0.7142,
"step": 9500
},
{
"epoch": 2.36,
"eval_accuracy": 0.7021005749702454,
"eval_loss": 0.6990280151367188,
"eval_runtime": 7.1395,
"eval_samples_per_second": 880.172,
"eval_steps_per_second": 110.092,
"step": 9600
},
{
"epoch": 2.41,
"eval_accuracy": 0.711012065410614,
"eval_loss": 0.6901064515113831,
"eval_runtime": 7.1486,
"eval_samples_per_second": 879.059,
"eval_steps_per_second": 109.952,
"step": 9800
},
{
"epoch": 2.46,
"learning_rate": 3.768472906403941e-05,
"loss": 0.711,
"step": 10000
},
{
"epoch": 2.46,
"eval_accuracy": 0.7097390294075012,
"eval_loss": 0.6861850619316101,
"eval_runtime": 7.1333,
"eval_samples_per_second": 880.945,
"eval_steps_per_second": 110.188,
"step": 10000
},
{
"epoch": 2.51,
"eval_accuracy": 0.7105346918106079,
"eval_loss": 0.6924527287483215,
"eval_runtime": 7.1467,
"eval_samples_per_second": 879.292,
"eval_steps_per_second": 109.981,
"step": 10200
},
{
"epoch": 2.56,
"eval_accuracy": 0.7073519825935364,
"eval_loss": 0.6872392296791077,
"eval_runtime": 7.1334,
"eval_samples_per_second": 880.924,
"eval_steps_per_second": 110.186,
"step": 10400
},
{
"epoch": 2.59,
"learning_rate": 3.7068965517241385e-05,
"loss": 0.7083,
"step": 10500
},
{
"epoch": 2.61,
"eval_accuracy": 0.7133991122245789,
"eval_loss": 0.6849201321601868,
"eval_runtime": 7.1414,
"eval_samples_per_second": 879.941,
"eval_steps_per_second": 110.063,
"step": 10600
},
{
"epoch": 2.66,
"eval_accuracy": 0.711966872215271,
"eval_loss": 0.6847355365753174,
"eval_runtime": 7.1397,
"eval_samples_per_second": 880.145,
"eval_steps_per_second": 110.088,
"step": 10800
},
{
"epoch": 2.71,
"learning_rate": 3.645320197044335e-05,
"loss": 0.6995,
"step": 11000
},
{
"epoch": 2.71,
"eval_accuracy": 0.7130808234214783,
"eval_loss": 0.6875377893447876,
"eval_runtime": 7.146,
"eval_samples_per_second": 879.379,
"eval_steps_per_second": 109.992,
"step": 11000
},
{
"epoch": 2.76,
"eval_accuracy": 0.7157860994338989,
"eval_loss": 0.6812172532081604,
"eval_runtime": 7.1597,
"eval_samples_per_second": 877.692,
"eval_steps_per_second": 109.781,
"step": 11200
},
{
"epoch": 2.81,
"eval_accuracy": 0.7127625942230225,
"eval_loss": 0.680852472782135,
"eval_runtime": 7.1585,
"eval_samples_per_second": 877.836,
"eval_steps_per_second": 109.799,
"step": 11400
},
{
"epoch": 2.83,
"learning_rate": 3.583743842364532e-05,
"loss": 0.7014,
"step": 11500
},
{
"epoch": 2.86,
"eval_accuracy": 0.7146722078323364,
"eval_loss": 0.6794761419296265,
"eval_runtime": 7.1353,
"eval_samples_per_second": 880.696,
"eval_steps_per_second": 110.157,
"step": 11600
},
{
"epoch": 2.91,
"eval_accuracy": 0.7130808234214783,
"eval_loss": 0.6790280938148499,
"eval_runtime": 7.1362,
"eval_samples_per_second": 880.579,
"eval_steps_per_second": 110.142,
"step": 11800
},
{
"epoch": 2.96,
"learning_rate": 3.522167487684729e-05,
"loss": 0.6979,
"step": 12000
},
{
"epoch": 2.96,
"eval_accuracy": 0.7159452438354492,
"eval_loss": 0.6799929738044739,
"eval_runtime": 7.1383,
"eval_samples_per_second": 880.323,
"eval_steps_per_second": 110.11,
"step": 12000
},
{
"epoch": 3.0,
"eval_accuracy": 0.7122851610183716,
"eval_loss": 0.674569845199585,
"eval_runtime": 7.1445,
"eval_samples_per_second": 879.554,
"eval_steps_per_second": 110.014,
"step": 12200
},
{
"epoch": 3.05,
"eval_accuracy": 0.7169000506401062,
"eval_loss": 0.6750736832618713,
"eval_runtime": 7.1334,
"eval_samples_per_second": 880.923,
"eval_steps_per_second": 110.185,
"step": 12400
},
{
"epoch": 3.08,
"learning_rate": 3.4605911330049265e-05,
"loss": 0.6886,
"step": 12500
},
{
"epoch": 3.1,
"eval_accuracy": 0.7151495814323425,
"eval_loss": 0.675022542476654,
"eval_runtime": 7.1463,
"eval_samples_per_second": 879.333,
"eval_steps_per_second": 109.987,
"step": 12600
},
{
"epoch": 3.15,
"eval_accuracy": 0.7137174010276794,
"eval_loss": 0.6786561012268066,
"eval_runtime": 7.1561,
"eval_samples_per_second": 878.138,
"eval_steps_per_second": 109.837,
"step": 12800
},
{
"epoch": 3.2,
"learning_rate": 3.399014778325123e-05,
"loss": 0.6733,
"step": 13000
},
{
"epoch": 3.2,
"eval_accuracy": 0.7189688086509705,
"eval_loss": 0.6791766881942749,
"eval_runtime": 7.1682,
"eval_samples_per_second": 876.651,
"eval_steps_per_second": 109.651,
"step": 13000
},
{
"epoch": 3.25,
"eval_accuracy": 0.7149904370307922,
"eval_loss": 0.680040717124939,
"eval_runtime": 7.1467,
"eval_samples_per_second": 879.292,
"eval_steps_per_second": 109.981,
"step": 13200
},
{
"epoch": 3.3,
"eval_accuracy": 0.7148312926292419,
"eval_loss": 0.6780914664268494,
"eval_runtime": 7.1572,
"eval_samples_per_second": 878.0,
"eval_steps_per_second": 109.82,
"step": 13400
},
{
"epoch": 3.33,
"learning_rate": 3.3374384236453204e-05,
"loss": 0.6685,
"step": 13500
},
{
"epoch": 3.35,
"eval_accuracy": 0.7191279530525208,
"eval_loss": 0.6794589161872864,
"eval_runtime": 7.1322,
"eval_samples_per_second": 881.071,
"eval_steps_per_second": 110.204,
"step": 13600
},
{
"epoch": 3.4,
"eval_accuracy": 0.7188096642494202,
"eval_loss": 0.6752949953079224,
"eval_runtime": 7.134,
"eval_samples_per_second": 880.857,
"eval_steps_per_second": 110.177,
"step": 13800
},
{
"epoch": 3.45,
"learning_rate": 3.275862068965517e-05,
"loss": 0.6682,
"step": 14000
},
{
"epoch": 3.45,
"eval_accuracy": 0.7194462418556213,
"eval_loss": 0.6764330863952637,
"eval_runtime": 7.1203,
"eval_samples_per_second": 882.545,
"eval_steps_per_second": 110.388,
"step": 14000
},
{
"epoch": 3.5,
"eval_accuracy": 0.7172183394432068,
"eval_loss": 0.6686244010925293,
"eval_runtime": 7.1712,
"eval_samples_per_second": 876.284,
"eval_steps_per_second": 109.605,
"step": 14200
},
{
"epoch": 3.55,
"eval_accuracy": 0.7184914350509644,
"eval_loss": 0.6764801144599915,
"eval_runtime": 7.1495,
"eval_samples_per_second": 878.938,
"eval_steps_per_second": 109.937,
"step": 14400
},
{
"epoch": 3.57,
"learning_rate": 3.2142857142857144e-05,
"loss": 0.6764,
"step": 14500
},
{
"epoch": 3.6,
"eval_accuracy": 0.7191279530525208,
"eval_loss": 0.6725979447364807,
"eval_runtime": 7.1706,
"eval_samples_per_second": 876.361,
"eval_steps_per_second": 109.615,
"step": 14600
},
{
"epoch": 3.65,
"eval_accuracy": 0.7191279530525208,
"eval_loss": 0.6702936291694641,
"eval_runtime": 7.1416,
"eval_samples_per_second": 879.918,
"eval_steps_per_second": 110.06,
"step": 14800
},
{
"epoch": 3.69,
"learning_rate": 3.152709359605912e-05,
"loss": 0.6693,
"step": 15000
},
{
"epoch": 3.69,
"eval_accuracy": 0.7226288914680481,
"eval_loss": 0.6674948930740356,
"eval_runtime": 7.1555,
"eval_samples_per_second": 878.209,
"eval_steps_per_second": 109.846,
"step": 15000
},
{
"epoch": 3.74,
"eval_accuracy": 0.7232654094696045,
"eval_loss": 0.6659447550773621,
"eval_runtime": 7.1241,
"eval_samples_per_second": 882.079,
"eval_steps_per_second": 110.33,
"step": 15200
},
{
"epoch": 3.79,
"eval_accuracy": 0.7235836982727051,
"eval_loss": 0.6667641997337341,
"eval_runtime": 7.132,
"eval_samples_per_second": 881.1,
"eval_steps_per_second": 110.208,
"step": 15400
},
{
"epoch": 3.82,
"learning_rate": 3.0911330049261084e-05,
"loss": 0.6736,
"step": 15500
},
{
"epoch": 3.84,
"eval_accuracy": 0.7205601334571838,
"eval_loss": 0.6661719679832458,
"eval_runtime": 7.1184,
"eval_samples_per_second": 882.786,
"eval_steps_per_second": 110.419,
"step": 15600
},
{
"epoch": 3.89,
"eval_accuracy": 0.7205601334571838,
"eval_loss": 0.6669681668281555,
"eval_runtime": 7.1374,
"eval_samples_per_second": 880.427,
"eval_steps_per_second": 110.123,
"step": 15800
},
{
"epoch": 3.94,
"learning_rate": 3.0295566502463057e-05,
"loss": 0.6718,
"step": 16000
},
{
"epoch": 3.94,
"eval_accuracy": 0.7204009890556335,
"eval_loss": 0.6656709313392639,
"eval_runtime": 7.1161,
"eval_samples_per_second": 883.063,
"eval_steps_per_second": 110.453,
"step": 16000
},
{
"epoch": 3.99,
"eval_accuracy": 0.7242202162742615,
"eval_loss": 0.66374671459198,
"eval_runtime": 7.1264,
"eval_samples_per_second": 881.788,
"eval_steps_per_second": 110.294,
"step": 16200
},
{
"epoch": 4.04,
"eval_accuracy": 0.720241904258728,
"eval_loss": 0.6692517995834351,
"eval_runtime": 7.1197,
"eval_samples_per_second": 882.62,
"eval_steps_per_second": 110.398,
"step": 16400
},
{
"epoch": 4.06,
"learning_rate": 2.9679802955665027e-05,
"loss": 0.656,
"step": 16500
},
{
"epoch": 4.09,
"eval_accuracy": 0.7210375666618347,
"eval_loss": 0.6680518984794617,
"eval_runtime": 7.1182,
"eval_samples_per_second": 882.811,
"eval_steps_per_second": 110.422,
"step": 16600
},
{
"epoch": 4.14,
"eval_accuracy": 0.723106324672699,
"eval_loss": 0.6737614274024963,
"eval_runtime": 7.1146,
"eval_samples_per_second": 883.25,
"eval_steps_per_second": 110.477,
"step": 16800
},
{
"epoch": 4.19,
"learning_rate": 2.9064039408866993e-05,
"loss": 0.6429,
"step": 17000
},
{
"epoch": 4.19,
"eval_accuracy": 0.7194462418556213,
"eval_loss": 0.6714398860931396,
"eval_runtime": 7.1257,
"eval_samples_per_second": 881.881,
"eval_steps_per_second": 110.305,
"step": 17000
},
{
"epoch": 4.24,
"eval_accuracy": 0.7216740846633911,
"eval_loss": 0.6679774522781372,
"eval_runtime": 7.124,
"eval_samples_per_second": 882.094,
"eval_steps_per_second": 110.332,
"step": 17200
},
{
"epoch": 4.29,
"eval_accuracy": 0.7210375666618347,
"eval_loss": 0.6685846447944641,
"eval_runtime": 7.1342,
"eval_samples_per_second": 880.83,
"eval_steps_per_second": 110.174,
"step": 17400
},
{
"epoch": 4.31,
"learning_rate": 2.844827586206897e-05,
"loss": 0.6464,
"step": 17500
},
{
"epoch": 4.33,
"eval_accuracy": 0.7270846366882324,
"eval_loss": 0.670677125453949,
"eval_runtime": 7.1152,
"eval_samples_per_second": 883.18,
"eval_steps_per_second": 110.468,
"step": 17600
},
{
"epoch": 4.38,
"eval_accuracy": 0.7251750230789185,
"eval_loss": 0.6647821068763733,
"eval_runtime": 7.1238,
"eval_samples_per_second": 882.114,
"eval_steps_per_second": 110.334,
"step": 17800
},
{
"epoch": 4.43,
"learning_rate": 2.7832512315270936e-05,
"loss": 0.6428,
"step": 18000
},
{
"epoch": 4.43,
"eval_accuracy": 0.7246976494789124,
"eval_loss": 0.6627594232559204,
"eval_runtime": 7.13,
"eval_samples_per_second": 881.351,
"eval_steps_per_second": 110.239,
"step": 18000
},
{
"epoch": 4.48,
"eval_accuracy": 0.7239019870758057,
"eval_loss": 0.664979100227356,
"eval_runtime": 7.1457,
"eval_samples_per_second": 879.411,
"eval_steps_per_second": 109.996,
"step": 18200
},
{
"epoch": 4.53,
"eval_accuracy": 0.7256524562835693,
"eval_loss": 0.6631101965904236,
"eval_runtime": 7.128,
"eval_samples_per_second": 881.597,
"eval_steps_per_second": 110.27,
"step": 18400
},
{
"epoch": 4.56,
"learning_rate": 2.7216748768472906e-05,
"loss": 0.6404,
"step": 18500
},
{
"epoch": 4.58,
"eval_accuracy": 0.7235836982727051,
"eval_loss": 0.6692806482315063,
"eval_runtime": 7.1275,
"eval_samples_per_second": 881.66,
"eval_steps_per_second": 110.278,
"step": 18600
},
{
"epoch": 4.63,
"eval_accuracy": 0.72835773229599,
"eval_loss": 0.6607570648193359,
"eval_runtime": 7.1216,
"eval_samples_per_second": 882.388,
"eval_steps_per_second": 110.369,
"step": 18800
},
{
"epoch": 4.68,
"learning_rate": 2.660098522167488e-05,
"loss": 0.6395,
"step": 19000
},
{
"epoch": 4.68,
"eval_accuracy": 0.7259707450866699,
"eval_loss": 0.6591860055923462,
"eval_runtime": 7.1318,
"eval_samples_per_second": 881.119,
"eval_steps_per_second": 110.21,
"step": 19000
},
{
"epoch": 4.73,
"eval_accuracy": 0.7277212142944336,
"eval_loss": 0.657184898853302,
"eval_runtime": 7.1172,
"eval_samples_per_second": 882.928,
"eval_steps_per_second": 110.436,
"step": 19200
},
{
"epoch": 4.78,
"eval_accuracy": 0.7291533946990967,
"eval_loss": 0.6624195575714111,
"eval_runtime": 7.1289,
"eval_samples_per_second": 881.476,
"eval_steps_per_second": 110.255,
"step": 19400
},
{
"epoch": 4.8,
"learning_rate": 2.598522167487685e-05,
"loss": 0.6374,
"step": 19500
},
{
"epoch": 4.83,
"eval_accuracy": 0.729312539100647,
"eval_loss": 0.6546571254730225,
"eval_runtime": 7.1204,
"eval_samples_per_second": 882.541,
"eval_steps_per_second": 110.388,
"step": 19600
},
{
"epoch": 4.88,
"eval_accuracy": 0.7278803586959839,
"eval_loss": 0.6573053598403931,
"eval_runtime": 7.1365,
"eval_samples_per_second": 880.549,
"eval_steps_per_second": 110.139,
"step": 19800
},
{
"epoch": 4.93,
"learning_rate": 2.5369458128078822e-05,
"loss": 0.646,
"step": 20000
},
{
"epoch": 4.93,
"eval_accuracy": 0.7258116006851196,
"eval_loss": 0.6613573431968689,
"eval_runtime": 7.1306,
"eval_samples_per_second": 881.266,
"eval_steps_per_second": 110.228,
"step": 20000
},
{
"epoch": 4.98,
"eval_accuracy": 0.7281985878944397,
"eval_loss": 0.6586018800735474,
"eval_runtime": 7.1343,
"eval_samples_per_second": 880.812,
"eval_steps_per_second": 110.172,
"step": 20200
},
{
"epoch": 5.02,
"eval_accuracy": 0.727402925491333,
"eval_loss": 0.6686715483665466,
"eval_runtime": 7.1093,
"eval_samples_per_second": 883.91,
"eval_steps_per_second": 110.559,
"step": 20400
},
{
"epoch": 5.05,
"learning_rate": 2.475369458128079e-05,
"loss": 0.629,
"step": 20500
},
{
"epoch": 5.07,
"eval_accuracy": 0.7250159382820129,
"eval_loss": 0.66302090883255,
"eval_runtime": 7.134,
"eval_samples_per_second": 880.855,
"eval_steps_per_second": 110.177,
"step": 20600
},
{
"epoch": 5.12,
"eval_accuracy": 0.7280394434928894,
"eval_loss": 0.667898416519165,
"eval_runtime": 7.117,
"eval_samples_per_second": 882.951,
"eval_steps_per_second": 110.439,
"step": 20800
},
{
"epoch": 5.17,
"learning_rate": 2.413793103448276e-05,
"loss": 0.617,
"step": 21000
},
{
"epoch": 5.17,
"eval_accuracy": 0.7280394434928894,
"eval_loss": 0.6668043732643127,
"eval_runtime": 7.1304,
"eval_samples_per_second": 881.295,
"eval_steps_per_second": 110.232,
"step": 21000
},
{
"epoch": 5.22,
"eval_accuracy": 0.72835773229599,
"eval_loss": 0.6602988839149475,
"eval_runtime": 7.1165,
"eval_samples_per_second": 883.014,
"eval_steps_per_second": 110.447,
"step": 21200
},
{
"epoch": 5.27,
"eval_accuracy": 0.731699526309967,
"eval_loss": 0.6601841449737549,
"eval_runtime": 7.1229,
"eval_samples_per_second": 882.224,
"eval_steps_per_second": 110.348,
"step": 21400
},
{
"epoch": 5.3,
"learning_rate": 2.3522167487684728e-05,
"loss": 0.6239,
"step": 21500
},
{
"epoch": 5.32,
"eval_accuracy": 0.7299490571022034,
"eval_loss": 0.6645928025245667,
"eval_runtime": 7.1079,
"eval_samples_per_second": 884.089,
"eval_steps_per_second": 110.581,
"step": 21600
},
{
"epoch": 5.37,
"eval_accuracy": 0.7286760210990906,
"eval_loss": 0.6626018285751343,
"eval_runtime": 7.1349,
"eval_samples_per_second": 880.737,
"eval_steps_per_second": 110.162,
"step": 21800
},
{
"epoch": 5.42,
"learning_rate": 2.29064039408867e-05,
"loss": 0.6206,
"step": 22000
},
{
"epoch": 5.42,
"eval_accuracy": 0.7262889742851257,
"eval_loss": 0.6620416045188904,
"eval_runtime": 7.1083,
"eval_samples_per_second": 884.033,
"eval_steps_per_second": 110.574,
"step": 22000
},
{
"epoch": 5.47,
"eval_accuracy": 0.7259707450866699,
"eval_loss": 0.6685923933982849,
"eval_runtime": 7.1136,
"eval_samples_per_second": 883.379,
"eval_steps_per_second": 110.493,
"step": 22200
},
{
"epoch": 5.52,
"eval_accuracy": 0.7312221527099609,
"eval_loss": 0.6637505292892456,
"eval_runtime": 7.0986,
"eval_samples_per_second": 885.242,
"eval_steps_per_second": 110.726,
"step": 22400
},
{
"epoch": 5.54,
"learning_rate": 2.229064039408867e-05,
"loss": 0.614,
"step": 22500
},
{
"epoch": 5.57,
"eval_accuracy": 0.7297899723052979,
"eval_loss": 0.6602672338485718,
"eval_runtime": 7.1489,
"eval_samples_per_second": 879.01,
"eval_steps_per_second": 109.946,
"step": 22600
},
{
"epoch": 5.62,
"eval_accuracy": 0.7278803586959839,
"eval_loss": 0.662921667098999,
"eval_runtime": 7.105,
"eval_samples_per_second": 884.45,
"eval_steps_per_second": 110.627,
"step": 22800
},
{
"epoch": 5.67,
"learning_rate": 2.1674876847290644e-05,
"loss": 0.6193,
"step": 23000
},
{
"epoch": 5.67,
"eval_accuracy": 0.7323361039161682,
"eval_loss": 0.6599108576774597,
"eval_runtime": 7.1371,
"eval_samples_per_second": 880.464,
"eval_steps_per_second": 110.128,
"step": 23000
},
{
"epoch": 5.71,
"eval_accuracy": 0.7286760210990906,
"eval_loss": 0.6652523279190063,
"eval_runtime": 7.1062,
"eval_samples_per_second": 884.296,
"eval_steps_per_second": 110.607,
"step": 23200
},
{
"epoch": 5.76,
"eval_accuracy": 0.7288351655006409,
"eval_loss": 0.6546856760978699,
"eval_runtime": 7.1178,
"eval_samples_per_second": 882.851,
"eval_steps_per_second": 110.427,
"step": 23400
},
{
"epoch": 5.79,
"learning_rate": 2.105911330049261e-05,
"loss": 0.6226,
"step": 23500
},
{
"epoch": 5.81,
"eval_accuracy": 0.730267345905304,
"eval_loss": 0.6600866913795471,
"eval_runtime": 7.0969,
"eval_samples_per_second": 885.457,
"eval_steps_per_second": 110.753,
"step": 23600
},
{
"epoch": 5.86,
"eval_accuracy": 0.72835773229599,
"eval_loss": 0.6599671244621277,
"eval_runtime": 7.1128,
"eval_samples_per_second": 883.483,
"eval_steps_per_second": 110.506,
"step": 23800
},
{
"epoch": 5.91,
"learning_rate": 2.0443349753694584e-05,
"loss": 0.6158,
"step": 24000
},
{
"epoch": 5.91,
"eval_accuracy": 0.7304264903068542,
"eval_loss": 0.6600461602210999,
"eval_runtime": 7.1187,
"eval_samples_per_second": 882.74,
"eval_steps_per_second": 110.413,
"step": 24000
},
{
"epoch": 5.96,
"eval_accuracy": 0.7256524562835693,
"eval_loss": 0.6626370549201965,
"eval_runtime": 7.1145,
"eval_samples_per_second": 883.27,
"eval_steps_per_second": 110.479,
"step": 24200
},
{
"epoch": 6.01,
"eval_accuracy": 0.7334500551223755,
"eval_loss": 0.6616737842559814,
"eval_runtime": 7.1091,
"eval_samples_per_second": 883.933,
"eval_steps_per_second": 110.562,
"step": 24400
},
{
"epoch": 6.03,
"learning_rate": 1.9827586206896554e-05,
"loss": 0.6115,
"step": 24500
},
{
"epoch": 6.06,
"eval_accuracy": 0.7310630083084106,
"eval_loss": 0.6600754261016846,
"eval_runtime": 7.1175,
"eval_samples_per_second": 882.895,
"eval_steps_per_second": 110.432,
"step": 24600
},
{
"epoch": 6.11,
"eval_accuracy": 0.7340865731239319,
"eval_loss": 0.6601821780204773,
"eval_runtime": 7.1275,
"eval_samples_per_second": 881.653,
"eval_steps_per_second": 110.277,
"step": 24800
},
{
"epoch": 6.16,
"learning_rate": 1.921182266009852e-05,
"loss": 0.6057,
"step": 25000
},
{
"epoch": 6.16,
"eval_accuracy": 0.7321769595146179,
"eval_loss": 0.6605399250984192,
"eval_runtime": 7.1294,
"eval_samples_per_second": 881.424,
"eval_steps_per_second": 110.248,
"step": 25000
},
{
"epoch": 6.21,
"eval_accuracy": 0.7288351655006409,
"eval_loss": 0.6598327159881592,
"eval_runtime": 7.119,
"eval_samples_per_second": 882.705,
"eval_steps_per_second": 110.408,
"step": 25200
},
{
"epoch": 6.26,
"eval_accuracy": 0.7297899723052979,
"eval_loss": 0.6586458683013916,
"eval_runtime": 7.1237,
"eval_samples_per_second": 882.12,
"eval_steps_per_second": 110.335,
"step": 25400
},
{
"epoch": 6.28,
"learning_rate": 1.8596059113300493e-05,
"loss": 0.5992,
"step": 25500
},
{
"epoch": 6.31,
"eval_accuracy": 0.7342457175254822,
"eval_loss": 0.6593834161758423,
"eval_runtime": 7.1555,
"eval_samples_per_second": 878.205,
"eval_steps_per_second": 109.846,
"step": 25600
},
{
"epoch": 6.35,
"eval_accuracy": 0.733609139919281,
"eval_loss": 0.659511923789978,
"eval_runtime": 7.1271,
"eval_samples_per_second": 881.705,
"eval_steps_per_second": 110.283,
"step": 25800
},
{
"epoch": 6.4,
"learning_rate": 1.7980295566502463e-05,
"loss": 0.6028,
"step": 26000
},
{
"epoch": 6.4,
"eval_accuracy": 0.7299490571022034,
"eval_loss": 0.6572112441062927,
"eval_runtime": 7.1318,
"eval_samples_per_second": 881.126,
"eval_steps_per_second": 110.211,
"step": 26000
},
{
"epoch": 6.45,
"eval_accuracy": 0.7334500551223755,
"eval_loss": 0.6614532470703125,
"eval_runtime": 7.1291,
"eval_samples_per_second": 881.454,
"eval_steps_per_second": 110.252,
"step": 26200
},
{
"epoch": 6.5,
"eval_accuracy": 0.7304264903068542,
"eval_loss": 0.6609640717506409,
"eval_runtime": 7.1218,
"eval_samples_per_second": 882.36,
"eval_steps_per_second": 110.365,
"step": 26400
},
{
"epoch": 6.53,
"learning_rate": 1.7364532019704436e-05,
"loss": 0.6058,
"step": 26500
},
{
"epoch": 6.55,
"eval_accuracy": 0.7310630083084106,
"eval_loss": 0.6593703031539917,
"eval_runtime": 7.1821,
"eval_samples_per_second": 874.951,
"eval_steps_per_second": 109.438,
"step": 26600
},
{
"epoch": 6.6,
"eval_accuracy": 0.7334500551223755,
"eval_loss": 0.6582794785499573,
"eval_runtime": 7.1156,
"eval_samples_per_second": 883.128,
"eval_steps_per_second": 110.461,
"step": 26800
},
{
"epoch": 6.65,
"learning_rate": 1.6748768472906403e-05,
"loss": 0.5985,
"step": 27000
},
{
"epoch": 6.65,
"eval_accuracy": 0.7304264903068542,
"eval_loss": 0.6584789752960205,
"eval_runtime": 7.1255,
"eval_samples_per_second": 881.898,
"eval_steps_per_second": 110.307,
"step": 27000
},
{
"epoch": 6.7,
"eval_accuracy": 0.7323361039161682,
"eval_loss": 0.6626281142234802,
"eval_runtime": 7.1065,
"eval_samples_per_second": 884.263,
"eval_steps_per_second": 110.603,
"step": 27200
},
{
"epoch": 6.75,
"eval_accuracy": 0.727402925491333,
"eval_loss": 0.6593265533447266,
"eval_runtime": 7.1566,
"eval_samples_per_second": 878.068,
"eval_steps_per_second": 109.828,
"step": 27400
},
{
"epoch": 6.77,
"learning_rate": 1.6133004926108376e-05,
"loss": 0.6,
"step": 27500
},
{
"epoch": 6.8,
"eval_accuracy": 0.7328134775161743,
"eval_loss": 0.6583454012870789,
"eval_runtime": 7.1076,
"eval_samples_per_second": 884.119,
"eval_steps_per_second": 110.585,
"step": 27600
},
{
"epoch": 6.85,
"eval_accuracy": 0.732654333114624,
"eval_loss": 0.6581458449363708,
"eval_runtime": 7.1149,
"eval_samples_per_second": 883.214,
"eval_steps_per_second": 110.472,
"step": 27800
},
{
"epoch": 6.9,
"learning_rate": 1.5517241379310346e-05,
"loss": 0.6009,
"step": 28000
},
{
"epoch": 6.9,
"eval_accuracy": 0.7334500551223755,
"eval_loss": 0.6602644324302673,
"eval_runtime": 7.1115,
"eval_samples_per_second": 883.644,
"eval_steps_per_second": 110.526,
"step": 28000
},
{
"epoch": 6.95,
"eval_accuracy": 0.7315404415130615,
"eval_loss": 0.6539360284805298,
"eval_runtime": 7.1218,
"eval_samples_per_second": 882.358,
"eval_steps_per_second": 110.365,
"step": 28200
},
{
"epoch": 7.0,
"eval_accuracy": 0.733609139919281,
"eval_loss": 0.6602201461791992,
"eval_runtime": 7.1034,
"eval_samples_per_second": 884.646,
"eval_steps_per_second": 110.651,
"step": 28400
},
{
"epoch": 7.02,
"learning_rate": 1.4901477832512317e-05,
"loss": 0.6013,
"step": 28500
},
{
"epoch": 7.04,
"eval_accuracy": 0.732654333114624,
"eval_loss": 0.6585041284561157,
"eval_runtime": 7.1174,
"eval_samples_per_second": 882.908,
"eval_steps_per_second": 110.434,
"step": 28600
},
{
"epoch": 7.09,
"eval_accuracy": 0.7301082015037537,
"eval_loss": 0.659524142742157,
"eval_runtime": 7.0987,
"eval_samples_per_second": 885.233,
"eval_steps_per_second": 110.725,
"step": 28800
},
{
"epoch": 7.14,
"learning_rate": 1.4285714285714285e-05,
"loss": 0.5937,
"step": 29000
},
{
"epoch": 7.14,
"eval_accuracy": 0.7318586707115173,
"eval_loss": 0.6640517711639404,
"eval_runtime": 7.1062,
"eval_samples_per_second": 884.293,
"eval_steps_per_second": 110.607,
"step": 29000
},
{
"epoch": 7.19,
"eval_accuracy": 0.7324952483177185,
"eval_loss": 0.6623978018760681,
"eval_runtime": 7.1076,
"eval_samples_per_second": 884.119,
"eval_steps_per_second": 110.585,
"step": 29200
},
{
"epoch": 7.24,
"eval_accuracy": 0.7321769595146179,
"eval_loss": 0.6652867794036865,
"eval_runtime": 7.1683,
"eval_samples_per_second": 876.634,
"eval_steps_per_second": 109.649,
"step": 29400
},
{
"epoch": 7.27,
"learning_rate": 1.3669950738916257e-05,
"loss": 0.5779,
"step": 29500
},
{
"epoch": 7.29,
"eval_accuracy": 0.7324952483177185,
"eval_loss": 0.6649503111839294,
"eval_runtime": 7.1045,
"eval_samples_per_second": 884.505,
"eval_steps_per_second": 110.634,
"step": 29600
},
{
"epoch": 7.34,
"eval_accuracy": 0.7332909107208252,
"eval_loss": 0.6616361141204834,
"eval_runtime": 7.114,
"eval_samples_per_second": 883.327,
"eval_steps_per_second": 110.486,
"step": 29800
},
{
"epoch": 7.39,
"learning_rate": 1.3054187192118228e-05,
"loss": 0.5809,
"step": 30000
},
{
"epoch": 7.39,
"eval_accuracy": 0.7312221527099609,
"eval_loss": 0.6653700470924377,
"eval_runtime": 7.1061,
"eval_samples_per_second": 884.315,
"eval_steps_per_second": 110.61,
"step": 30000
},
{
"epoch": 7.39,
"step": 30000,
"total_flos": 4.469740936607232e+16,
"train_loss": 0.6925110097249348,
"train_runtime": 5542.0479,
"train_samples_per_second": 468.744,
"train_steps_per_second": 7.326
}
],
"max_steps": 40600,
"num_train_epochs": 10,
"total_flos": 4.469740936607232e+16,
"trial_name": null,
"trial_params": null
}