|
{ |
|
"best_metric": 0.7334500551223755, |
|
"best_model_checkpoint": "../../checkpoints/baseline/default-baseline-uncleaned/lm_model/finetune/mnli-mm/checkpoint-28000", |
|
"epoch": 7.389162561576355, |
|
"global_step": 30000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"eval_accuracy": 0.3381603956222534, |
|
"eval_loss": 1.0937632322311401, |
|
"eval_runtime": 7.1539, |
|
"eval_samples_per_second": 878.405, |
|
"eval_steps_per_second": 109.87, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 1.0056557655334473, |
|
"eval_runtime": 7.136, |
|
"eval_samples_per_second": 880.608, |
|
"eval_steps_per_second": 110.146, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.938423645320197e-05, |
|
"loss": 1.065, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_accuracy": 0.5747931003570557, |
|
"eval_loss": 0.9154016971588135, |
|
"eval_runtime": 7.1437, |
|
"eval_samples_per_second": 879.654, |
|
"eval_steps_per_second": 110.027, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.5988224148750305, |
|
"eval_loss": 0.8770188689231873, |
|
"eval_runtime": 7.1559, |
|
"eval_samples_per_second": 878.161, |
|
"eval_steps_per_second": 109.84, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.876847290640394e-05, |
|
"loss": 0.9452, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_accuracy": 0.6098026633262634, |
|
"eval_loss": 0.863080620765686, |
|
"eval_runtime": 7.1512, |
|
"eval_samples_per_second": 878.732, |
|
"eval_steps_per_second": 109.911, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_accuracy": 0.619191586971283, |
|
"eval_loss": 0.8449499011039734, |
|
"eval_runtime": 7.1301, |
|
"eval_samples_per_second": 881.33, |
|
"eval_steps_per_second": 110.236, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_accuracy": 0.630808413028717, |
|
"eval_loss": 0.8298859596252441, |
|
"eval_runtime": 7.1347, |
|
"eval_samples_per_second": 880.768, |
|
"eval_steps_per_second": 110.166, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.8152709359605915e-05, |
|
"loss": 0.9048, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_accuracy": 0.6325588822364807, |
|
"eval_loss": 0.8225136995315552, |
|
"eval_runtime": 7.1209, |
|
"eval_samples_per_second": 882.468, |
|
"eval_steps_per_second": 110.379, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_accuracy": 0.6378102898597717, |
|
"eval_loss": 0.8134775161743164, |
|
"eval_runtime": 7.1392, |
|
"eval_samples_per_second": 880.211, |
|
"eval_steps_per_second": 110.096, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.753694581280788e-05, |
|
"loss": 0.8715, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_accuracy": 0.6421069502830505, |
|
"eval_loss": 0.8081309795379639, |
|
"eval_runtime": 7.115, |
|
"eval_samples_per_second": 883.199, |
|
"eval_steps_per_second": 110.47, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_accuracy": 0.6468809843063354, |
|
"eval_loss": 0.7986512184143066, |
|
"eval_runtime": 7.1565, |
|
"eval_samples_per_second": 878.086, |
|
"eval_steps_per_second": 109.831, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_accuracy": 0.6494271159172058, |
|
"eval_loss": 0.7892561554908752, |
|
"eval_runtime": 7.1252, |
|
"eval_samples_per_second": 881.942, |
|
"eval_steps_per_second": 110.313, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.6921182266009855e-05, |
|
"loss": 0.8542, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_accuracy": 0.6537237167358398, |
|
"eval_loss": 0.7885516881942749, |
|
"eval_runtime": 7.1352, |
|
"eval_samples_per_second": 880.701, |
|
"eval_steps_per_second": 110.158, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_accuracy": 0.6580203771591187, |
|
"eval_loss": 0.7793189287185669, |
|
"eval_runtime": 7.1229, |
|
"eval_samples_per_second": 882.221, |
|
"eval_steps_per_second": 110.348, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.630541871921182e-05, |
|
"loss": 0.8344, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_accuracy": 0.660566508769989, |
|
"eval_loss": 0.7755009531974792, |
|
"eval_runtime": 7.1376, |
|
"eval_samples_per_second": 880.411, |
|
"eval_steps_per_second": 110.121, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_accuracy": 0.6670910120010376, |
|
"eval_loss": 0.769910454750061, |
|
"eval_runtime": 7.1523, |
|
"eval_samples_per_second": 878.594, |
|
"eval_steps_per_second": 109.894, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_accuracy": 0.6683641076087952, |
|
"eval_loss": 0.7647598385810852, |
|
"eval_runtime": 7.1504, |
|
"eval_samples_per_second": 878.833, |
|
"eval_steps_per_second": 109.924, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.5689655172413794e-05, |
|
"loss": 0.8204, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_accuracy": 0.6705919504165649, |
|
"eval_loss": 0.7597415447235107, |
|
"eval_runtime": 7.1426, |
|
"eval_samples_per_second": 879.79, |
|
"eval_steps_per_second": 110.044, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_accuracy": 0.6683641076087952, |
|
"eval_loss": 0.7540149688720703, |
|
"eval_runtime": 7.1581, |
|
"eval_samples_per_second": 877.882, |
|
"eval_steps_per_second": 109.805, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.507389162561577e-05, |
|
"loss": 0.8125, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.6728198528289795, |
|
"eval_loss": 0.7508071660995483, |
|
"eval_runtime": 7.1283, |
|
"eval_samples_per_second": 881.556, |
|
"eval_steps_per_second": 110.265, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_accuracy": 0.6731381416320801, |
|
"eval_loss": 0.7491191029548645, |
|
"eval_runtime": 7.1623, |
|
"eval_samples_per_second": 877.371, |
|
"eval_steps_per_second": 109.741, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_accuracy": 0.6718650460243225, |
|
"eval_loss": 0.7508663535118103, |
|
"eval_runtime": 7.1429, |
|
"eval_samples_per_second": 879.752, |
|
"eval_steps_per_second": 110.039, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 4.4458128078817734e-05, |
|
"loss": 0.786, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_accuracy": 0.6767982244491577, |
|
"eval_loss": 0.7442670464515686, |
|
"eval_runtime": 7.1445, |
|
"eval_samples_per_second": 879.556, |
|
"eval_steps_per_second": 110.015, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_accuracy": 0.6767982244491577, |
|
"eval_loss": 0.7406799793243408, |
|
"eval_runtime": 7.1302, |
|
"eval_samples_per_second": 881.316, |
|
"eval_steps_per_second": 110.235, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 4.384236453201971e-05, |
|
"loss": 0.78, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_accuracy": 0.6799809336662292, |
|
"eval_loss": 0.7390637993812561, |
|
"eval_runtime": 7.1525, |
|
"eval_samples_per_second": 878.57, |
|
"eval_steps_per_second": 109.891, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_accuracy": 0.681253969669342, |
|
"eval_loss": 0.7361044883728027, |
|
"eval_runtime": 7.1388, |
|
"eval_samples_per_second": 880.261, |
|
"eval_steps_per_second": 110.103, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_accuracy": 0.6806174516677856, |
|
"eval_loss": 0.732113242149353, |
|
"eval_runtime": 7.1437, |
|
"eval_samples_per_second": 879.653, |
|
"eval_steps_per_second": 110.027, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 4.3226600985221674e-05, |
|
"loss": 0.7614, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_accuracy": 0.6817314028739929, |
|
"eval_loss": 0.7297388315200806, |
|
"eval_runtime": 7.133, |
|
"eval_samples_per_second": 880.97, |
|
"eval_steps_per_second": 110.191, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_accuracy": 0.6847549080848694, |
|
"eval_loss": 0.730475902557373, |
|
"eval_runtime": 7.1451, |
|
"eval_samples_per_second": 879.487, |
|
"eval_steps_per_second": 110.006, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 4.261083743842365e-05, |
|
"loss": 0.7592, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_accuracy": 0.6876193284988403, |
|
"eval_loss": 0.7237519025802612, |
|
"eval_runtime": 7.1475, |
|
"eval_samples_per_second": 879.19, |
|
"eval_steps_per_second": 109.969, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_accuracy": 0.6947804093360901, |
|
"eval_loss": 0.7222017049789429, |
|
"eval_runtime": 7.1471, |
|
"eval_samples_per_second": 879.234, |
|
"eval_steps_per_second": 109.974, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_accuracy": 0.6931890249252319, |
|
"eval_loss": 0.7157117128372192, |
|
"eval_runtime": 7.1272, |
|
"eval_samples_per_second": 881.687, |
|
"eval_steps_per_second": 110.281, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.199507389162562e-05, |
|
"loss": 0.7596, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_accuracy": 0.6908020377159119, |
|
"eval_loss": 0.7141013741493225, |
|
"eval_runtime": 7.1497, |
|
"eval_samples_per_second": 878.921, |
|
"eval_steps_per_second": 109.935, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_accuracy": 0.6901655197143555, |
|
"eval_loss": 0.7196224331855774, |
|
"eval_runtime": 7.1397, |
|
"eval_samples_per_second": 880.145, |
|
"eval_steps_per_second": 110.088, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 4.1379310344827587e-05, |
|
"loss": 0.7502, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"eval_accuracy": 0.6928707957267761, |
|
"eval_loss": 0.7143052816390991, |
|
"eval_runtime": 7.1641, |
|
"eval_samples_per_second": 877.149, |
|
"eval_steps_per_second": 109.713, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_accuracy": 0.6971673965454102, |
|
"eval_loss": 0.7072923183441162, |
|
"eval_runtime": 7.1434, |
|
"eval_samples_per_second": 879.692, |
|
"eval_steps_per_second": 110.032, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_accuracy": 0.7013049125671387, |
|
"eval_loss": 0.7065105438232422, |
|
"eval_runtime": 7.1601, |
|
"eval_samples_per_second": 877.644, |
|
"eval_steps_per_second": 109.775, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 4.076354679802955e-05, |
|
"loss": 0.7496, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_accuracy": 0.6970082521438599, |
|
"eval_loss": 0.7033880949020386, |
|
"eval_runtime": 7.1435, |
|
"eval_samples_per_second": 879.677, |
|
"eval_steps_per_second": 110.03, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_accuracy": 0.7046467065811157, |
|
"eval_loss": 0.7007125020027161, |
|
"eval_runtime": 7.1609, |
|
"eval_samples_per_second": 877.54, |
|
"eval_steps_per_second": 109.762, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 4.014778325123153e-05, |
|
"loss": 0.746, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_accuracy": 0.7022597193717957, |
|
"eval_loss": 0.6992939710617065, |
|
"eval_runtime": 7.1492, |
|
"eval_samples_per_second": 878.974, |
|
"eval_steps_per_second": 109.942, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_accuracy": 0.6990770101547241, |
|
"eval_loss": 0.704833447933197, |
|
"eval_runtime": 7.1594, |
|
"eval_samples_per_second": 877.725, |
|
"eval_steps_per_second": 109.785, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"eval_accuracy": 0.705760657787323, |
|
"eval_loss": 0.6999921202659607, |
|
"eval_runtime": 7.1391, |
|
"eval_samples_per_second": 880.219, |
|
"eval_steps_per_second": 110.097, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.95320197044335e-05, |
|
"loss": 0.7242, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_accuracy": 0.7013049125671387, |
|
"eval_loss": 0.700331449508667, |
|
"eval_runtime": 7.1723, |
|
"eval_samples_per_second": 876.149, |
|
"eval_steps_per_second": 109.588, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_accuracy": 0.7043284773826599, |
|
"eval_loss": 0.7018357515335083, |
|
"eval_runtime": 7.1315, |
|
"eval_samples_per_second": 881.159, |
|
"eval_steps_per_second": 110.215, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 3.891625615763547e-05, |
|
"loss": 0.7127, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_accuracy": 0.7008274793624878, |
|
"eval_loss": 0.7000855803489685, |
|
"eval_runtime": 7.1461, |
|
"eval_samples_per_second": 879.36, |
|
"eval_steps_per_second": 109.99, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_accuracy": 0.7009866237640381, |
|
"eval_loss": 0.6994293928146362, |
|
"eval_runtime": 7.1376, |
|
"eval_samples_per_second": 880.41, |
|
"eval_steps_per_second": 110.121, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"eval_accuracy": 0.7036918997764587, |
|
"eval_loss": 0.6989775896072388, |
|
"eval_runtime": 7.1543, |
|
"eval_samples_per_second": 878.359, |
|
"eval_steps_per_second": 109.865, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 3.830049261083744e-05, |
|
"loss": 0.7142, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_accuracy": 0.7021005749702454, |
|
"eval_loss": 0.6990280151367188, |
|
"eval_runtime": 7.1395, |
|
"eval_samples_per_second": 880.172, |
|
"eval_steps_per_second": 110.092, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_accuracy": 0.711012065410614, |
|
"eval_loss": 0.6901064515113831, |
|
"eval_runtime": 7.1486, |
|
"eval_samples_per_second": 879.059, |
|
"eval_steps_per_second": 109.952, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 3.768472906403941e-05, |
|
"loss": 0.711, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_accuracy": 0.7097390294075012, |
|
"eval_loss": 0.6861850619316101, |
|
"eval_runtime": 7.1333, |
|
"eval_samples_per_second": 880.945, |
|
"eval_steps_per_second": 110.188, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_accuracy": 0.7105346918106079, |
|
"eval_loss": 0.6924527287483215, |
|
"eval_runtime": 7.1467, |
|
"eval_samples_per_second": 879.292, |
|
"eval_steps_per_second": 109.981, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_accuracy": 0.7073519825935364, |
|
"eval_loss": 0.6872392296791077, |
|
"eval_runtime": 7.1334, |
|
"eval_samples_per_second": 880.924, |
|
"eval_steps_per_second": 110.186, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 3.7068965517241385e-05, |
|
"loss": 0.7083, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"eval_accuracy": 0.7133991122245789, |
|
"eval_loss": 0.6849201321601868, |
|
"eval_runtime": 7.1414, |
|
"eval_samples_per_second": 879.941, |
|
"eval_steps_per_second": 110.063, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_accuracy": 0.711966872215271, |
|
"eval_loss": 0.6847355365753174, |
|
"eval_runtime": 7.1397, |
|
"eval_samples_per_second": 880.145, |
|
"eval_steps_per_second": 110.088, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 3.645320197044335e-05, |
|
"loss": 0.6995, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"eval_accuracy": 0.7130808234214783, |
|
"eval_loss": 0.6875377893447876, |
|
"eval_runtime": 7.146, |
|
"eval_samples_per_second": 879.379, |
|
"eval_steps_per_second": 109.992, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"eval_accuracy": 0.7157860994338989, |
|
"eval_loss": 0.6812172532081604, |
|
"eval_runtime": 7.1597, |
|
"eval_samples_per_second": 877.692, |
|
"eval_steps_per_second": 109.781, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"eval_accuracy": 0.7127625942230225, |
|
"eval_loss": 0.680852472782135, |
|
"eval_runtime": 7.1585, |
|
"eval_samples_per_second": 877.836, |
|
"eval_steps_per_second": 109.799, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.583743842364532e-05, |
|
"loss": 0.7014, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"eval_accuracy": 0.7146722078323364, |
|
"eval_loss": 0.6794761419296265, |
|
"eval_runtime": 7.1353, |
|
"eval_samples_per_second": 880.696, |
|
"eval_steps_per_second": 110.157, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"eval_accuracy": 0.7130808234214783, |
|
"eval_loss": 0.6790280938148499, |
|
"eval_runtime": 7.1362, |
|
"eval_samples_per_second": 880.579, |
|
"eval_steps_per_second": 110.142, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 3.522167487684729e-05, |
|
"loss": 0.6979, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_accuracy": 0.7159452438354492, |
|
"eval_loss": 0.6799929738044739, |
|
"eval_runtime": 7.1383, |
|
"eval_samples_per_second": 880.323, |
|
"eval_steps_per_second": 110.11, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7122851610183716, |
|
"eval_loss": 0.674569845199585, |
|
"eval_runtime": 7.1445, |
|
"eval_samples_per_second": 879.554, |
|
"eval_steps_per_second": 110.014, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"eval_accuracy": 0.7169000506401062, |
|
"eval_loss": 0.6750736832618713, |
|
"eval_runtime": 7.1334, |
|
"eval_samples_per_second": 880.923, |
|
"eval_steps_per_second": 110.185, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 3.4605911330049265e-05, |
|
"loss": 0.6886, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"eval_accuracy": 0.7151495814323425, |
|
"eval_loss": 0.675022542476654, |
|
"eval_runtime": 7.1463, |
|
"eval_samples_per_second": 879.333, |
|
"eval_steps_per_second": 109.987, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"eval_accuracy": 0.7137174010276794, |
|
"eval_loss": 0.6786561012268066, |
|
"eval_runtime": 7.1561, |
|
"eval_samples_per_second": 878.138, |
|
"eval_steps_per_second": 109.837, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 3.399014778325123e-05, |
|
"loss": 0.6733, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_accuracy": 0.7189688086509705, |
|
"eval_loss": 0.6791766881942749, |
|
"eval_runtime": 7.1682, |
|
"eval_samples_per_second": 876.651, |
|
"eval_steps_per_second": 109.651, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"eval_accuracy": 0.7149904370307922, |
|
"eval_loss": 0.680040717124939, |
|
"eval_runtime": 7.1467, |
|
"eval_samples_per_second": 879.292, |
|
"eval_steps_per_second": 109.981, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"eval_accuracy": 0.7148312926292419, |
|
"eval_loss": 0.6780914664268494, |
|
"eval_runtime": 7.1572, |
|
"eval_samples_per_second": 878.0, |
|
"eval_steps_per_second": 109.82, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 3.3374384236453204e-05, |
|
"loss": 0.6685, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"eval_accuracy": 0.7191279530525208, |
|
"eval_loss": 0.6794589161872864, |
|
"eval_runtime": 7.1322, |
|
"eval_samples_per_second": 881.071, |
|
"eval_steps_per_second": 110.204, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"eval_accuracy": 0.7188096642494202, |
|
"eval_loss": 0.6752949953079224, |
|
"eval_runtime": 7.134, |
|
"eval_samples_per_second": 880.857, |
|
"eval_steps_per_second": 110.177, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 3.275862068965517e-05, |
|
"loss": 0.6682, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"eval_accuracy": 0.7194462418556213, |
|
"eval_loss": 0.6764330863952637, |
|
"eval_runtime": 7.1203, |
|
"eval_samples_per_second": 882.545, |
|
"eval_steps_per_second": 110.388, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"eval_accuracy": 0.7172183394432068, |
|
"eval_loss": 0.6686244010925293, |
|
"eval_runtime": 7.1712, |
|
"eval_samples_per_second": 876.284, |
|
"eval_steps_per_second": 109.605, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"eval_accuracy": 0.7184914350509644, |
|
"eval_loss": 0.6764801144599915, |
|
"eval_runtime": 7.1495, |
|
"eval_samples_per_second": 878.938, |
|
"eval_steps_per_second": 109.937, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 3.2142857142857144e-05, |
|
"loss": 0.6764, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"eval_accuracy": 0.7191279530525208, |
|
"eval_loss": 0.6725979447364807, |
|
"eval_runtime": 7.1706, |
|
"eval_samples_per_second": 876.361, |
|
"eval_steps_per_second": 109.615, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"eval_accuracy": 0.7191279530525208, |
|
"eval_loss": 0.6702936291694641, |
|
"eval_runtime": 7.1416, |
|
"eval_samples_per_second": 879.918, |
|
"eval_steps_per_second": 110.06, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 3.152709359605912e-05, |
|
"loss": 0.6693, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"eval_accuracy": 0.7226288914680481, |
|
"eval_loss": 0.6674948930740356, |
|
"eval_runtime": 7.1555, |
|
"eval_samples_per_second": 878.209, |
|
"eval_steps_per_second": 109.846, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"eval_accuracy": 0.7232654094696045, |
|
"eval_loss": 0.6659447550773621, |
|
"eval_runtime": 7.1241, |
|
"eval_samples_per_second": 882.079, |
|
"eval_steps_per_second": 110.33, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"eval_accuracy": 0.7235836982727051, |
|
"eval_loss": 0.6667641997337341, |
|
"eval_runtime": 7.132, |
|
"eval_samples_per_second": 881.1, |
|
"eval_steps_per_second": 110.208, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 3.0911330049261084e-05, |
|
"loss": 0.6736, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"eval_accuracy": 0.7205601334571838, |
|
"eval_loss": 0.6661719679832458, |
|
"eval_runtime": 7.1184, |
|
"eval_samples_per_second": 882.786, |
|
"eval_steps_per_second": 110.419, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"eval_accuracy": 0.7205601334571838, |
|
"eval_loss": 0.6669681668281555, |
|
"eval_runtime": 7.1374, |
|
"eval_samples_per_second": 880.427, |
|
"eval_steps_per_second": 110.123, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 3.0295566502463057e-05, |
|
"loss": 0.6718, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"eval_accuracy": 0.7204009890556335, |
|
"eval_loss": 0.6656709313392639, |
|
"eval_runtime": 7.1161, |
|
"eval_samples_per_second": 883.063, |
|
"eval_steps_per_second": 110.453, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"eval_accuracy": 0.7242202162742615, |
|
"eval_loss": 0.66374671459198, |
|
"eval_runtime": 7.1264, |
|
"eval_samples_per_second": 881.788, |
|
"eval_steps_per_second": 110.294, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"eval_accuracy": 0.720241904258728, |
|
"eval_loss": 0.6692517995834351, |
|
"eval_runtime": 7.1197, |
|
"eval_samples_per_second": 882.62, |
|
"eval_steps_per_second": 110.398, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 2.9679802955665027e-05, |
|
"loss": 0.656, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"eval_accuracy": 0.7210375666618347, |
|
"eval_loss": 0.6680518984794617, |
|
"eval_runtime": 7.1182, |
|
"eval_samples_per_second": 882.811, |
|
"eval_steps_per_second": 110.422, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"eval_accuracy": 0.723106324672699, |
|
"eval_loss": 0.6737614274024963, |
|
"eval_runtime": 7.1146, |
|
"eval_samples_per_second": 883.25, |
|
"eval_steps_per_second": 110.477, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 2.9064039408866993e-05, |
|
"loss": 0.6429, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"eval_accuracy": 0.7194462418556213, |
|
"eval_loss": 0.6714398860931396, |
|
"eval_runtime": 7.1257, |
|
"eval_samples_per_second": 881.881, |
|
"eval_steps_per_second": 110.305, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"eval_accuracy": 0.7216740846633911, |
|
"eval_loss": 0.6679774522781372, |
|
"eval_runtime": 7.124, |
|
"eval_samples_per_second": 882.094, |
|
"eval_steps_per_second": 110.332, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"eval_accuracy": 0.7210375666618347, |
|
"eval_loss": 0.6685846447944641, |
|
"eval_runtime": 7.1342, |
|
"eval_samples_per_second": 880.83, |
|
"eval_steps_per_second": 110.174, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 2.844827586206897e-05, |
|
"loss": 0.6464, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"eval_accuracy": 0.7270846366882324, |
|
"eval_loss": 0.670677125453949, |
|
"eval_runtime": 7.1152, |
|
"eval_samples_per_second": 883.18, |
|
"eval_steps_per_second": 110.468, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"eval_accuracy": 0.7251750230789185, |
|
"eval_loss": 0.6647821068763733, |
|
"eval_runtime": 7.1238, |
|
"eval_samples_per_second": 882.114, |
|
"eval_steps_per_second": 110.334, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 2.7832512315270936e-05, |
|
"loss": 0.6428, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"eval_accuracy": 0.7246976494789124, |
|
"eval_loss": 0.6627594232559204, |
|
"eval_runtime": 7.13, |
|
"eval_samples_per_second": 881.351, |
|
"eval_steps_per_second": 110.239, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"eval_accuracy": 0.7239019870758057, |
|
"eval_loss": 0.664979100227356, |
|
"eval_runtime": 7.1457, |
|
"eval_samples_per_second": 879.411, |
|
"eval_steps_per_second": 109.996, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"eval_accuracy": 0.7256524562835693, |
|
"eval_loss": 0.6631101965904236, |
|
"eval_runtime": 7.128, |
|
"eval_samples_per_second": 881.597, |
|
"eval_steps_per_second": 110.27, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 2.7216748768472906e-05, |
|
"loss": 0.6404, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"eval_accuracy": 0.7235836982727051, |
|
"eval_loss": 0.6692806482315063, |
|
"eval_runtime": 7.1275, |
|
"eval_samples_per_second": 881.66, |
|
"eval_steps_per_second": 110.278, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"eval_accuracy": 0.72835773229599, |
|
"eval_loss": 0.6607570648193359, |
|
"eval_runtime": 7.1216, |
|
"eval_samples_per_second": 882.388, |
|
"eval_steps_per_second": 110.369, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 2.660098522167488e-05, |
|
"loss": 0.6395, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"eval_accuracy": 0.7259707450866699, |
|
"eval_loss": 0.6591860055923462, |
|
"eval_runtime": 7.1318, |
|
"eval_samples_per_second": 881.119, |
|
"eval_steps_per_second": 110.21, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"eval_accuracy": 0.7277212142944336, |
|
"eval_loss": 0.657184898853302, |
|
"eval_runtime": 7.1172, |
|
"eval_samples_per_second": 882.928, |
|
"eval_steps_per_second": 110.436, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"eval_accuracy": 0.7291533946990967, |
|
"eval_loss": 0.6624195575714111, |
|
"eval_runtime": 7.1289, |
|
"eval_samples_per_second": 881.476, |
|
"eval_steps_per_second": 110.255, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 2.598522167487685e-05, |
|
"loss": 0.6374, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"eval_accuracy": 0.729312539100647, |
|
"eval_loss": 0.6546571254730225, |
|
"eval_runtime": 7.1204, |
|
"eval_samples_per_second": 882.541, |
|
"eval_steps_per_second": 110.388, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"eval_accuracy": 0.7278803586959839, |
|
"eval_loss": 0.6573053598403931, |
|
"eval_runtime": 7.1365, |
|
"eval_samples_per_second": 880.549, |
|
"eval_steps_per_second": 110.139, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 2.5369458128078822e-05, |
|
"loss": 0.646, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"eval_accuracy": 0.7258116006851196, |
|
"eval_loss": 0.6613573431968689, |
|
"eval_runtime": 7.1306, |
|
"eval_samples_per_second": 881.266, |
|
"eval_steps_per_second": 110.228, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"eval_accuracy": 0.7281985878944397, |
|
"eval_loss": 0.6586018800735474, |
|
"eval_runtime": 7.1343, |
|
"eval_samples_per_second": 880.812, |
|
"eval_steps_per_second": 110.172, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"eval_accuracy": 0.727402925491333, |
|
"eval_loss": 0.6686715483665466, |
|
"eval_runtime": 7.1093, |
|
"eval_samples_per_second": 883.91, |
|
"eval_steps_per_second": 110.559, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 2.475369458128079e-05, |
|
"loss": 0.629, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"eval_accuracy": 0.7250159382820129, |
|
"eval_loss": 0.66302090883255, |
|
"eval_runtime": 7.134, |
|
"eval_samples_per_second": 880.855, |
|
"eval_steps_per_second": 110.177, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"eval_accuracy": 0.7280394434928894, |
|
"eval_loss": 0.667898416519165, |
|
"eval_runtime": 7.117, |
|
"eval_samples_per_second": 882.951, |
|
"eval_steps_per_second": 110.439, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 2.413793103448276e-05, |
|
"loss": 0.617, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"eval_accuracy": 0.7280394434928894, |
|
"eval_loss": 0.6668043732643127, |
|
"eval_runtime": 7.1304, |
|
"eval_samples_per_second": 881.295, |
|
"eval_steps_per_second": 110.232, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"eval_accuracy": 0.72835773229599, |
|
"eval_loss": 0.6602988839149475, |
|
"eval_runtime": 7.1165, |
|
"eval_samples_per_second": 883.014, |
|
"eval_steps_per_second": 110.447, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"eval_accuracy": 0.731699526309967, |
|
"eval_loss": 0.6601841449737549, |
|
"eval_runtime": 7.1229, |
|
"eval_samples_per_second": 882.224, |
|
"eval_steps_per_second": 110.348, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 2.3522167487684728e-05, |
|
"loss": 0.6239, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"eval_accuracy": 0.7299490571022034, |
|
"eval_loss": 0.6645928025245667, |
|
"eval_runtime": 7.1079, |
|
"eval_samples_per_second": 884.089, |
|
"eval_steps_per_second": 110.581, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"eval_accuracy": 0.7286760210990906, |
|
"eval_loss": 0.6626018285751343, |
|
"eval_runtime": 7.1349, |
|
"eval_samples_per_second": 880.737, |
|
"eval_steps_per_second": 110.162, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 2.29064039408867e-05, |
|
"loss": 0.6206, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"eval_accuracy": 0.7262889742851257, |
|
"eval_loss": 0.6620416045188904, |
|
"eval_runtime": 7.1083, |
|
"eval_samples_per_second": 884.033, |
|
"eval_steps_per_second": 110.574, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"eval_accuracy": 0.7259707450866699, |
|
"eval_loss": 0.6685923933982849, |
|
"eval_runtime": 7.1136, |
|
"eval_samples_per_second": 883.379, |
|
"eval_steps_per_second": 110.493, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"eval_accuracy": 0.7312221527099609, |
|
"eval_loss": 0.6637505292892456, |
|
"eval_runtime": 7.0986, |
|
"eval_samples_per_second": 885.242, |
|
"eval_steps_per_second": 110.726, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 2.229064039408867e-05, |
|
"loss": 0.614, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"eval_accuracy": 0.7297899723052979, |
|
"eval_loss": 0.6602672338485718, |
|
"eval_runtime": 7.1489, |
|
"eval_samples_per_second": 879.01, |
|
"eval_steps_per_second": 109.946, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"eval_accuracy": 0.7278803586959839, |
|
"eval_loss": 0.662921667098999, |
|
"eval_runtime": 7.105, |
|
"eval_samples_per_second": 884.45, |
|
"eval_steps_per_second": 110.627, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 2.1674876847290644e-05, |
|
"loss": 0.6193, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"eval_accuracy": 0.7323361039161682, |
|
"eval_loss": 0.6599108576774597, |
|
"eval_runtime": 7.1371, |
|
"eval_samples_per_second": 880.464, |
|
"eval_steps_per_second": 110.128, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"eval_accuracy": 0.7286760210990906, |
|
"eval_loss": 0.6652523279190063, |
|
"eval_runtime": 7.1062, |
|
"eval_samples_per_second": 884.296, |
|
"eval_steps_per_second": 110.607, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"eval_accuracy": 0.7288351655006409, |
|
"eval_loss": 0.6546856760978699, |
|
"eval_runtime": 7.1178, |
|
"eval_samples_per_second": 882.851, |
|
"eval_steps_per_second": 110.427, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 2.105911330049261e-05, |
|
"loss": 0.6226, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"eval_accuracy": 0.730267345905304, |
|
"eval_loss": 0.6600866913795471, |
|
"eval_runtime": 7.0969, |
|
"eval_samples_per_second": 885.457, |
|
"eval_steps_per_second": 110.753, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"eval_accuracy": 0.72835773229599, |
|
"eval_loss": 0.6599671244621277, |
|
"eval_runtime": 7.1128, |
|
"eval_samples_per_second": 883.483, |
|
"eval_steps_per_second": 110.506, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 2.0443349753694584e-05, |
|
"loss": 0.6158, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"eval_accuracy": 0.7304264903068542, |
|
"eval_loss": 0.6600461602210999, |
|
"eval_runtime": 7.1187, |
|
"eval_samples_per_second": 882.74, |
|
"eval_steps_per_second": 110.413, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"eval_accuracy": 0.7256524562835693, |
|
"eval_loss": 0.6626370549201965, |
|
"eval_runtime": 7.1145, |
|
"eval_samples_per_second": 883.27, |
|
"eval_steps_per_second": 110.479, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"eval_accuracy": 0.7334500551223755, |
|
"eval_loss": 0.6616737842559814, |
|
"eval_runtime": 7.1091, |
|
"eval_samples_per_second": 883.933, |
|
"eval_steps_per_second": 110.562, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 1.9827586206896554e-05, |
|
"loss": 0.6115, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"eval_accuracy": 0.7310630083084106, |
|
"eval_loss": 0.6600754261016846, |
|
"eval_runtime": 7.1175, |
|
"eval_samples_per_second": 882.895, |
|
"eval_steps_per_second": 110.432, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"eval_accuracy": 0.7340865731239319, |
|
"eval_loss": 0.6601821780204773, |
|
"eval_runtime": 7.1275, |
|
"eval_samples_per_second": 881.653, |
|
"eval_steps_per_second": 110.277, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 1.921182266009852e-05, |
|
"loss": 0.6057, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"eval_accuracy": 0.7321769595146179, |
|
"eval_loss": 0.6605399250984192, |
|
"eval_runtime": 7.1294, |
|
"eval_samples_per_second": 881.424, |
|
"eval_steps_per_second": 110.248, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"eval_accuracy": 0.7288351655006409, |
|
"eval_loss": 0.6598327159881592, |
|
"eval_runtime": 7.119, |
|
"eval_samples_per_second": 882.705, |
|
"eval_steps_per_second": 110.408, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"eval_accuracy": 0.7297899723052979, |
|
"eval_loss": 0.6586458683013916, |
|
"eval_runtime": 7.1237, |
|
"eval_samples_per_second": 882.12, |
|
"eval_steps_per_second": 110.335, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 1.8596059113300493e-05, |
|
"loss": 0.5992, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"eval_accuracy": 0.7342457175254822, |
|
"eval_loss": 0.6593834161758423, |
|
"eval_runtime": 7.1555, |
|
"eval_samples_per_second": 878.205, |
|
"eval_steps_per_second": 109.846, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"eval_accuracy": 0.733609139919281, |
|
"eval_loss": 0.659511923789978, |
|
"eval_runtime": 7.1271, |
|
"eval_samples_per_second": 881.705, |
|
"eval_steps_per_second": 110.283, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 1.7980295566502463e-05, |
|
"loss": 0.6028, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"eval_accuracy": 0.7299490571022034, |
|
"eval_loss": 0.6572112441062927, |
|
"eval_runtime": 7.1318, |
|
"eval_samples_per_second": 881.126, |
|
"eval_steps_per_second": 110.211, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"eval_accuracy": 0.7334500551223755, |
|
"eval_loss": 0.6614532470703125, |
|
"eval_runtime": 7.1291, |
|
"eval_samples_per_second": 881.454, |
|
"eval_steps_per_second": 110.252, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"eval_accuracy": 0.7304264903068542, |
|
"eval_loss": 0.6609640717506409, |
|
"eval_runtime": 7.1218, |
|
"eval_samples_per_second": 882.36, |
|
"eval_steps_per_second": 110.365, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 1.7364532019704436e-05, |
|
"loss": 0.6058, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"eval_accuracy": 0.7310630083084106, |
|
"eval_loss": 0.6593703031539917, |
|
"eval_runtime": 7.1821, |
|
"eval_samples_per_second": 874.951, |
|
"eval_steps_per_second": 109.438, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"eval_accuracy": 0.7334500551223755, |
|
"eval_loss": 0.6582794785499573, |
|
"eval_runtime": 7.1156, |
|
"eval_samples_per_second": 883.128, |
|
"eval_steps_per_second": 110.461, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 1.6748768472906403e-05, |
|
"loss": 0.5985, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"eval_accuracy": 0.7304264903068542, |
|
"eval_loss": 0.6584789752960205, |
|
"eval_runtime": 7.1255, |
|
"eval_samples_per_second": 881.898, |
|
"eval_steps_per_second": 110.307, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"eval_accuracy": 0.7323361039161682, |
|
"eval_loss": 0.6626281142234802, |
|
"eval_runtime": 7.1065, |
|
"eval_samples_per_second": 884.263, |
|
"eval_steps_per_second": 110.603, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"eval_accuracy": 0.727402925491333, |
|
"eval_loss": 0.6593265533447266, |
|
"eval_runtime": 7.1566, |
|
"eval_samples_per_second": 878.068, |
|
"eval_steps_per_second": 109.828, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"learning_rate": 1.6133004926108376e-05, |
|
"loss": 0.6, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"eval_accuracy": 0.7328134775161743, |
|
"eval_loss": 0.6583454012870789, |
|
"eval_runtime": 7.1076, |
|
"eval_samples_per_second": 884.119, |
|
"eval_steps_per_second": 110.585, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"eval_accuracy": 0.732654333114624, |
|
"eval_loss": 0.6581458449363708, |
|
"eval_runtime": 7.1149, |
|
"eval_samples_per_second": 883.214, |
|
"eval_steps_per_second": 110.472, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 1.5517241379310346e-05, |
|
"loss": 0.6009, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"eval_accuracy": 0.7334500551223755, |
|
"eval_loss": 0.6602644324302673, |
|
"eval_runtime": 7.1115, |
|
"eval_samples_per_second": 883.644, |
|
"eval_steps_per_second": 110.526, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"eval_accuracy": 0.7315404415130615, |
|
"eval_loss": 0.6539360284805298, |
|
"eval_runtime": 7.1218, |
|
"eval_samples_per_second": 882.358, |
|
"eval_steps_per_second": 110.365, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.733609139919281, |
|
"eval_loss": 0.6602201461791992, |
|
"eval_runtime": 7.1034, |
|
"eval_samples_per_second": 884.646, |
|
"eval_steps_per_second": 110.651, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 1.4901477832512317e-05, |
|
"loss": 0.6013, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"eval_accuracy": 0.732654333114624, |
|
"eval_loss": 0.6585041284561157, |
|
"eval_runtime": 7.1174, |
|
"eval_samples_per_second": 882.908, |
|
"eval_steps_per_second": 110.434, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"eval_accuracy": 0.7301082015037537, |
|
"eval_loss": 0.659524142742157, |
|
"eval_runtime": 7.0987, |
|
"eval_samples_per_second": 885.233, |
|
"eval_steps_per_second": 110.725, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 1.4285714285714285e-05, |
|
"loss": 0.5937, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"eval_accuracy": 0.7318586707115173, |
|
"eval_loss": 0.6640517711639404, |
|
"eval_runtime": 7.1062, |
|
"eval_samples_per_second": 884.293, |
|
"eval_steps_per_second": 110.607, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"eval_accuracy": 0.7324952483177185, |
|
"eval_loss": 0.6623978018760681, |
|
"eval_runtime": 7.1076, |
|
"eval_samples_per_second": 884.119, |
|
"eval_steps_per_second": 110.585, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"eval_accuracy": 0.7321769595146179, |
|
"eval_loss": 0.6652867794036865, |
|
"eval_runtime": 7.1683, |
|
"eval_samples_per_second": 876.634, |
|
"eval_steps_per_second": 109.649, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"learning_rate": 1.3669950738916257e-05, |
|
"loss": 0.5779, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"eval_accuracy": 0.7324952483177185, |
|
"eval_loss": 0.6649503111839294, |
|
"eval_runtime": 7.1045, |
|
"eval_samples_per_second": 884.505, |
|
"eval_steps_per_second": 110.634, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"eval_accuracy": 0.7332909107208252, |
|
"eval_loss": 0.6616361141204834, |
|
"eval_runtime": 7.114, |
|
"eval_samples_per_second": 883.327, |
|
"eval_steps_per_second": 110.486, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 1.3054187192118228e-05, |
|
"loss": 0.5809, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"eval_accuracy": 0.7312221527099609, |
|
"eval_loss": 0.6653700470924377, |
|
"eval_runtime": 7.1061, |
|
"eval_samples_per_second": 884.315, |
|
"eval_steps_per_second": 110.61, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"step": 30000, |
|
"total_flos": 4.469740936607232e+16, |
|
"train_loss": 0.6925110097249348, |
|
"train_runtime": 5542.0479, |
|
"train_samples_per_second": 468.744, |
|
"train_steps_per_second": 7.326 |
|
} |
|
], |
|
"max_steps": 40600, |
|
"num_train_epochs": 10, |
|
"total_flos": 4.469740936607232e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|