|
{ |
|
"best_metric": 0.1643463671207428, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-500", |
|
"epoch": 0.212630236019562, |
|
"eval_steps": 50, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00042526047203912394, |
|
"eval_loss": 0.6095050573348999, |
|
"eval_runtime": 72.3902, |
|
"eval_samples_per_second": 13.676, |
|
"eval_steps_per_second": 3.426, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00425260472039124, |
|
"grad_norm": 0.8989177346229553, |
|
"learning_rate": 4.2600000000000005e-05, |
|
"loss": 0.2704, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.00850520944078248, |
|
"grad_norm": 0.80766361951828, |
|
"learning_rate": 8.520000000000001e-05, |
|
"loss": 0.2043, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01275781416117372, |
|
"grad_norm": 1.4762111902236938, |
|
"learning_rate": 0.0001278, |
|
"loss": 0.2043, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01701041888156496, |
|
"grad_norm": 1.0572073459625244, |
|
"learning_rate": 0.00017040000000000002, |
|
"loss": 0.2232, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0212630236019562, |
|
"grad_norm": 4.580044269561768, |
|
"learning_rate": 0.000213, |
|
"loss": 0.2669, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0212630236019562, |
|
"eval_loss": 0.2905704379081726, |
|
"eval_runtime": 72.2977, |
|
"eval_samples_per_second": 13.693, |
|
"eval_steps_per_second": 3.43, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02551562832234744, |
|
"grad_norm": 0.48462969064712524, |
|
"learning_rate": 0.00021274057135267128, |
|
"loss": 0.1732, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02976823304273868, |
|
"grad_norm": 0.7520514726638794, |
|
"learning_rate": 0.00021196354932097723, |
|
"loss": 0.1737, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.03402083776312992, |
|
"grad_norm": 0.733476996421814, |
|
"learning_rate": 0.0002106727194781503, |
|
"loss": 0.1691, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.03827344248352116, |
|
"grad_norm": 1.303952932357788, |
|
"learning_rate": 0.00020887437061743096, |
|
"loss": 0.2693, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0425260472039124, |
|
"grad_norm": 2.432600975036621, |
|
"learning_rate": 0.00020657726411369925, |
|
"loss": 0.2541, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0425260472039124, |
|
"eval_loss": 0.2186073362827301, |
|
"eval_runtime": 72.3365, |
|
"eval_samples_per_second": 13.686, |
|
"eval_steps_per_second": 3.428, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.04677865192430364, |
|
"grad_norm": 0.3898923099040985, |
|
"learning_rate": 0.000203792591238937, |
|
"loss": 0.1503, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.05103125664469488, |
|
"grad_norm": 0.5413680672645569, |
|
"learning_rate": 0.0002005339186394757, |
|
"loss": 0.1434, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.05528386136508612, |
|
"grad_norm": 0.783905029296875, |
|
"learning_rate": 0.00019681712224065936, |
|
"loss": 0.1696, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.05953646608547736, |
|
"grad_norm": 1.4847009181976318, |
|
"learning_rate": 0.0001926603099009319, |
|
"loss": 0.2174, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.0637890708058686, |
|
"grad_norm": 2.3255372047424316, |
|
"learning_rate": 0.00018808373319217114, |
|
"loss": 0.275, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0637890708058686, |
|
"eval_loss": 0.2177441269159317, |
|
"eval_runtime": 72.4141, |
|
"eval_samples_per_second": 13.671, |
|
"eval_steps_per_second": 3.425, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.06804167552625984, |
|
"grad_norm": 0.3854157626628876, |
|
"learning_rate": 0.00018310968873606635, |
|
"loss": 0.1658, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.07229428024665108, |
|
"grad_norm": 0.4845024049282074, |
|
"learning_rate": 0.0001777624095772184, |
|
"loss": 0.1831, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.07654688496704232, |
|
"grad_norm": 0.9749467968940735, |
|
"learning_rate": 0.0001720679471221826, |
|
"loss": 0.1933, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.08079948968743356, |
|
"grad_norm": 0.8443153500556946, |
|
"learning_rate": 0.00016605404421963453, |
|
"loss": 0.2047, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.0850520944078248, |
|
"grad_norm": 1.4500705003738403, |
|
"learning_rate": 0.00015975, |
|
"loss": 0.2931, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0850520944078248, |
|
"eval_loss": 0.20021717250347137, |
|
"eval_runtime": 72.6074, |
|
"eval_samples_per_second": 13.635, |
|
"eval_steps_per_second": 3.416, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.08930469912821604, |
|
"grad_norm": 0.5776247978210449, |
|
"learning_rate": 0.00015318652713303674, |
|
"loss": 0.16, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.09355730384860728, |
|
"grad_norm": 0.5547011494636536, |
|
"learning_rate": 0.00014639560219879464, |
|
"loss": 0.1446, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.09780990856899852, |
|
"grad_norm": 0.702063798904419, |
|
"learning_rate": 0.0001394103099009319, |
|
"loss": 0.1747, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.10206251328938976, |
|
"grad_norm": 0.9604615569114685, |
|
"learning_rate": 0.0001322646818813646, |
|
"loss": 0.2264, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.106315118009781, |
|
"grad_norm": 2.0104331970214844, |
|
"learning_rate": 0.0001249935309215281, |
|
"loss": 0.254, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.106315118009781, |
|
"eval_loss": 0.1977642923593521, |
|
"eval_runtime": 72.6924, |
|
"eval_samples_per_second": 13.619, |
|
"eval_steps_per_second": 3.412, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.11056772273017224, |
|
"grad_norm": 0.3492359220981598, |
|
"learning_rate": 0.0001176322813380051, |
|
"loss": 0.1203, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.11482032745056348, |
|
"grad_norm": 0.4210224449634552, |
|
"learning_rate": 0.00011021679639881638, |
|
"loss": 0.1282, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.11907293217095472, |
|
"grad_norm": 0.927093505859375, |
|
"learning_rate": 0.00010278320360118368, |
|
"loss": 0.1669, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.12332553689134595, |
|
"grad_norm": 1.108055830001831, |
|
"learning_rate": 9.536771866199493e-05, |
|
"loss": 0.1855, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.1275781416117372, |
|
"grad_norm": 1.8096457719802856, |
|
"learning_rate": 8.800646907847192e-05, |
|
"loss": 0.2914, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.1275781416117372, |
|
"eval_loss": 0.187950000166893, |
|
"eval_runtime": 72.7518, |
|
"eval_samples_per_second": 13.608, |
|
"eval_steps_per_second": 3.409, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.13183074633212843, |
|
"grad_norm": 0.4479442834854126, |
|
"learning_rate": 8.07353181186354e-05, |
|
"loss": 0.124, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.13608335105251967, |
|
"grad_norm": 0.41207781434059143, |
|
"learning_rate": 7.35896900990681e-05, |
|
"loss": 0.1531, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.14033595577291091, |
|
"grad_norm": 0.9523985981941223, |
|
"learning_rate": 6.660439780120536e-05, |
|
"loss": 0.1732, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.14458856049330215, |
|
"grad_norm": 1.1972426176071167, |
|
"learning_rate": 5.981347286696324e-05, |
|
"loss": 0.1977, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.1488411652136934, |
|
"grad_norm": 1.7219096422195435, |
|
"learning_rate": 5.325000000000002e-05, |
|
"loss": 0.1944, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.1488411652136934, |
|
"eval_loss": 0.17624303698539734, |
|
"eval_runtime": 72.6437, |
|
"eval_samples_per_second": 13.628, |
|
"eval_steps_per_second": 3.414, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.15309376993408463, |
|
"grad_norm": 0.34478458762168884, |
|
"learning_rate": 4.6945955780365475e-05, |
|
"loss": 0.1069, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.15734637465447587, |
|
"grad_norm": 0.7543050646781921, |
|
"learning_rate": 4.0932052877817393e-05, |
|
"loss": 0.1509, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.1615989793748671, |
|
"grad_norm": 0.7749956846237183, |
|
"learning_rate": 3.523759042278163e-05, |
|
"loss": 0.1447, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.16585158409525835, |
|
"grad_norm": 0.8737692832946777, |
|
"learning_rate": 2.989031126393367e-05, |
|
"loss": 0.2315, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.1701041888156496, |
|
"grad_norm": 1.4741559028625488, |
|
"learning_rate": 2.4916266807828855e-05, |
|
"loss": 0.234, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1701041888156496, |
|
"eval_loss": 0.16768178343772888, |
|
"eval_runtime": 72.7753, |
|
"eval_samples_per_second": 13.604, |
|
"eval_steps_per_second": 3.408, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.17435679353604083, |
|
"grad_norm": 0.32338669896125793, |
|
"learning_rate": 2.033969009906811e-05, |
|
"loss": 0.1204, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.17860939825643207, |
|
"grad_norm": 0.5574338436126709, |
|
"learning_rate": 1.6182877759340637e-05, |
|
"loss": 0.1627, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.1828620029768233, |
|
"grad_norm": 0.619001030921936, |
|
"learning_rate": 1.2466081360524275e-05, |
|
"loss": 0.1113, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.18711460769721455, |
|
"grad_norm": 0.8440791368484497, |
|
"learning_rate": 9.207408761062996e-06, |
|
"loss": 0.1754, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.1913672124176058, |
|
"grad_norm": 1.8180017471313477, |
|
"learning_rate": 6.422735886300764e-06, |
|
"loss": 0.2486, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.1913672124176058, |
|
"eval_loss": 0.16511203348636627, |
|
"eval_runtime": 72.2907, |
|
"eval_samples_per_second": 13.695, |
|
"eval_steps_per_second": 3.431, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.19561981713799703, |
|
"grad_norm": 0.3398416042327881, |
|
"learning_rate": 4.125629382569038e-06, |
|
"loss": 0.1377, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.19987242185838827, |
|
"grad_norm": 0.6524055004119873, |
|
"learning_rate": 2.327280521849694e-06, |
|
"loss": 0.1348, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.2041250265787795, |
|
"grad_norm": 0.5883779525756836, |
|
"learning_rate": 1.0364506790227565e-06, |
|
"loss": 0.1782, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.20837763129917075, |
|
"grad_norm": 0.6829372644424438, |
|
"learning_rate": 2.5942864732872295e-07, |
|
"loss": 0.181, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.212630236019562, |
|
"grad_norm": 1.7673338651657104, |
|
"learning_rate": 0.0, |
|
"loss": 0.2541, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.212630236019562, |
|
"eval_loss": 0.1643463671207428, |
|
"eval_runtime": 72.4913, |
|
"eval_samples_per_second": 13.657, |
|
"eval_steps_per_second": 3.421, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.1238426161656627e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|