{ "best_metric": 1.0717943906784058, "best_model_checkpoint": "miner_id_24/checkpoint-500", "epoch": 0.34916201117318435, "eval_steps": 50, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006983240223463687, "eval_loss": 1.370945930480957, "eval_runtime": 51.907, "eval_samples_per_second": 11.617, "eval_steps_per_second": 2.909, "step": 1 }, { "epoch": 0.006983240223463687, "grad_norm": 0.8086255788803101, "learning_rate": 4.34e-05, "loss": 1.074, "step": 10 }, { "epoch": 0.013966480446927373, "grad_norm": 0.5126949548721313, "learning_rate": 8.68e-05, "loss": 0.9041, "step": 20 }, { "epoch": 0.02094972067039106, "grad_norm": 0.6224434971809387, "learning_rate": 0.0001302, "loss": 0.9133, "step": 30 }, { "epoch": 0.027932960893854747, "grad_norm": 0.596337616443634, "learning_rate": 0.0001736, "loss": 0.8771, "step": 40 }, { "epoch": 0.034916201117318434, "grad_norm": 1.162047266960144, "learning_rate": 0.000217, "loss": 2.273, "step": 50 }, { "epoch": 0.034916201117318434, "eval_loss": 1.2856680154800415, "eval_runtime": 51.8319, "eval_samples_per_second": 11.634, "eval_steps_per_second": 2.913, "step": 50 }, { "epoch": 0.04189944134078212, "grad_norm": 0.47395944595336914, "learning_rate": 0.00021673569945319091, "loss": 0.9326, "step": 60 }, { "epoch": 0.04888268156424581, "grad_norm": 0.45159703493118286, "learning_rate": 0.00021594408545846038, "loss": 0.864, "step": 70 }, { "epoch": 0.055865921787709494, "grad_norm": 0.48672589659690857, "learning_rate": 0.0002146290146796179, "loss": 0.9053, "step": 80 }, { "epoch": 0.06284916201117319, "grad_norm": 0.6035557389259338, "learning_rate": 0.0002127968940093076, "loss": 0.9917, "step": 90 }, { "epoch": 0.06983240223463687, "grad_norm": 1.352277398109436, "learning_rate": 0.00021045664935527106, "loss": 2.3177, "step": 100 }, { "epoch": 0.06983240223463687, "eval_loss": 1.2862142324447632, "eval_runtime": 51.6109, "eval_samples_per_second": 11.684, "eval_steps_per_second": 2.926, "step": 100 }, { "epoch": 0.07681564245810056, "grad_norm": 0.5764887928962708, "learning_rate": 0.00020761968215422217, "loss": 0.9639, "step": 110 }, { "epoch": 0.08379888268156424, "grad_norm": 0.45929816365242004, "learning_rate": 0.00020429981382519356, "loss": 0.9454, "step": 120 }, { "epoch": 0.09078212290502793, "grad_norm": 0.5289813280105591, "learning_rate": 0.00020051321843297219, "loss": 0.8689, "step": 130 }, { "epoch": 0.09776536312849161, "grad_norm": 0.5550933480262756, "learning_rate": 0.0001962783438896818, "loss": 0.8495, "step": 140 }, { "epoch": 0.10474860335195531, "grad_norm": 1.3829443454742432, "learning_rate": 0.0001916158220784091, "loss": 2.1619, "step": 150 }, { "epoch": 0.10474860335195531, "eval_loss": 1.2346261739730835, "eval_runtime": 51.8194, "eval_samples_per_second": 11.637, "eval_steps_per_second": 2.914, "step": 150 }, { "epoch": 0.11173184357541899, "grad_norm": 0.4873850345611572, "learning_rate": 0.00018654836833674362, "loss": 0.9312, "step": 160 }, { "epoch": 0.11871508379888268, "grad_norm": 0.4591498076915741, "learning_rate": 0.0001811006707899361, "loss": 0.8553, "step": 170 }, { "epoch": 0.12569832402234637, "grad_norm": 0.44848743081092834, "learning_rate": 0.0001752992700728339, "loss": 0.8601, "step": 180 }, { "epoch": 0.13268156424581007, "grad_norm": 0.49805349111557007, "learning_rate": 0.00016917243002657602, "loss": 0.7799, "step": 190 }, { "epoch": 0.13966480446927373, "grad_norm": 1.24271821975708, "learning_rate": 0.00016275, "loss": 1.9841, "step": 200 }, { "epoch": 0.13966480446927373, "eval_loss": 1.2006936073303223, "eval_runtime": 51.8223, "eval_samples_per_second": 11.636, "eval_steps_per_second": 2.914, "step": 200 }, { "epoch": 0.14664804469273743, "grad_norm": 0.45036932826042175, "learning_rate": 0.0001560632694266149, "loss": 0.9129, "step": 210 }, { "epoch": 0.15363128491620112, "grad_norm": 0.4322090446949005, "learning_rate": 0.00014914481538562646, "loss": 0.9577, "step": 220 }, { "epoch": 0.16061452513966482, "grad_norm": 0.4544154703617096, "learning_rate": 0.0001420283438896818, "loss": 0.7483, "step": 230 }, { "epoch": 0.16759776536312848, "grad_norm": 0.6254767179489136, "learning_rate": 0.00013474852567256393, "loss": 0.961, "step": 240 }, { "epoch": 0.17458100558659218, "grad_norm": 1.0982528924942017, "learning_rate": 0.00012734082727686196, "loss": 2.2834, "step": 250 }, { "epoch": 0.17458100558659218, "eval_loss": 1.151829481124878, "eval_runtime": 51.8371, "eval_samples_per_second": 11.633, "eval_steps_per_second": 2.913, "step": 250 }, { "epoch": 0.18156424581005587, "grad_norm": 0.49490973353385925, "learning_rate": 0.0001198413382645404, "loss": 0.9051, "step": 260 }, { "epoch": 0.18854748603351956, "grad_norm": 0.419166624546051, "learning_rate": 0.00011228659539222137, "loss": 0.856, "step": 270 }, { "epoch": 0.19553072625698323, "grad_norm": 0.44810253381729126, "learning_rate": 0.00010471340460777866, "loss": 0.772, "step": 280 }, { "epoch": 0.20251396648044692, "grad_norm": 0.5431365370750427, "learning_rate": 9.715866173545961e-05, "loss": 0.8117, "step": 290 }, { "epoch": 0.20949720670391062, "grad_norm": 1.623485803604126, "learning_rate": 8.965917272313806e-05, "loss": 1.9584, "step": 300 }, { "epoch": 0.20949720670391062, "eval_loss": 1.1206214427947998, "eval_runtime": 51.6387, "eval_samples_per_second": 11.677, "eval_steps_per_second": 2.924, "step": 300 }, { "epoch": 0.2164804469273743, "grad_norm": 0.4230874478816986, "learning_rate": 8.225147432743606e-05, "loss": 0.9144, "step": 310 }, { "epoch": 0.22346368715083798, "grad_norm": 0.41244614124298096, "learning_rate": 7.497165611031821e-05, "loss": 0.8197, "step": 320 }, { "epoch": 0.23044692737430167, "grad_norm": 0.46321332454681396, "learning_rate": 6.785518461437353e-05, "loss": 0.8108, "step": 330 }, { "epoch": 0.23743016759776536, "grad_norm": 0.4879332184791565, "learning_rate": 6.093673057338509e-05, "loss": 0.8025, "step": 340 }, { "epoch": 0.24441340782122906, "grad_norm": 1.3698660135269165, "learning_rate": 5.4250000000000024e-05, "loss": 2.2907, "step": 350 }, { "epoch": 0.24441340782122906, "eval_loss": 1.0982829332351685, "eval_runtime": 51.8499, "eval_samples_per_second": 11.63, "eval_steps_per_second": 2.912, "step": 350 }, { "epoch": 0.25139664804469275, "grad_norm": 0.40256136655807495, "learning_rate": 4.782756997342398e-05, "loss": 0.8455, "step": 360 }, { "epoch": 0.25837988826815644, "grad_norm": 0.4019988775253296, "learning_rate": 4.170072992716607e-05, "loss": 0.8504, "step": 370 }, { "epoch": 0.26536312849162014, "grad_norm": 0.46585512161254883, "learning_rate": 3.5899329210063916e-05, "loss": 0.7688, "step": 380 }, { "epoch": 0.2723463687150838, "grad_norm": 0.49366164207458496, "learning_rate": 3.045163166325637e-05, "loss": 0.9192, "step": 390 }, { "epoch": 0.27932960893854747, "grad_norm": 1.1542916297912598, "learning_rate": 2.5384177921590895e-05, "loss": 2.3411, "step": 400 }, { "epoch": 0.27932960893854747, "eval_loss": 1.0796287059783936, "eval_runtime": 51.6139, "eval_samples_per_second": 11.683, "eval_steps_per_second": 2.926, "step": 400 }, { "epoch": 0.28631284916201116, "grad_norm": 0.40678611397743225, "learning_rate": 2.0721656110318213e-05, "loss": 0.8682, "step": 410 }, { "epoch": 0.29329608938547486, "grad_norm": 0.38841360807418823, "learning_rate": 1.6486781567027783e-05, "loss": 0.797, "step": 420 }, { "epoch": 0.30027932960893855, "grad_norm": 0.4177733361721039, "learning_rate": 1.2700186174806422e-05, "loss": 0.7461, "step": 430 }, { "epoch": 0.30726256983240224, "grad_norm": 0.5100810527801514, "learning_rate": 9.380317845777794e-06, "loss": 0.8731, "step": 440 }, { "epoch": 0.31424581005586594, "grad_norm": 1.1657980680465698, "learning_rate": 6.543350644728947e-06, "loss": 2.3673, "step": 450 }, { "epoch": 0.31424581005586594, "eval_loss": 1.072546362876892, "eval_runtime": 51.6984, "eval_samples_per_second": 11.664, "eval_steps_per_second": 2.921, "step": 450 }, { "epoch": 0.32122905027932963, "grad_norm": 0.3935563862323761, "learning_rate": 4.2031059906924e-06, "loss": 0.8383, "step": 460 }, { "epoch": 0.32821229050279327, "grad_norm": 0.3855022192001343, "learning_rate": 2.3709853203820825e-06, "loss": 0.7788, "step": 470 }, { "epoch": 0.33519553072625696, "grad_norm": 0.4132440984249115, "learning_rate": 1.0559145415396157e-06, "loss": 0.7524, "step": 480 }, { "epoch": 0.34217877094972066, "grad_norm": 0.4790831208229065, "learning_rate": 2.643005468090745e-07, "loss": 0.8183, "step": 490 }, { "epoch": 0.34916201117318435, "grad_norm": 1.258919596672058, "learning_rate": 0.0, "loss": 2.1397, "step": 500 }, { "epoch": 0.34916201117318435, "eval_loss": 1.0717943906784058, "eval_runtime": 52.0261, "eval_samples_per_second": 11.59, "eval_steps_per_second": 2.902, "step": 500 } ], "logging_steps": 10, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.408558478917632e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }