{ "best_metric": 1.151829481124878, "best_model_checkpoint": "miner_id_24/checkpoint-250", "epoch": 0.17458100558659218, "eval_steps": 50, "global_step": 250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006983240223463687, "eval_loss": 1.370945930480957, "eval_runtime": 51.907, "eval_samples_per_second": 11.617, "eval_steps_per_second": 2.909, "step": 1 }, { "epoch": 0.006983240223463687, "grad_norm": 0.8086255788803101, "learning_rate": 4.34e-05, "loss": 1.074, "step": 10 }, { "epoch": 0.013966480446927373, "grad_norm": 0.5126949548721313, "learning_rate": 8.68e-05, "loss": 0.9041, "step": 20 }, { "epoch": 0.02094972067039106, "grad_norm": 0.6224434971809387, "learning_rate": 0.0001302, "loss": 0.9133, "step": 30 }, { "epoch": 0.027932960893854747, "grad_norm": 0.596337616443634, "learning_rate": 0.0001736, "loss": 0.8771, "step": 40 }, { "epoch": 0.034916201117318434, "grad_norm": 1.162047266960144, "learning_rate": 0.000217, "loss": 2.273, "step": 50 }, { "epoch": 0.034916201117318434, "eval_loss": 1.2856680154800415, "eval_runtime": 51.8319, "eval_samples_per_second": 11.634, "eval_steps_per_second": 2.913, "step": 50 }, { "epoch": 0.04189944134078212, "grad_norm": 0.47395944595336914, "learning_rate": 0.00021673569945319091, "loss": 0.9326, "step": 60 }, { "epoch": 0.04888268156424581, "grad_norm": 0.45159703493118286, "learning_rate": 0.00021594408545846038, "loss": 0.864, "step": 70 }, { "epoch": 0.055865921787709494, "grad_norm": 0.48672589659690857, "learning_rate": 0.0002146290146796179, "loss": 0.9053, "step": 80 }, { "epoch": 0.06284916201117319, "grad_norm": 0.6035557389259338, "learning_rate": 0.0002127968940093076, "loss": 0.9917, "step": 90 }, { "epoch": 0.06983240223463687, "grad_norm": 1.352277398109436, "learning_rate": 0.00021045664935527106, "loss": 2.3177, "step": 100 }, { "epoch": 0.06983240223463687, "eval_loss": 1.2862142324447632, "eval_runtime": 51.6109, "eval_samples_per_second": 11.684, "eval_steps_per_second": 2.926, "step": 100 }, { "epoch": 0.07681564245810056, "grad_norm": 0.5764887928962708, "learning_rate": 0.00020761968215422217, "loss": 0.9639, "step": 110 }, { "epoch": 0.08379888268156424, "grad_norm": 0.45929816365242004, "learning_rate": 0.00020429981382519356, "loss": 0.9454, "step": 120 }, { "epoch": 0.09078212290502793, "grad_norm": 0.5289813280105591, "learning_rate": 0.00020051321843297219, "loss": 0.8689, "step": 130 }, { "epoch": 0.09776536312849161, "grad_norm": 0.5550933480262756, "learning_rate": 0.0001962783438896818, "loss": 0.8495, "step": 140 }, { "epoch": 0.10474860335195531, "grad_norm": 1.3829443454742432, "learning_rate": 0.0001916158220784091, "loss": 2.1619, "step": 150 }, { "epoch": 0.10474860335195531, "eval_loss": 1.2346261739730835, "eval_runtime": 51.8194, "eval_samples_per_second": 11.637, "eval_steps_per_second": 2.914, "step": 150 }, { "epoch": 0.11173184357541899, "grad_norm": 0.4873850345611572, "learning_rate": 0.00018654836833674362, "loss": 0.9312, "step": 160 }, { "epoch": 0.11871508379888268, "grad_norm": 0.4591498076915741, "learning_rate": 0.0001811006707899361, "loss": 0.8553, "step": 170 }, { "epoch": 0.12569832402234637, "grad_norm": 0.44848743081092834, "learning_rate": 0.0001752992700728339, "loss": 0.8601, "step": 180 }, { "epoch": 0.13268156424581007, "grad_norm": 0.49805349111557007, "learning_rate": 0.00016917243002657602, "loss": 0.7799, "step": 190 }, { "epoch": 0.13966480446927373, "grad_norm": 1.24271821975708, "learning_rate": 0.00016275, "loss": 1.9841, "step": 200 }, { "epoch": 0.13966480446927373, "eval_loss": 1.2006936073303223, "eval_runtime": 51.8223, "eval_samples_per_second": 11.636, "eval_steps_per_second": 2.914, "step": 200 }, { "epoch": 0.14664804469273743, "grad_norm": 0.45036932826042175, "learning_rate": 0.0001560632694266149, "loss": 0.9129, "step": 210 }, { "epoch": 0.15363128491620112, "grad_norm": 0.4322090446949005, "learning_rate": 0.00014914481538562646, "loss": 0.9577, "step": 220 }, { "epoch": 0.16061452513966482, "grad_norm": 0.4544154703617096, "learning_rate": 0.0001420283438896818, "loss": 0.7483, "step": 230 }, { "epoch": 0.16759776536312848, "grad_norm": 0.6254767179489136, "learning_rate": 0.00013474852567256393, "loss": 0.961, "step": 240 }, { "epoch": 0.17458100558659218, "grad_norm": 1.0982528924942017, "learning_rate": 0.00012734082727686196, "loss": 2.2834, "step": 250 }, { "epoch": 0.17458100558659218, "eval_loss": 1.151829481124878, "eval_runtime": 51.8371, "eval_samples_per_second": 11.633, "eval_steps_per_second": 2.913, "step": 250 } ], "logging_steps": 10, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.06500940529664e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }