{ "best_metric": 0.3523136377334595, "best_model_checkpoint": "miner_id_24/checkpoint-250", "epoch": 0.2188183807439825, "eval_steps": 50, "global_step": 250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00087527352297593, "eval_loss": 0.5409302115440369, "eval_runtime": 47.9717, "eval_samples_per_second": 10.027, "eval_steps_per_second": 2.522, "step": 1 }, { "epoch": 0.0087527352297593, "grad_norm": 0.6175008416175842, "learning_rate": 4.0600000000000004e-05, "loss": 1.102, "step": 10 }, { "epoch": 0.0175054704595186, "grad_norm": 0.5922743082046509, "learning_rate": 8.120000000000001e-05, "loss": 0.7527, "step": 20 }, { "epoch": 0.0262582056892779, "grad_norm": 0.18080410361289978, "learning_rate": 0.00012179999999999999, "loss": 0.2428, "step": 30 }, { "epoch": 0.0350109409190372, "grad_norm": 0.2408462017774582, "learning_rate": 0.00016240000000000002, "loss": 0.0084, "step": 40 }, { "epoch": 0.0437636761487965, "grad_norm": 0.16170749068260193, "learning_rate": 0.000203, "loss": 0.0081, "step": 50 }, { "epoch": 0.0437636761487965, "eval_loss": 0.41450178623199463, "eval_runtime": 48.0897, "eval_samples_per_second": 10.002, "eval_steps_per_second": 2.516, "step": 50 }, { "epoch": 0.0525164113785558, "grad_norm": 0.523928165435791, "learning_rate": 0.00020275275110137215, "loss": 0.6924, "step": 60 }, { "epoch": 0.061269146608315096, "grad_norm": 0.6150461435317993, "learning_rate": 0.00020201220897726938, "loss": 0.5178, "step": 70 }, { "epoch": 0.0700218818380744, "grad_norm": 0.3789159655570984, "learning_rate": 0.00020078198147448128, "loss": 0.2702, "step": 80 }, { "epoch": 0.0787746170678337, "grad_norm": 1.880570411682129, "learning_rate": 0.00019906806213773937, "loss": 0.0048, "step": 90 }, { "epoch": 0.087527352297593, "grad_norm": 0.6373544335365295, "learning_rate": 0.0001968788010097697, "loss": 0.0049, "step": 100 }, { "epoch": 0.087527352297593, "eval_loss": 0.3669174313545227, "eval_runtime": 48.1037, "eval_samples_per_second": 9.999, "eval_steps_per_second": 2.515, "step": 100 }, { "epoch": 0.0962800875273523, "grad_norm": 0.6095497608184814, "learning_rate": 0.00019422486395072398, "loss": 0.8385, "step": 110 }, { "epoch": 0.1050328227571116, "grad_norm": 0.44535937905311584, "learning_rate": 0.0001911191806751811, "loss": 0.5802, "step": 120 }, { "epoch": 0.1137855579868709, "grad_norm": 0.10592010617256165, "learning_rate": 0.00018757688175987723, "loss": 0.1586, "step": 130 }, { "epoch": 0.12253829321663019, "grad_norm": 0.025220032781362534, "learning_rate": 0.00018361522492905716, "loss": 0.0018, "step": 140 }, { "epoch": 0.13129102844638948, "grad_norm": 0.014781222678720951, "learning_rate": 0.00017925351097657625, "loss": 0.002, "step": 150 }, { "epoch": 0.13129102844638948, "eval_loss": 0.36381667852401733, "eval_runtime": 48.1198, "eval_samples_per_second": 9.996, "eval_steps_per_second": 2.515, "step": 150 }, { "epoch": 0.1400437636761488, "grad_norm": 0.5591445565223694, "learning_rate": 0.00017451298973437308, "loss": 0.9412, "step": 160 }, { "epoch": 0.1487964989059081, "grad_norm": 0.30725646018981934, "learning_rate": 0.0001694167565454241, "loss": 0.4974, "step": 170 }, { "epoch": 0.1575492341356674, "grad_norm": 0.11813530325889587, "learning_rate": 0.0001639896397455543, "loss": 0.0992, "step": 180 }, { "epoch": 0.16630196936542668, "grad_norm": 0.009045140817761421, "learning_rate": 0.0001582580797022808, "loss": 0.0109, "step": 190 }, { "epoch": 0.175054704595186, "grad_norm": 0.017792249098420143, "learning_rate": 0.00015225, "loss": 0.0007, "step": 200 }, { "epoch": 0.175054704595186, "eval_loss": 0.35675832629203796, "eval_runtime": 47.8961, "eval_samples_per_second": 10.043, "eval_steps_per_second": 2.526, "step": 200 }, { "epoch": 0.1838074398249453, "grad_norm": 0.5258771777153015, "learning_rate": 0.00014599467139909136, "loss": 0.7887, "step": 210 }, { "epoch": 0.1925601750547046, "grad_norm": 0.5954610109329224, "learning_rate": 0.0001395225692317151, "loss": 0.7499, "step": 220 }, { "epoch": 0.2013129102844639, "grad_norm": 0.0030906260944902897, "learning_rate": 0.00013286522492905717, "loss": 0.1606, "step": 230 }, { "epoch": 0.2100656455142232, "grad_norm": 0.0038282345049083233, "learning_rate": 0.00012605507240336626, "loss": 0.0009, "step": 240 }, { "epoch": 0.2188183807439825, "grad_norm": 0.01913132704794407, "learning_rate": 0.00011912529003319345, "loss": 0.0004, "step": 250 }, { "epoch": 0.2188183807439825, "eval_loss": 0.3523136377334595, "eval_runtime": 48.1668, "eval_samples_per_second": 9.986, "eval_steps_per_second": 2.512, "step": 250 } ], "logging_steps": 10, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 8.17585994072064e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }