{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 4528, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.022084805653710248, "grad_norm": 0.1020389199256897, "learning_rate": 6.666666666666667e-06, "loss": 2.5011, "step": 100 }, { "epoch": 0.044169611307420496, "grad_norm": 0.1910451352596283, "learning_rate": 1.3333333333333333e-05, "loss": 2.4789, "step": 200 }, { "epoch": 0.06625441696113074, "grad_norm": 0.26337745785713196, "learning_rate": 2e-05, "loss": 2.4291, "step": 300 }, { "epoch": 0.08833922261484099, "grad_norm": 0.3770335614681244, "learning_rate": 1.9972406933597812e-05, "loss": 2.4186, "step": 400 }, { "epoch": 0.11042402826855123, "grad_norm": 0.36907321214675903, "learning_rate": 1.988978000985394e-05, "loss": 2.3774, "step": 500 }, { "epoch": 0.13250883392226148, "grad_norm": 0.4569687247276306, "learning_rate": 1.9752575214807077e-05, "loss": 2.3992, "step": 600 }, { "epoch": 0.15459363957597172, "grad_norm": 0.4629022777080536, "learning_rate": 1.9561549728661312e-05, "loss": 2.325, "step": 700 }, { "epoch": 0.17667844522968199, "grad_norm": 0.47432681918144226, "learning_rate": 1.9317757747201386e-05, "loss": 2.3013, "step": 800 }, { "epoch": 0.19876325088339222, "grad_norm": 0.46376603841781616, "learning_rate": 1.9022544664093854e-05, "loss": 2.3277, "step": 900 }, { "epoch": 0.22084805653710246, "grad_norm": 0.48484688997268677, "learning_rate": 1.8677539646179706e-05, "loss": 2.314, "step": 1000 }, { "epoch": 0.24293286219081273, "grad_norm": 0.655317485332489, "learning_rate": 1.828464664273263e-05, "loss": 2.3047, "step": 1100 }, { "epoch": 0.26501766784452296, "grad_norm": 0.520976722240448, "learning_rate": 1.7846033878299232e-05, "loss": 2.2971, "step": 1200 }, { "epoch": 0.2871024734982332, "grad_norm": 0.502010703086853, "learning_rate": 1.7364121887106285e-05, "loss": 2.3102, "step": 1300 }, { "epoch": 0.30918727915194344, "grad_norm": 0.5849918127059937, "learning_rate": 1.684157015506839e-05, "loss": 2.2636, "step": 1400 }, { "epoch": 0.33127208480565373, "grad_norm": 0.6814427375793457, "learning_rate": 1.628126244311369e-05, "loss": 2.2854, "step": 1500 }, { "epoch": 0.35335689045936397, "grad_norm": 0.4844237267971039, "learning_rate": 1.5686290872822504e-05, "loss": 2.294, "step": 1600 }, { "epoch": 0.3754416961130742, "grad_norm": 0.6080675721168518, "learning_rate": 1.5059938862204126e-05, "loss": 2.2631, "step": 1700 }, { "epoch": 0.39752650176678445, "grad_norm": 0.561839759349823, "learning_rate": 1.440566300578259e-05, "loss": 2.2489, "step": 1800 }, { "epoch": 0.4196113074204947, "grad_norm": 0.5733577609062195, "learning_rate": 1.3727073998988202e-05, "loss": 2.2505, "step": 1900 }, { "epoch": 0.4416961130742049, "grad_norm": 0.6440421342849731, "learning_rate": 1.3027916712125825e-05, "loss": 2.2341, "step": 2000 }, { "epoch": 0.4637809187279152, "grad_norm": 0.5807141661643982, "learning_rate": 1.2312049523883851e-05, "loss": 2.2842, "step": 2100 }, { "epoch": 0.48586572438162545, "grad_norm": 0.6154150366783142, "learning_rate": 1.1583423028434343e-05, "loss": 2.2756, "step": 2200 }, { "epoch": 0.5079505300353356, "grad_norm": 0.8887423276901245, "learning_rate": 1.0846058233631565e-05, "loss": 2.2388, "step": 2300 }, { "epoch": 0.5300353356890459, "grad_norm": 0.6062945127487183, "learning_rate": 1.0104024370624644e-05, "loss": 2.2708, "step": 2400 }, { "epoch": 0.5521201413427562, "grad_norm": 0.7383410930633545, "learning_rate": 9.361416437344504e-06, "loss": 2.2693, "step": 2500 }, { "epoch": 0.5742049469964664, "grad_norm": 0.7274787425994873, "learning_rate": 8.622332599793906e-06, "loss": 2.2596, "step": 2600 }, { "epoch": 0.5962897526501767, "grad_norm": 0.6965683102607727, "learning_rate": 7.890851575854108e-06, "loss": 2.2652, "step": 2700 }, { "epoch": 0.6183745583038869, "grad_norm": 0.6714246273040771, "learning_rate": 7.171010126418218e-06, "loss": 2.2587, "step": 2800 }, { "epoch": 0.6404593639575972, "grad_norm": 0.7770031690597534, "learning_rate": 6.466780778068903e-06, "loss": 2.2502, "step": 2900 }, { "epoch": 0.6625441696113075, "grad_norm": 0.7164767980575562, "learning_rate": 5.782049900240432e-06, "loss": 2.2477, "step": 3000 }, { "epoch": 0.6846289752650176, "grad_norm": 0.7085398435592651, "learning_rate": 5.120596257848716e-06, "loss": 2.268, "step": 3100 }, { "epoch": 0.7067137809187279, "grad_norm": 0.7723512053489685, "learning_rate": 4.486070157749059e-06, "loss": 2.2363, "step": 3200 }, { "epoch": 0.7287985865724381, "grad_norm": 0.6914446353912354, "learning_rate": 3.881973304104252e-06, "loss": 2.2511, "step": 3300 }, { "epoch": 0.7508833922261484, "grad_norm": 0.6801828742027283, "learning_rate": 3.311639473833487e-06, "loss": 2.2672, "step": 3400 }, { "epoch": 0.7729681978798587, "grad_norm": 0.6551490426063538, "learning_rate": 2.778216118786782e-06, "loss": 2.2574, "step": 3500 }, { "epoch": 0.7950530035335689, "grad_norm": 0.7881369590759277, "learning_rate": 2.2846469961753916e-06, "loss": 2.2728, "step": 3600 }, { "epoch": 0.8171378091872792, "grad_norm": 0.6549363136291504, "learning_rate": 1.8336559231141726e-06, "loss": 2.2253, "step": 3700 }, { "epoch": 0.8392226148409894, "grad_norm": 0.6901439428329468, "learning_rate": 1.4277317449282834e-06, "loss": 2.2392, "step": 3800 }, { "epoch": 0.8613074204946997, "grad_norm": 0.6845853924751282, "learning_rate": 1.0691146001783081e-06, "loss": 2.2545, "step": 3900 }, { "epoch": 0.8833922261484098, "grad_norm": 0.7686012387275696, "learning_rate": 7.597835582018586e-07, "loss": 2.2374, "step": 4000 }, { "epoch": 0.9054770318021201, "grad_norm": 0.6920452117919922, "learning_rate": 5.014456973952375e-07, "loss": 2.249, "step": 4100 }, { "epoch": 0.9275618374558304, "grad_norm": 0.5871976613998413, "learning_rate": 2.9552668450792965e-07, "loss": 2.2629, "step": 4200 }, { "epoch": 0.9496466431095406, "grad_norm": 0.7372850179672241, "learning_rate": 1.431629069391516e-07, "loss": 2.2742, "step": 4300 }, { "epoch": 0.9717314487632509, "grad_norm": 0.7272618412971497, "learning_rate": 4.519520145525369e-08, "loss": 2.238, "step": 4400 }, { "epoch": 0.9938162544169611, "grad_norm": 0.6909335851669312, "learning_rate": 2.164213936770576e-09, "loss": 2.2434, "step": 4500 }, { "epoch": 1.0, "step": 4528, "total_flos": 8.2279274151936e+16, "train_loss": 2.2892676124303164, "train_runtime": 1413.0668, "train_samples_per_second": 6.408, "train_steps_per_second": 3.204 } ], "logging_steps": 100, "max_steps": 4528, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.2279274151936e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }