{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.43485013200807576, "eval_steps": 1000, "global_step": 1750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.024848578971890044, "grad_norm": 1.1403515692366237, "learning_rate": 0.0002475, "loss": 0.7727, "step": 100 }, { "epoch": 0.04969715794378009, "grad_norm": 2.0321728234236067, "learning_rate": 0.0004975, "loss": 0.6434, "step": 200 }, { "epoch": 0.07454573691567014, "grad_norm": 1.5298185029395692, "learning_rate": 0.0004896875, "loss": 0.6966, "step": 300 }, { "epoch": 0.09939431588756018, "grad_norm": 1.3277644876612467, "learning_rate": 0.00047927083333333334, "loss": 0.6549, "step": 400 }, { "epoch": 0.12424289485945023, "grad_norm": 1.5260941221380562, "learning_rate": 0.0004688541666666667, "loss": 0.6218, "step": 500 }, { "epoch": 0.14909147383134028, "grad_norm": 1.461823959618585, "learning_rate": 0.0004584375, "loss": 0.6, "step": 600 }, { "epoch": 0.17394005280323033, "grad_norm": 1.579155099177269, "learning_rate": 0.0004480208333333333, "loss": 0.5768, "step": 700 }, { "epoch": 0.19878863177512035, "grad_norm": 1.0030257236753515, "learning_rate": 0.0004376041666666667, "loss": 0.5625, "step": 800 }, { "epoch": 0.2236372107470104, "grad_norm": 1.1362089000123636, "learning_rate": 0.0004271875, "loss": 0.5397, "step": 900 }, { "epoch": 0.24848578971890045, "grad_norm": 1.2060122481392086, "learning_rate": 0.00041677083333333334, "loss": 0.5262, "step": 1000 }, { "epoch": 0.24848578971890045, "eval_accuracy": 0.8519274864233807, "eval_loss": 0.4272424280643463, "eval_runtime": 355.1137, "eval_samples_per_second": 76.471, "eval_steps_per_second": 9.56, "step": 1000 }, { "epoch": 0.2733343686907905, "grad_norm": 1.4939232786831804, "learning_rate": 0.0004063541666666667, "loss": 0.507, "step": 1100 }, { "epoch": 0.29818294766268055, "grad_norm": 1.0210090194207913, "learning_rate": 0.0003959375, "loss": 0.4923, "step": 1200 }, { "epoch": 0.3230315266345706, "grad_norm": 1.5265032617099488, "learning_rate": 0.0003855208333333333, "loss": 0.485, "step": 1300 }, { "epoch": 0.34788010560646065, "grad_norm": 1.5974690542066108, "learning_rate": 0.0003751041666666667, "loss": 0.4734, "step": 1400 }, { "epoch": 0.3727286845783507, "grad_norm": 1.3463235139038994, "learning_rate": 0.0003646875, "loss": 0.4629, "step": 1500 }, { "epoch": 0.3975772635502407, "grad_norm": 0.8545527093751057, "learning_rate": 0.00035427083333333334, "loss": 0.4504, "step": 1600 }, { "epoch": 0.4224258425221308, "grad_norm": 1.1247072555863402, "learning_rate": 0.0003438541666666667, "loss": 0.4366, "step": 1700 } ], "logging_steps": 100, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 250, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 225213498261504.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }