{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.899408284023669, "eval_steps": 500, "global_step": 63, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.23668639053254437, "grad_norm": 2.409322500228882, "learning_rate": 4.996811065272715e-05, "loss": 1.0822, "mean_token_accuracy": 0.7053477883338928, "step": 5 }, { "epoch": 0.47337278106508873, "grad_norm": 0.9640182852745056, "learning_rate": 4.8861446190538576e-05, "loss": 0.921, "mean_token_accuracy": 0.7317641347646713, "step": 10 }, { "epoch": 0.7100591715976331, "grad_norm": 0.7025872468948364, "learning_rate": 4.624956317935659e-05, "loss": 0.8498, "mean_token_accuracy": 0.7442429676651955, "step": 15 }, { "epoch": 0.9467455621301775, "grad_norm": 0.46222633123397827, "learning_rate": 4.2316506028963374e-05, "loss": 0.8105, "mean_token_accuracy": 0.75324527323246, "step": 20 }, { "epoch": 1.1420118343195267, "grad_norm": 0.510589063167572, "learning_rate": 3.733941471032425e-05, "loss": 0.7696, "mean_token_accuracy": 0.7617284547198903, "step": 25 }, { "epoch": 1.378698224852071, "grad_norm": 0.37975624203681946, "learning_rate": 3.1668996291960073e-05, "loss": 0.7509, "mean_token_accuracy": 0.7663201123476029, "step": 30 }, { "epoch": 1.6153846153846154, "grad_norm": 0.3334354758262634, "learning_rate": 2.570481262505563e-05, "loss": 0.7224, "mean_token_accuracy": 0.7735484451055527, "step": 35 }, { "epoch": 1.8520710059171597, "grad_norm": 0.2994464933872223, "learning_rate": 1.986712551234432e-05, "loss": 0.7218, "mean_token_accuracy": 0.7732948541641236, "step": 40 }, { "epoch": 2.0473372781065087, "grad_norm": 0.7119457721710205, "learning_rate": 1.4567283270175847e-05, "loss": 0.7124, "mean_token_accuracy": 0.7780754186890342, "step": 45 }, { "epoch": 2.2840236686390534, "grad_norm": 0.3027782440185547, "learning_rate": 1.0178735372827107e-05, "loss": 0.6834, "mean_token_accuracy": 0.7832115352153778, "step": 50 }, { "epoch": 2.5207100591715976, "grad_norm": 0.2431010901927948, "learning_rate": 7.010717610764453e-06, "loss": 0.682, "mean_token_accuracy": 0.7827403783798218, "step": 55 }, { "epoch": 2.757396449704142, "grad_norm": 0.23020850121974945, "learning_rate": 5.286462018769748e-06, "loss": 0.673, "mean_token_accuracy": 0.7855649277567863, "step": 60 }, { "epoch": 2.899408284023669, "mean_token_accuracy": 0.785752405722936, "step": 63, "total_flos": 92935576092672.0, "train_loss": 0.7762386950235518, "train_runtime": 1655.1689, "train_samples_per_second": 4.899, "train_steps_per_second": 0.038 } ], "logging_steps": 5, "max_steps": 63, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 92935576092672.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }