{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.10816657652785289, "eval_steps": 100, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01081665765278529, "eval_accuracy": 0.7290303584415322, "eval_loss": 1.2268004417419434, "eval_runtime": 456.6189, "eval_samples_per_second": 34.25, "eval_steps_per_second": 1.071, "step": 100 }, { "epoch": 0.02163331530557058, "eval_accuracy": 0.7563876376099345, "eval_loss": 1.078468918800354, "eval_runtime": 456.7154, "eval_samples_per_second": 34.242, "eval_steps_per_second": 1.071, "step": 200 }, { "epoch": 0.03244997295835587, "eval_accuracy": 0.7668799267017361, "eval_loss": 1.0238652229309082, "eval_runtime": 456.7597, "eval_samples_per_second": 34.239, "eval_steps_per_second": 1.071, "step": 300 }, { "epoch": 0.04326663061114116, "eval_accuracy": 0.7730476325324204, "eval_loss": 0.9894265532493591, "eval_runtime": 456.7371, "eval_samples_per_second": 34.241, "eval_steps_per_second": 1.071, "step": 400 }, { "epoch": 0.05408328826392644, "grad_norm": 2.1639328002929688, "learning_rate": 4.909861186226789e-05, "loss": 1.2276, "step": 500 }, { "epoch": 0.05408328826392644, "eval_accuracy": 0.7784154459204475, "eval_loss": 0.9612082839012146, "eval_runtime": 456.6452, "eval_samples_per_second": 34.248, "eval_steps_per_second": 1.071, "step": 500 }, { "epoch": 0.06489994591671173, "eval_accuracy": 0.782137070411325, "eval_loss": 0.9466578960418701, "eval_runtime": 456.6288, "eval_samples_per_second": 34.249, "eval_steps_per_second": 1.071, "step": 600 }, { "epoch": 0.07571660356949703, "eval_accuracy": 0.7840395143262153, "eval_loss": 0.930482804775238, "eval_runtime": 456.4891, "eval_samples_per_second": 34.259, "eval_steps_per_second": 1.071, "step": 700 }, { "epoch": 0.08653326122228232, "eval_accuracy": 0.7867131143188731, "eval_loss": 0.9141340851783752, "eval_runtime": 456.5273, "eval_samples_per_second": 34.256, "eval_steps_per_second": 1.071, "step": 800 }, { "epoch": 0.09734991887506761, "eval_accuracy": 0.7895170312687647, "eval_loss": 0.9005721211433411, "eval_runtime": 456.6578, "eval_samples_per_second": 34.247, "eval_steps_per_second": 1.071, "step": 900 }, { "epoch": 0.10816657652785289, "grad_norm": 1.771813154220581, "learning_rate": 4.819722372453579e-05, "loss": 0.9226, "step": 1000 }, { "epoch": 0.10816657652785289, "eval_accuracy": 0.7909631562396202, "eval_loss": 0.896154522895813, "eval_runtime": 456.9174, "eval_samples_per_second": 34.227, "eval_steps_per_second": 1.07, "step": 1000 } ], "logging_steps": 500, "max_steps": 27735, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 200, "total_flos": 7.41887283560448e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }