{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.6611570247933884, "eval_steps": 9, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.006611570247933884, "eval_loss": 1.3909879922866821, "eval_runtime": 14.2943, "eval_samples_per_second": 17.839, "eval_steps_per_second": 2.239, "step": 1 }, { "epoch": 0.019834710743801654, "grad_norm": 1.0536102056503296, "learning_rate": 1.5e-05, "loss": 5.6401, "step": 3 }, { "epoch": 0.03966942148760331, "grad_norm": 0.7944437265396118, "learning_rate": 3e-05, "loss": 5.6383, "step": 6 }, { "epoch": 0.05950413223140496, "grad_norm": 1.0762602090835571, "learning_rate": 4.5e-05, "loss": 5.5331, "step": 9 }, { "epoch": 0.05950413223140496, "eval_loss": 1.3733352422714233, "eval_runtime": 14.3069, "eval_samples_per_second": 17.824, "eval_steps_per_second": 2.237, "step": 9 }, { "epoch": 0.07933884297520662, "grad_norm": 1.1391944885253906, "learning_rate": 4.993910125649561e-05, "loss": 5.5774, "step": 12 }, { "epoch": 0.09917355371900827, "grad_norm": 0.7644323110580444, "learning_rate": 4.962019382530521e-05, "loss": 5.2498, "step": 15 }, { "epoch": 0.11900826446280992, "grad_norm": 0.9668009877204895, "learning_rate": 4.9031542398457974e-05, "loss": 5.3533, "step": 18 }, { "epoch": 0.11900826446280992, "eval_loss": 1.2732479572296143, "eval_runtime": 14.3805, "eval_samples_per_second": 17.732, "eval_steps_per_second": 2.225, "step": 18 }, { "epoch": 0.13884297520661157, "grad_norm": 0.7748279571533203, "learning_rate": 4.817959636416969e-05, "loss": 5.2816, "step": 21 }, { "epoch": 0.15867768595041323, "grad_norm": 0.7448261380195618, "learning_rate": 4.707368982147318e-05, "loss": 5.0151, "step": 24 }, { "epoch": 0.17851239669421487, "grad_norm": 0.8115647435188293, "learning_rate": 4.572593931387604e-05, "loss": 5.0161, "step": 27 }, { "epoch": 0.17851239669421487, "eval_loss": 1.2196054458618164, "eval_runtime": 14.4006, "eval_samples_per_second": 17.708, "eval_steps_per_second": 2.222, "step": 27 }, { "epoch": 0.19834710743801653, "grad_norm": 0.8935056328773499, "learning_rate": 4.415111107797445e-05, "loss": 5.1896, "step": 30 }, { "epoch": 0.21818181818181817, "grad_norm": 0.7651195526123047, "learning_rate": 4.2366459261474933e-05, "loss": 5.0862, "step": 33 }, { "epoch": 0.23801652892561984, "grad_norm": 0.9722391366958618, "learning_rate": 4.039153688314145e-05, "loss": 4.5262, "step": 36 }, { "epoch": 0.23801652892561984, "eval_loss": 1.1881024837493896, "eval_runtime": 14.3953, "eval_samples_per_second": 17.714, "eval_steps_per_second": 2.223, "step": 36 }, { "epoch": 0.2578512396694215, "grad_norm": 0.8714058995246887, "learning_rate": 3.824798160583012e-05, "loss": 5.0819, "step": 39 }, { "epoch": 0.27768595041322314, "grad_norm": 0.74471116065979, "learning_rate": 3.5959278669726935e-05, "loss": 4.7994, "step": 42 }, { "epoch": 0.2975206611570248, "grad_norm": 0.9142318964004517, "learning_rate": 3.355050358314172e-05, "loss": 4.6626, "step": 45 }, { "epoch": 0.2975206611570248, "eval_loss": 1.1637901067733765, "eval_runtime": 14.4356, "eval_samples_per_second": 17.665, "eval_steps_per_second": 2.217, "step": 45 }, { "epoch": 0.31735537190082647, "grad_norm": 0.739345371723175, "learning_rate": 3.104804738999169e-05, "loss": 4.6767, "step": 48 }, { "epoch": 0.3371900826446281, "grad_norm": 0.8303969502449036, "learning_rate": 2.8479327524001636e-05, "loss": 4.6031, "step": 51 }, { "epoch": 0.35702479338842974, "grad_norm": 0.7603834271430969, "learning_rate": 2.587248741756253e-05, "loss": 4.6645, "step": 54 }, { "epoch": 0.35702479338842974, "eval_loss": 1.1474025249481201, "eval_runtime": 14.6583, "eval_samples_per_second": 17.396, "eval_steps_per_second": 2.183, "step": 54 }, { "epoch": 0.3768595041322314, "grad_norm": 0.8660556674003601, "learning_rate": 2.3256088156396868e-05, "loss": 4.5342, "step": 57 }, { "epoch": 0.39669421487603307, "grad_norm": 0.7600996494293213, "learning_rate": 2.0658795558326743e-05, "loss": 4.6133, "step": 60 }, { "epoch": 0.41652892561983473, "grad_norm": 0.8485195636749268, "learning_rate": 1.8109066104575023e-05, "loss": 4.7318, "step": 63 }, { "epoch": 0.41652892561983473, "eval_loss": 1.136008858680725, "eval_runtime": 14.6101, "eval_samples_per_second": 17.454, "eval_steps_per_second": 2.19, "step": 63 }, { "epoch": 0.43636363636363634, "grad_norm": 0.8980472087860107, "learning_rate": 1.56348351646022e-05, "loss": 4.8397, "step": 66 }, { "epoch": 0.456198347107438, "grad_norm": 0.851276159286499, "learning_rate": 1.3263210930352737e-05, "loss": 4.4802, "step": 69 }, { "epoch": 0.47603305785123967, "grad_norm": 0.7109021544456482, "learning_rate": 1.1020177413231334e-05, "loss": 4.5363, "step": 72 }, { "epoch": 0.47603305785123967, "eval_loss": 1.1297308206558228, "eval_runtime": 14.598, "eval_samples_per_second": 17.468, "eval_steps_per_second": 2.192, "step": 72 }, { "epoch": 0.49586776859504134, "grad_norm": 0.7406261563301086, "learning_rate": 8.930309757836517e-06, "loss": 4.5372, "step": 75 }, { "epoch": 0.515702479338843, "grad_norm": 0.8467620611190796, "learning_rate": 7.016504991533726e-06, "loss": 4.7346, "step": 78 }, { "epoch": 0.5355371900826447, "grad_norm": 0.9248302578926086, "learning_rate": 5.299731159831953e-06, "loss": 4.4609, "step": 81 }, { "epoch": 0.5355371900826447, "eval_loss": 1.1261959075927734, "eval_runtime": 14.3891, "eval_samples_per_second": 17.722, "eval_steps_per_second": 2.224, "step": 81 }, { "epoch": 0.5553719008264463, "grad_norm": 0.7920399904251099, "learning_rate": 3.798797596089351e-06, "loss": 4.8255, "step": 84 }, { "epoch": 0.5752066115702479, "grad_norm": 1.0188922882080078, "learning_rate": 2.5301488425208296e-06, "loss": 5.0426, "step": 87 }, { "epoch": 0.5950413223140496, "grad_norm": 0.8648769855499268, "learning_rate": 1.5076844803522922e-06, "loss": 4.5582, "step": 90 }, { "epoch": 0.5950413223140496, "eval_loss": 1.1249107122421265, "eval_runtime": 14.3848, "eval_samples_per_second": 17.727, "eval_steps_per_second": 2.225, "step": 90 }, { "epoch": 0.6148760330578512, "grad_norm": 0.8916571736335754, "learning_rate": 7.426068431000882e-07, "loss": 4.578, "step": 93 }, { "epoch": 0.6347107438016529, "grad_norm": 0.8208229541778564, "learning_rate": 2.4329828146074095e-07, "loss": 4.3232, "step": 96 }, { "epoch": 0.6545454545454545, "grad_norm": 0.9390489459037781, "learning_rate": 1.522932452260595e-08, "loss": 4.4245, "step": 99 }, { "epoch": 0.6545454545454545, "eval_loss": 1.1246190071105957, "eval_runtime": 14.4395, "eval_samples_per_second": 17.66, "eval_steps_per_second": 2.216, "step": 99 } ], "logging_steps": 3, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 9, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.05367634278613e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }