{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.99904, "global_step": 2343, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "learning_rate": 1e-05, "loss": 36.9779, "step": 50 }, { "epoch": 0.13, "learning_rate": 2e-05, "loss": 16.6502, "step": 100 }, { "epoch": 0.19, "learning_rate": 1.9554168524297815e-05, "loss": 13.7007, "step": 150 }, { "epoch": 0.26, "learning_rate": 1.9108337048595635e-05, "loss": 6.0772, "step": 200 }, { "epoch": 0.32, "learning_rate": 1.8662505572893448e-05, "loss": 4.2448, "step": 250 }, { "epoch": 0.38, "learning_rate": 1.8216674097191264e-05, "loss": 5.0142, "step": 300 }, { "epoch": 0.45, "learning_rate": 1.777084262148908e-05, "loss": 3.3927, "step": 350 }, { "epoch": 0.51, "learning_rate": 1.7325011145786894e-05, "loss": 3.4273, "step": 400 }, { "epoch": 0.58, "learning_rate": 1.687917967008471e-05, "loss": 2.906, "step": 450 }, { "epoch": 0.64, "learning_rate": 1.6433348194382527e-05, "loss": 2.6533, "step": 500 }, { "epoch": 0.7, "learning_rate": 1.598751671868034e-05, "loss": 2.5707, "step": 550 }, { "epoch": 0.77, "learning_rate": 1.5541685242978156e-05, "loss": 2.5857, "step": 600 }, { "epoch": 0.83, "learning_rate": 1.5095853767275971e-05, "loss": 2.5658, "step": 650 }, { "epoch": 0.9, "learning_rate": 1.4650022291573786e-05, "loss": 2.7225, "step": 700 }, { "epoch": 0.96, "learning_rate": 1.42041908158716e-05, "loss": 2.0151, "step": 750 }, { "epoch": 1.0, "eval_accuracy": 0.88208, "eval_loss": 3.9117867946624756, "eval_rmse": 4.946967789532184, "eval_runtime": 621.0961, "eval_samples_per_second": 40.251, "eval_steps_per_second": 2.517, "step": 781 }, { "epoch": 1.02, "learning_rate": 1.3758359340169416e-05, "loss": 2.4671, "step": 800 }, { "epoch": 1.09, "learning_rate": 1.3312527864467232e-05, "loss": 1.8725, "step": 850 }, { "epoch": 1.15, "learning_rate": 1.2866696388765047e-05, "loss": 1.9538, "step": 900 }, { "epoch": 1.22, "learning_rate": 1.2420864913062862e-05, "loss": 1.7274, "step": 950 }, { "epoch": 1.28, "learning_rate": 1.197503343736068e-05, "loss": 1.9257, "step": 1000 }, { "epoch": 1.34, "learning_rate": 1.1529201961658493e-05, "loss": 1.8435, "step": 1050 }, { "epoch": 1.41, "learning_rate": 1.1083370485956308e-05, "loss": 1.6858, "step": 1100 }, { "epoch": 1.47, "learning_rate": 1.0637539010254126e-05, "loss": 1.9954, "step": 1150 }, { "epoch": 1.54, "learning_rate": 1.019170753455194e-05, "loss": 1.7033, "step": 1200 }, { "epoch": 1.6, "learning_rate": 9.745876058849756e-06, "loss": 1.4969, "step": 1250 }, { "epoch": 1.66, "learning_rate": 9.30004458314757e-06, "loss": 1.5146, "step": 1300 }, { "epoch": 1.73, "learning_rate": 8.854213107445387e-06, "loss": 2.0415, "step": 1350 }, { "epoch": 1.79, "learning_rate": 8.408381631743202e-06, "loss": 2.0009, "step": 1400 }, { "epoch": 1.86, "learning_rate": 7.962550156041017e-06, "loss": 1.7283, "step": 1450 }, { "epoch": 1.92, "learning_rate": 7.516718680338832e-06, "loss": 1.7819, "step": 1500 }, { "epoch": 1.98, "learning_rate": 7.070887204636649e-06, "loss": 1.5547, "step": 1550 }, { "epoch": 2.0, "eval_accuracy": 0.95872, "eval_loss": 1.6247199773788452, "eval_rmse": 4.865195563248444, "eval_runtime": 619.8743, "eval_samples_per_second": 40.331, "eval_steps_per_second": 2.521, "step": 1562 }, { "epoch": 2.05, "learning_rate": 6.625055728934463e-06, "loss": 1.4602, "step": 1600 }, { "epoch": 2.11, "learning_rate": 6.179224253232279e-06, "loss": 1.2891, "step": 1650 }, { "epoch": 2.18, "learning_rate": 5.733392777530095e-06, "loss": 1.2627, "step": 1700 }, { "epoch": 2.24, "learning_rate": 5.28756130182791e-06, "loss": 1.4664, "step": 1750 }, { "epoch": 2.3, "learning_rate": 4.841729826125725e-06, "loss": 1.2305, "step": 1800 }, { "epoch": 2.37, "learning_rate": 4.39589835042354e-06, "loss": 1.2872, "step": 1850 }, { "epoch": 2.43, "learning_rate": 3.950066874721356e-06, "loss": 1.2644, "step": 1900 }, { "epoch": 2.5, "learning_rate": 3.5042353990191713e-06, "loss": 1.3754, "step": 1950 }, { "epoch": 2.56, "learning_rate": 3.0584039233169866e-06, "loss": 1.1153, "step": 2000 }, { "epoch": 2.62, "learning_rate": 2.612572447614802e-06, "loss": 1.3498, "step": 2050 }, { "epoch": 2.69, "learning_rate": 2.1667409719126175e-06, "loss": 1.2545, "step": 2100 }, { "epoch": 2.75, "learning_rate": 1.7209094962104325e-06, "loss": 1.0703, "step": 2150 }, { "epoch": 2.82, "learning_rate": 1.2750780205082481e-06, "loss": 1.1519, "step": 2200 }, { "epoch": 2.88, "learning_rate": 8.292465448060634e-07, "loss": 1.0694, "step": 2250 }, { "epoch": 2.94, "learning_rate": 3.834150691038788e-07, "loss": 0.9306, "step": 2300 }, { "epoch": 3.0, "eval_accuracy": 0.96212, "eval_loss": 1.7369074821472168, "eval_rmse": 5.055060677999024, "eval_runtime": 635.5792, "eval_samples_per_second": 39.334, "eval_steps_per_second": 2.459, "step": 2343 } ], "max_steps": 2343, "num_train_epochs": 3, "total_flos": 6.576905725215466e+16, "trial_name": null, "trial_params": null }