{ "best_metric": 1.5200674533843994, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.026319252533228057, "eval_steps": 50, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005263850506645612, "grad_norm": 93.07259368896484, "learning_rate": 1e-05, "loss": 12.3105, "step": 1 }, { "epoch": 0.0005263850506645612, "eval_loss": 2.796645402908325, "eval_runtime": 309.2946, "eval_samples_per_second": 10.346, "eval_steps_per_second": 2.587, "step": 1 }, { "epoch": 0.0010527701013291224, "grad_norm": 131.77566528320312, "learning_rate": 2e-05, "loss": 9.2268, "step": 2 }, { "epoch": 0.0015791551519936833, "grad_norm": 71.25743865966797, "learning_rate": 3e-05, "loss": 8.0512, "step": 3 }, { "epoch": 0.0021055402026582447, "grad_norm": 46.8036003112793, "learning_rate": 4e-05, "loss": 7.2551, "step": 4 }, { "epoch": 0.0026319252533228057, "grad_norm": 36.05014419555664, "learning_rate": 5e-05, "loss": 6.3495, "step": 5 }, { "epoch": 0.0031583103039873666, "grad_norm": 26.287073135375977, "learning_rate": 6e-05, "loss": 5.9319, "step": 6 }, { "epoch": 0.003684695354651928, "grad_norm": 38.00836944580078, "learning_rate": 7e-05, "loss": 5.5911, "step": 7 }, { "epoch": 0.004211080405316489, "grad_norm": 23.294231414794922, "learning_rate": 8e-05, "loss": 5.2376, "step": 8 }, { "epoch": 0.00473746545598105, "grad_norm": 22.907855987548828, "learning_rate": 9e-05, "loss": 4.5457, "step": 9 }, { "epoch": 0.005263850506645611, "grad_norm": 27.766204833984375, "learning_rate": 0.0001, "loss": 6.4629, "step": 10 }, { "epoch": 0.005790235557310172, "grad_norm": 24.227497100830078, "learning_rate": 9.999316524962345e-05, "loss": 5.8682, "step": 11 }, { "epoch": 0.006316620607974733, "grad_norm": 20.358135223388672, "learning_rate": 9.997266286704631e-05, "loss": 5.0779, "step": 12 }, { "epoch": 0.006843005658639294, "grad_norm": 22.879426956176758, "learning_rate": 9.993849845741524e-05, "loss": 5.0858, "step": 13 }, { "epoch": 0.007369390709303856, "grad_norm": 22.2658634185791, "learning_rate": 9.989068136093873e-05, "loss": 5.305, "step": 14 }, { "epoch": 0.007895775759968417, "grad_norm": 29.477561950683594, "learning_rate": 9.98292246503335e-05, "loss": 4.8577, "step": 15 }, { "epoch": 0.008422160810632979, "grad_norm": 21.121549606323242, "learning_rate": 9.975414512725057e-05, "loss": 6.6094, "step": 16 }, { "epoch": 0.008948545861297539, "grad_norm": 21.887968063354492, "learning_rate": 9.966546331768191e-05, "loss": 6.1797, "step": 17 }, { "epoch": 0.0094749309119621, "grad_norm": 19.13234519958496, "learning_rate": 9.956320346634876e-05, "loss": 5.6551, "step": 18 }, { "epoch": 0.010001315962626661, "grad_norm": 18.616533279418945, "learning_rate": 9.944739353007344e-05, "loss": 5.9483, "step": 19 }, { "epoch": 0.010527701013291223, "grad_norm": 18.95231056213379, "learning_rate": 9.931806517013612e-05, "loss": 5.4522, "step": 20 }, { "epoch": 0.011054086063955783, "grad_norm": 18.209264755249023, "learning_rate": 9.917525374361912e-05, "loss": 6.1536, "step": 21 }, { "epoch": 0.011580471114620345, "grad_norm": 16.42949867248535, "learning_rate": 9.901899829374047e-05, "loss": 6.0107, "step": 22 }, { "epoch": 0.012106856165284906, "grad_norm": 15.05119514465332, "learning_rate": 9.884934153917997e-05, "loss": 6.5074, "step": 23 }, { "epoch": 0.012633241215949467, "grad_norm": 16.25713348388672, "learning_rate": 9.86663298624003e-05, "loss": 7.0152, "step": 24 }, { "epoch": 0.013159626266614028, "grad_norm": 13.90926742553711, "learning_rate": 9.847001329696653e-05, "loss": 5.5108, "step": 25 }, { "epoch": 0.013686011317278588, "grad_norm": 18.535930633544922, "learning_rate": 9.826044551386744e-05, "loss": 5.9702, "step": 26 }, { "epoch": 0.01421239636794315, "grad_norm": 15.170503616333008, "learning_rate": 9.803768380684242e-05, "loss": 5.659, "step": 27 }, { "epoch": 0.014738781418607712, "grad_norm": 17.371408462524414, "learning_rate": 9.780178907671789e-05, "loss": 5.9414, "step": 28 }, { "epoch": 0.015265166469272272, "grad_norm": 15.820178031921387, "learning_rate": 9.755282581475769e-05, "loss": 5.1419, "step": 29 }, { "epoch": 0.015791551519936834, "grad_norm": 14.776792526245117, "learning_rate": 9.729086208503174e-05, "loss": 5.402, "step": 30 }, { "epoch": 0.016317936570601394, "grad_norm": 16.17049217224121, "learning_rate": 9.701596950580806e-05, "loss": 6.1015, "step": 31 }, { "epoch": 0.016844321621265958, "grad_norm": 15.148641586303711, "learning_rate": 9.672822322997305e-05, "loss": 4.9903, "step": 32 }, { "epoch": 0.017370706671930518, "grad_norm": 15.8573637008667, "learning_rate": 9.642770192448536e-05, "loss": 6.3152, "step": 33 }, { "epoch": 0.017897091722595078, "grad_norm": 14.71132755279541, "learning_rate": 9.611448774886924e-05, "loss": 5.4481, "step": 34 }, { "epoch": 0.018423476773259638, "grad_norm": 14.21374797821045, "learning_rate": 9.578866633275288e-05, "loss": 6.3535, "step": 35 }, { "epoch": 0.0189498618239242, "grad_norm": 15.075162887573242, "learning_rate": 9.545032675245813e-05, "loss": 7.1606, "step": 36 }, { "epoch": 0.01947624687458876, "grad_norm": 14.579214096069336, "learning_rate": 9.509956150664796e-05, "loss": 7.253, "step": 37 }, { "epoch": 0.020002631925253322, "grad_norm": 13.596733093261719, "learning_rate": 9.473646649103818e-05, "loss": 6.1713, "step": 38 }, { "epoch": 0.020529016975917885, "grad_norm": 12.464555740356445, "learning_rate": 9.43611409721806e-05, "loss": 5.6624, "step": 39 }, { "epoch": 0.021055402026582445, "grad_norm": 15.085432052612305, "learning_rate": 9.397368756032445e-05, "loss": 6.6267, "step": 40 }, { "epoch": 0.021581787077247006, "grad_norm": 15.069063186645508, "learning_rate": 9.357421218136386e-05, "loss": 7.4622, "step": 41 }, { "epoch": 0.022108172127911566, "grad_norm": 13.953812599182129, "learning_rate": 9.316282404787871e-05, "loss": 6.3665, "step": 42 }, { "epoch": 0.02263455717857613, "grad_norm": 13.846559524536133, "learning_rate": 9.273963562927695e-05, "loss": 7.0589, "step": 43 }, { "epoch": 0.02316094222924069, "grad_norm": 14.49377155303955, "learning_rate": 9.230476262104677e-05, "loss": 7.26, "step": 44 }, { "epoch": 0.02368732727990525, "grad_norm": 12.730331420898438, "learning_rate": 9.185832391312644e-05, "loss": 7.1364, "step": 45 }, { "epoch": 0.024213712330569813, "grad_norm": 12.382709503173828, "learning_rate": 9.140044155740101e-05, "loss": 6.424, "step": 46 }, { "epoch": 0.024740097381234373, "grad_norm": 13.245918273925781, "learning_rate": 9.093124073433463e-05, "loss": 6.5557, "step": 47 }, { "epoch": 0.025266482431898933, "grad_norm": 16.19131851196289, "learning_rate": 9.045084971874738e-05, "loss": 6.636, "step": 48 }, { "epoch": 0.025792867482563497, "grad_norm": 13.08187484741211, "learning_rate": 8.995939984474624e-05, "loss": 7.1473, "step": 49 }, { "epoch": 0.026319252533228057, "grad_norm": 13.130578994750977, "learning_rate": 8.945702546981969e-05, "loss": 7.0864, "step": 50 }, { "epoch": 0.026319252533228057, "eval_loss": 1.5200674533843994, "eval_runtime": 312.2573, "eval_samples_per_second": 10.248, "eval_steps_per_second": 2.562, "step": 50 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.226539945387622e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }