{ "best_metric": 1.4647512435913086, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.032605151613955004, "eval_steps": 25, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006521030322791001, "grad_norm": 0.21651652455329895, "learning_rate": 2e-05, "loss": 1.0694, "step": 1 }, { "epoch": 0.0006521030322791001, "eval_loss": 1.6041988134384155, "eval_runtime": 21.5314, "eval_samples_per_second": 119.964, "eval_steps_per_second": 15.001, "step": 1 }, { "epoch": 0.0013042060645582002, "grad_norm": 0.23289066553115845, "learning_rate": 4e-05, "loss": 1.2117, "step": 2 }, { "epoch": 0.0019563090968373, "grad_norm": 0.3474409580230713, "learning_rate": 6e-05, "loss": 1.6201, "step": 3 }, { "epoch": 0.0026084121291164004, "grad_norm": 0.31122875213623047, "learning_rate": 8e-05, "loss": 1.3937, "step": 4 }, { "epoch": 0.0032605151613955006, "grad_norm": 0.32344159483909607, "learning_rate": 0.0001, "loss": 1.4884, "step": 5 }, { "epoch": 0.0039126181936746, "grad_norm": 0.2839902341365814, "learning_rate": 9.987820251299122e-05, "loss": 1.311, "step": 6 }, { "epoch": 0.004564721225953701, "grad_norm": 0.26937419176101685, "learning_rate": 9.951340343707852e-05, "loss": 1.434, "step": 7 }, { "epoch": 0.005216824258232801, "grad_norm": 0.24133557081222534, "learning_rate": 9.890738003669029e-05, "loss": 1.3177, "step": 8 }, { "epoch": 0.0058689272905119005, "grad_norm": 0.2753654420375824, "learning_rate": 9.806308479691595e-05, "loss": 1.4809, "step": 9 }, { "epoch": 0.006521030322791001, "grad_norm": 0.37832701206207275, "learning_rate": 9.698463103929542e-05, "loss": 1.3338, "step": 10 }, { "epoch": 0.007173133355070101, "grad_norm": 0.28633660078048706, "learning_rate": 9.567727288213005e-05, "loss": 1.4828, "step": 11 }, { "epoch": 0.0078252363873492, "grad_norm": 0.2793128788471222, "learning_rate": 9.414737964294636e-05, "loss": 1.452, "step": 12 }, { "epoch": 0.008477339419628301, "grad_norm": 0.2489110678434372, "learning_rate": 9.24024048078213e-05, "loss": 1.4138, "step": 13 }, { "epoch": 0.009129442451907402, "grad_norm": 0.23229557275772095, "learning_rate": 9.045084971874738e-05, "loss": 1.4449, "step": 14 }, { "epoch": 0.0097815454841865, "grad_norm": 0.21934479475021362, "learning_rate": 8.83022221559489e-05, "loss": 1.4662, "step": 15 }, { "epoch": 0.010433648516465601, "grad_norm": 0.24849727749824524, "learning_rate": 8.596699001693255e-05, "loss": 1.3644, "step": 16 }, { "epoch": 0.011085751548744702, "grad_norm": 0.2108910232782364, "learning_rate": 8.345653031794292e-05, "loss": 1.33, "step": 17 }, { "epoch": 0.011737854581023801, "grad_norm": 0.19061513245105743, "learning_rate": 8.07830737662829e-05, "loss": 1.2974, "step": 18 }, { "epoch": 0.012389957613302902, "grad_norm": 0.20481941103935242, "learning_rate": 7.795964517353735e-05, "loss": 1.439, "step": 19 }, { "epoch": 0.013042060645582002, "grad_norm": 0.20506615936756134, "learning_rate": 7.500000000000001e-05, "loss": 1.3511, "step": 20 }, { "epoch": 0.013694163677861103, "grad_norm": 0.20289862155914307, "learning_rate": 7.191855733945387e-05, "loss": 1.3739, "step": 21 }, { "epoch": 0.014346266710140202, "grad_norm": 0.18342691659927368, "learning_rate": 6.873032967079561e-05, "loss": 1.3071, "step": 22 }, { "epoch": 0.014998369742419302, "grad_norm": 0.20112717151641846, "learning_rate": 6.545084971874738e-05, "loss": 1.2712, "step": 23 }, { "epoch": 0.0156504727746984, "grad_norm": 0.19664767384529114, "learning_rate": 6.209609477998338e-05, "loss": 1.4136, "step": 24 }, { "epoch": 0.016302575806977502, "grad_norm": 0.23060624301433563, "learning_rate": 5.868240888334653e-05, "loss": 1.3621, "step": 25 }, { "epoch": 0.016302575806977502, "eval_loss": 1.4904130697250366, "eval_runtime": 21.4171, "eval_samples_per_second": 120.604, "eval_steps_per_second": 15.081, "step": 25 }, { "epoch": 0.016954678839256603, "grad_norm": 0.2123045027256012, "learning_rate": 5.522642316338268e-05, "loss": 1.3638, "step": 26 }, { "epoch": 0.017606781871535703, "grad_norm": 0.19667382538318634, "learning_rate": 5.174497483512506e-05, "loss": 1.4593, "step": 27 }, { "epoch": 0.018258884903814804, "grad_norm": 0.21623675525188446, "learning_rate": 4.825502516487497e-05, "loss": 1.3836, "step": 28 }, { "epoch": 0.018910987936093904, "grad_norm": 0.2061716467142105, "learning_rate": 4.477357683661734e-05, "loss": 1.5142, "step": 29 }, { "epoch": 0.019563090968373, "grad_norm": 0.21938467025756836, "learning_rate": 4.131759111665349e-05, "loss": 1.4625, "step": 30 }, { "epoch": 0.020215194000652102, "grad_norm": 0.22383436560630798, "learning_rate": 3.790390522001662e-05, "loss": 1.4316, "step": 31 }, { "epoch": 0.020867297032931203, "grad_norm": 0.21363838016986847, "learning_rate": 3.4549150281252636e-05, "loss": 1.5011, "step": 32 }, { "epoch": 0.021519400065210303, "grad_norm": 0.23454470932483673, "learning_rate": 3.12696703292044e-05, "loss": 1.4714, "step": 33 }, { "epoch": 0.022171503097489404, "grad_norm": 0.2438150942325592, "learning_rate": 2.8081442660546125e-05, "loss": 1.4566, "step": 34 }, { "epoch": 0.022823606129768505, "grad_norm": 0.24382317066192627, "learning_rate": 2.500000000000001e-05, "loss": 1.394, "step": 35 }, { "epoch": 0.023475709162047602, "grad_norm": 0.26105523109436035, "learning_rate": 2.2040354826462668e-05, "loss": 1.5844, "step": 36 }, { "epoch": 0.024127812194326703, "grad_norm": 0.25343576073646545, "learning_rate": 1.9216926233717085e-05, "loss": 1.5326, "step": 37 }, { "epoch": 0.024779915226605803, "grad_norm": 0.2793176472187042, "learning_rate": 1.6543469682057106e-05, "loss": 1.4606, "step": 38 }, { "epoch": 0.025432018258884904, "grad_norm": 0.2861690819263458, "learning_rate": 1.4033009983067452e-05, "loss": 1.5886, "step": 39 }, { "epoch": 0.026084121291164004, "grad_norm": 0.3055659532546997, "learning_rate": 1.1697777844051105e-05, "loss": 1.6292, "step": 40 }, { "epoch": 0.026736224323443105, "grad_norm": 0.3592061698436737, "learning_rate": 9.549150281252633e-06, "loss": 1.6955, "step": 41 }, { "epoch": 0.027388327355722206, "grad_norm": 0.3756411373615265, "learning_rate": 7.597595192178702e-06, "loss": 1.6397, "step": 42 }, { "epoch": 0.028040430388001303, "grad_norm": 0.3495422303676605, "learning_rate": 5.852620357053651e-06, "loss": 1.4516, "step": 43 }, { "epoch": 0.028692533420280403, "grad_norm": 0.41340503096580505, "learning_rate": 4.322727117869951e-06, "loss": 1.7137, "step": 44 }, { "epoch": 0.029344636452559504, "grad_norm": 0.46326860785484314, "learning_rate": 3.0153689607045845e-06, "loss": 1.6484, "step": 45 }, { "epoch": 0.029996739484838605, "grad_norm": 0.44499802589416504, "learning_rate": 1.9369152030840556e-06, "loss": 1.6504, "step": 46 }, { "epoch": 0.030648842517117705, "grad_norm": 0.5560527443885803, "learning_rate": 1.0926199633097157e-06, "loss": 1.7462, "step": 47 }, { "epoch": 0.0313009455493968, "grad_norm": 0.609457790851593, "learning_rate": 4.865965629214819e-07, "loss": 1.7769, "step": 48 }, { "epoch": 0.03195304858167591, "grad_norm": 0.597760021686554, "learning_rate": 1.2179748700879012e-07, "loss": 1.9396, "step": 49 }, { "epoch": 0.032605151613955004, "grad_norm": 0.8746095895767212, "learning_rate": 0.0, "loss": 2.2609, "step": 50 }, { "epoch": 0.032605151613955004, "eval_loss": 1.4647512435913086, "eval_runtime": 21.4373, "eval_samples_per_second": 120.491, "eval_steps_per_second": 15.067, "step": 50 } ], "logging_steps": 1, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 914705136746496.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }