{ "best_metric": 1.9968057870864868, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.746268656716418, "eval_steps": 25, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.014925373134328358, "grad_norm": 0.30074843764305115, "learning_rate": 2e-05, "loss": 1.7593, "step": 1 }, { "epoch": 0.014925373134328358, "eval_loss": 2.4016451835632324, "eval_runtime": 0.979, "eval_samples_per_second": 115.424, "eval_steps_per_second": 15.322, "step": 1 }, { "epoch": 0.029850746268656716, "grad_norm": 0.3124169409275055, "learning_rate": 4e-05, "loss": 1.8847, "step": 2 }, { "epoch": 0.04477611940298507, "grad_norm": 0.3307819962501526, "learning_rate": 6e-05, "loss": 2.1815, "step": 3 }, { "epoch": 0.05970149253731343, "grad_norm": 0.39875397086143494, "learning_rate": 8e-05, "loss": 2.2626, "step": 4 }, { "epoch": 0.07462686567164178, "grad_norm": 0.3945293426513672, "learning_rate": 0.0001, "loss": 2.1814, "step": 5 }, { "epoch": 0.08955223880597014, "grad_norm": 0.3672662377357483, "learning_rate": 9.987820251299122e-05, "loss": 2.2924, "step": 6 }, { "epoch": 0.1044776119402985, "grad_norm": 0.37927067279815674, "learning_rate": 9.951340343707852e-05, "loss": 2.4045, "step": 7 }, { "epoch": 0.11940298507462686, "grad_norm": 0.402125746011734, "learning_rate": 9.890738003669029e-05, "loss": 2.1665, "step": 8 }, { "epoch": 0.13432835820895522, "grad_norm": 0.49144914746284485, "learning_rate": 9.806308479691595e-05, "loss": 2.287, "step": 9 }, { "epoch": 0.14925373134328357, "grad_norm": 0.4933624565601349, "learning_rate": 9.698463103929542e-05, "loss": 2.3176, "step": 10 }, { "epoch": 0.16417910447761194, "grad_norm": 0.4840998351573944, "learning_rate": 9.567727288213005e-05, "loss": 2.3052, "step": 11 }, { "epoch": 0.1791044776119403, "grad_norm": 0.48531755805015564, "learning_rate": 9.414737964294636e-05, "loss": 2.3138, "step": 12 }, { "epoch": 0.19402985074626866, "grad_norm": 0.5164350271224976, "learning_rate": 9.24024048078213e-05, "loss": 2.0854, "step": 13 }, { "epoch": 0.208955223880597, "grad_norm": 0.5610319972038269, "learning_rate": 9.045084971874738e-05, "loss": 2.3273, "step": 14 }, { "epoch": 0.22388059701492538, "grad_norm": 0.6317114233970642, "learning_rate": 8.83022221559489e-05, "loss": 2.3732, "step": 15 }, { "epoch": 0.23880597014925373, "grad_norm": 0.8776953816413879, "learning_rate": 8.596699001693255e-05, "loss": 2.2796, "step": 16 }, { "epoch": 0.2537313432835821, "grad_norm": 0.33381423354148865, "learning_rate": 8.345653031794292e-05, "loss": 1.7108, "step": 17 }, { "epoch": 0.26865671641791045, "grad_norm": 0.2970743477344513, "learning_rate": 8.07830737662829e-05, "loss": 1.5712, "step": 18 }, { "epoch": 0.2835820895522388, "grad_norm": 0.37099114060401917, "learning_rate": 7.795964517353735e-05, "loss": 1.937, "step": 19 }, { "epoch": 0.29850746268656714, "grad_norm": 0.3620712459087372, "learning_rate": 7.500000000000001e-05, "loss": 2.0657, "step": 20 }, { "epoch": 0.31343283582089554, "grad_norm": 0.34259912371635437, "learning_rate": 7.191855733945387e-05, "loss": 1.9633, "step": 21 }, { "epoch": 0.3283582089552239, "grad_norm": 0.4164319932460785, "learning_rate": 6.873032967079561e-05, "loss": 2.2391, "step": 22 }, { "epoch": 0.34328358208955223, "grad_norm": 0.34018903970718384, "learning_rate": 6.545084971874738e-05, "loss": 2.1109, "step": 23 }, { "epoch": 0.3582089552238806, "grad_norm": 0.36194512248039246, "learning_rate": 
6.209609477998338e-05, "loss": 2.1308, "step": 24 }, { "epoch": 0.373134328358209, "grad_norm": 0.36260104179382324, "learning_rate": 5.868240888334653e-05, "loss": 2.0707, "step": 25 }, { "epoch": 0.373134328358209, "eval_loss": 2.055764675140381, "eval_runtime": 0.9467, "eval_samples_per_second": 119.362, "eval_steps_per_second": 15.845, "step": 25 }, { "epoch": 0.3880597014925373, "grad_norm": 0.39158010482788086, "learning_rate": 5.522642316338268e-05, "loss": 2.1612, "step": 26 }, { "epoch": 0.40298507462686567, "grad_norm": 0.42965012788772583, "learning_rate": 5.174497483512506e-05, "loss": 2.2331, "step": 27 }, { "epoch": 0.417910447761194, "grad_norm": 0.4510224759578705, "learning_rate": 4.825502516487497e-05, "loss": 2.1721, "step": 28 }, { "epoch": 0.43283582089552236, "grad_norm": 0.4704110622406006, "learning_rate": 4.477357683661734e-05, "loss": 2.0252, "step": 29 }, { "epoch": 0.44776119402985076, "grad_norm": 0.5712336301803589, "learning_rate": 4.131759111665349e-05, "loss": 2.0158, "step": 30 }, { "epoch": 0.4626865671641791, "grad_norm": 0.5858246684074402, "learning_rate": 3.790390522001662e-05, "loss": 2.0812, "step": 31 }, { "epoch": 0.47761194029850745, "grad_norm": 0.9474906325340271, "learning_rate": 3.4549150281252636e-05, "loss": 2.0843, "step": 32 }, { "epoch": 0.4925373134328358, "grad_norm": 0.24608998000621796, "learning_rate": 3.12696703292044e-05, "loss": 1.6666, "step": 33 }, { "epoch": 0.5074626865671642, "grad_norm": 0.2545487582683563, "learning_rate": 2.8081442660546125e-05, "loss": 1.8542, "step": 34 }, { "epoch": 0.5223880597014925, "grad_norm": 0.25772571563720703, "learning_rate": 2.500000000000001e-05, "loss": 1.8961, "step": 35 }, { "epoch": 0.5373134328358209, "grad_norm": 0.3282116949558258, "learning_rate": 2.2040354826462668e-05, "loss": 1.9271, "step": 36 }, { "epoch": 0.5522388059701493, "grad_norm": 0.30054324865341187, "learning_rate": 1.9216926233717085e-05, "loss": 1.99, "step": 37 }, { "epoch": 0.5671641791044776, "grad_norm": 0.33032283186912537, "learning_rate": 1.6543469682057106e-05, "loss": 1.9075, "step": 38 }, { "epoch": 0.582089552238806, "grad_norm": 0.3541136384010315, "learning_rate": 1.4033009983067452e-05, "loss": 2.1065, "step": 39 }, { "epoch": 0.5970149253731343, "grad_norm": 0.36783766746520996, "learning_rate": 1.1697777844051105e-05, "loss": 2.1716, "step": 40 }, { "epoch": 0.6119402985074627, "grad_norm": 0.3704308569431305, "learning_rate": 9.549150281252633e-06, "loss": 2.0668, "step": 41 }, { "epoch": 0.6268656716417911, "grad_norm": 0.41871926188468933, "learning_rate": 7.597595192178702e-06, "loss": 2.0886, "step": 42 }, { "epoch": 0.6417910447761194, "grad_norm": 0.4612567126750946, "learning_rate": 5.852620357053651e-06, "loss": 2.176, "step": 43 }, { "epoch": 0.6567164179104478, "grad_norm": 0.43885257840156555, "learning_rate": 4.322727117869951e-06, "loss": 2.0272, "step": 44 }, { "epoch": 0.6716417910447762, "grad_norm": 0.5096978545188904, "learning_rate": 3.0153689607045845e-06, "loss": 2.1226, "step": 45 }, { "epoch": 0.6865671641791045, "grad_norm": 0.49920862913131714, "learning_rate": 1.9369152030840556e-06, "loss": 1.8736, "step": 46 }, { "epoch": 0.7014925373134329, "grad_norm": 0.6098780632019043, "learning_rate": 1.0926199633097157e-06, "loss": 1.9208, "step": 47 }, { "epoch": 0.7164179104477612, "grad_norm": 0.7482870817184448, "learning_rate": 4.865965629214819e-07, "loss": 1.972, "step": 48 }, { "epoch": 0.7313432835820896, "grad_norm": 0.2268557995557785, "learning_rate": 
1.2179748700879012e-07, "loss": 1.6986, "step": 49 }, { "epoch": 0.746268656716418, "grad_norm": 0.2336985319852829, "learning_rate": 0.0, "loss": 1.7152, "step": 50 }, { "epoch": 0.746268656716418, "eval_loss": 1.9968057870864868, "eval_runtime": 0.9661, "eval_samples_per_second": 116.963, "eval_steps_per_second": 15.526, "step": 50 } ], "logging_steps": 1, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 704570172899328.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }