{ "best_metric": 0.22512628138065338, "best_model_checkpoint": "miner_id_24/checkpoint-25", "epoch": 0.05522192310347208, "eval_steps": 25, "global_step": 25, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0022088769241388833, "grad_norm": 17.61588478088379, "learning_rate": 1.25e-05, "loss": 3.9116, "step": 1 }, { "epoch": 0.0022088769241388833, "eval_loss": 5.86042594909668, "eval_runtime": 2.0601, "eval_samples_per_second": 24.27, "eval_steps_per_second": 6.31, "step": 1 }, { "epoch": 0.0044177538482777666, "grad_norm": 18.221654891967773, "learning_rate": 2.5e-05, "loss": 4.2731, "step": 2 }, { "epoch": 0.006626630772416649, "grad_norm": 18.73583984375, "learning_rate": 3.7500000000000003e-05, "loss": 4.2538, "step": 3 }, { "epoch": 0.008835507696555533, "grad_norm": 16.894182205200195, "learning_rate": 5e-05, "loss": 3.5849, "step": 4 }, { "epoch": 0.011044384620694415, "grad_norm": 13.671380996704102, "learning_rate": 6.25e-05, "loss": 2.4494, "step": 5 }, { "epoch": 0.013253261544833299, "grad_norm": 14.39706039428711, "learning_rate": 7.500000000000001e-05, "loss": 1.5397, "step": 6 }, { "epoch": 0.015462138468972183, "grad_norm": 36.43305587768555, "learning_rate": 8.75e-05, "loss": 4.0588, "step": 7 }, { "epoch": 0.017671015393111066, "grad_norm": 29.52732276916504, "learning_rate": 0.0001, "loss": 2.5498, "step": 8 }, { "epoch": 0.019879892317249948, "grad_norm": 15.871655464172363, "learning_rate": 9.999397620593029e-05, "loss": 0.7469, "step": 9 }, { "epoch": 0.02208876924138883, "grad_norm": 37.426918029785156, "learning_rate": 9.997590643643647e-05, "loss": 0.2828, "step": 10 }, { "epoch": 0.024297646165527716, "grad_norm": 38.68975830078125, "learning_rate": 9.994579552923277e-05, "loss": 0.4165, "step": 11 }, { "epoch": 0.026506523089666598, "grad_norm": 3.2423059940338135, "learning_rate": 9.990365154573717e-05, "loss": 0.0656, "step": 12 }, { "epoch": 0.02871540001380548, "grad_norm": 21.096725463867188, "learning_rate": 9.984948576891312e-05, "loss": 1.0799, "step": 13 }, { "epoch": 0.030924276937944365, "grad_norm": 10.848109245300293, "learning_rate": 9.978331270024886e-05, "loss": 0.4541, "step": 14 }, { "epoch": 0.033133153862083244, "grad_norm": 3.0925045013427734, "learning_rate": 9.9705150055875e-05, "loss": 0.0921, "step": 15 }, { "epoch": 0.03534203078622213, "grad_norm": 0.7270008325576782, "learning_rate": 9.961501876182148e-05, "loss": 0.012, "step": 16 }, { "epoch": 0.037550907710361014, "grad_norm": 2.127373695373535, "learning_rate": 9.951294294841516e-05, "loss": 0.006, "step": 17 }, { "epoch": 0.039759784634499896, "grad_norm": 13.903672218322754, "learning_rate": 9.939894994381957e-05, "loss": 0.0372, "step": 18 }, { "epoch": 0.04196866155863878, "grad_norm": 0.047538336366415024, "learning_rate": 9.927307026671848e-05, "loss": 0.0005, "step": 19 }, { "epoch": 0.04417753848277766, "grad_norm": 0.12976495921611786, "learning_rate": 9.913533761814537e-05, "loss": 0.0015, "step": 20 }, { "epoch": 0.04638641540691654, "grad_norm": 0.053914595395326614, "learning_rate": 9.898578887246075e-05, "loss": 0.0006, "step": 21 }, { "epoch": 0.04859529233105543, "grad_norm": 0.016516996547579765, "learning_rate": 9.882446406748002e-05, "loss": 0.0002, "step": 22 }, { "epoch": 0.05080416925519431, "grad_norm": 0.004742575343698263, "learning_rate": 9.865140639375449e-05, "loss": 0.0, "step": 23 }, { "epoch": 0.053013046179333195, "grad_norm": 0.001722239889204502, "learning_rate": 9.846666218300807e-05, "loss": 0.0, "step": 24 }, { "epoch": 0.05522192310347208, "grad_norm": 0.0011573476949706674, "learning_rate": 9.827028089573329e-05, "loss": 0.0, "step": 25 }, { "epoch": 0.05522192310347208, "eval_loss": 0.22512628138065338, "eval_runtime": 2.0717, "eval_samples_per_second": 24.135, "eval_steps_per_second": 6.275, "step": 25 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 8.1240447254528e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }