{ "best_metric": 0.16538894176483154, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.03388681802778719, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00016943409013893597, "eval_loss": 0.2710127830505371, "eval_runtime": 202.1343, "eval_samples_per_second": 12.294, "eval_steps_per_second": 3.077, "step": 1 }, { "epoch": 0.0016943409013893595, "grad_norm": 1.9416357278823853, "learning_rate": 4.0600000000000004e-05, "loss": 0.3603, "step": 10 }, { "epoch": 0.003388681802778719, "grad_norm": 2.084803581237793, "learning_rate": 8.120000000000001e-05, "loss": 0.2829, "step": 20 }, { "epoch": 0.005083022704168078, "grad_norm": 2.351337194442749, "learning_rate": 0.00012179999999999999, "loss": 0.2922, "step": 30 }, { "epoch": 0.006777363605557438, "grad_norm": 3.6437880992889404, "learning_rate": 0.00016240000000000002, "loss": 0.2862, "step": 40 }, { "epoch": 0.008471704506946797, "grad_norm": 10.51866340637207, "learning_rate": 0.000203, "loss": 0.3485, "step": 50 }, { "epoch": 0.008471704506946797, "eval_loss": 0.16538894176483154, "eval_runtime": 202.5859, "eval_samples_per_second": 12.266, "eval_steps_per_second": 3.07, "step": 50 }, { "epoch": 0.010166045408336157, "grad_norm": 1.790229082107544, "learning_rate": 0.00020275275110137215, "loss": 0.2842, "step": 60 }, { "epoch": 0.011860386309725517, "grad_norm": 1.7886559963226318, "learning_rate": 0.00020201220897726938, "loss": 0.3127, "step": 70 }, { "epoch": 0.013554727211114876, "grad_norm": 2.6341745853424072, "learning_rate": 0.00020078198147448128, "loss": 0.3139, "step": 80 }, { "epoch": 0.015249068112504236, "grad_norm": 3.013049364089966, "learning_rate": 0.00019906806213773937, "loss": 0.327, "step": 90 }, { "epoch": 0.016943409013893594, "grad_norm": 6.521936893463135, "learning_rate": 0.0001968788010097697, "loss": 0.3893, "step": 100 }, { "epoch": 0.016943409013893594, "eval_loss": 0.1979130208492279, "eval_runtime": 202.6819, "eval_samples_per_second": 12.261, "eval_steps_per_second": 3.069, "step": 100 }, { "epoch": 0.018637749915282956, "grad_norm": 1.9125583171844482, "learning_rate": 0.00019422486395072398, "loss": 0.3318, "step": 110 }, { "epoch": 0.020332090816672314, "grad_norm": 1.7610540390014648, "learning_rate": 0.0001911191806751811, "loss": 0.2976, "step": 120 }, { "epoch": 0.022026431718061675, "grad_norm": 2.0341711044311523, "learning_rate": 0.00018757688175987723, "loss": 0.3682, "step": 130 }, { "epoch": 0.023720772619451033, "grad_norm": 2.6553566455841064, "learning_rate": 0.00018361522492905716, "loss": 0.3767, "step": 140 }, { "epoch": 0.025415113520840395, "grad_norm": 7.6121907234191895, "learning_rate": 0.00017925351097657625, "loss": 0.398, "step": 150 }, { "epoch": 0.025415113520840395, "eval_loss": 0.19531093537807465, "eval_runtime": 202.675, "eval_samples_per_second": 12.261, "eval_steps_per_second": 3.069, "step": 150 }, { "epoch": 0.027109454422229753, "grad_norm": 1.6871684789657593, "learning_rate": 0.00017451298973437308, "loss": 0.3654, "step": 160 }, { "epoch": 0.02880379532361911, "grad_norm": 2.150817394256592, "learning_rate": 0.0001694167565454241, "loss": 0.3326, "step": 170 }, { "epoch": 0.030498136225008472, "grad_norm": 2.821345329284668, "learning_rate": 0.0001639896397455543, "loss": 0.3337, "step": 180 }, { "epoch": 0.032192477126397834, "grad_norm": 2.3680481910705566, "learning_rate": 0.0001582580797022808, "loss": 0.3199, "step": 190 }, { "epoch": 0.03388681802778719, "grad_norm": 6.9135589599609375, "learning_rate": 0.00015225, "loss": 0.3978, "step": 200 }, { "epoch": 0.03388681802778719, "eval_loss": 0.19054758548736572, "eval_runtime": 202.7026, "eval_samples_per_second": 12.259, "eval_steps_per_second": 3.069, "step": 200 } ], "logging_steps": 10, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.080119355480474e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }