{ "best_metric": 0.5168118476867676, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.37037037037037035, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.001851851851851852, "eval_loss": 1.665893316268921, "eval_runtime": 23.8147, "eval_samples_per_second": 9.574, "eval_steps_per_second": 2.393, "step": 1 }, { "epoch": 0.018518518518518517, "grad_norm": 21.659528732299805, "learning_rate": 4.2800000000000004e-05, "loss": 2.1673, "step": 10 }, { "epoch": 0.037037037037037035, "grad_norm": 16.18787956237793, "learning_rate": 8.560000000000001e-05, "loss": 1.4786, "step": 20 }, { "epoch": 0.05555555555555555, "grad_norm": 7.465191841125488, "learning_rate": 0.0001284, "loss": 0.9593, "step": 30 }, { "epoch": 0.07407407407407407, "grad_norm": 3.2875595092773438, "learning_rate": 0.00017120000000000001, "loss": 0.4504, "step": 40 }, { "epoch": 0.09259259259259259, "grad_norm": 9.41701602935791, "learning_rate": 0.000214, "loss": 0.3461, "step": 50 }, { "epoch": 0.09259259259259259, "eval_loss": 0.5168118476867676, "eval_runtime": 23.7827, "eval_samples_per_second": 9.587, "eval_steps_per_second": 2.397, "step": 50 }, { "epoch": 0.1111111111111111, "grad_norm": 10.893043518066406, "learning_rate": 0.00021373935337780118, "loss": 1.3228, "step": 60 }, { "epoch": 0.12962962962962962, "grad_norm": 11.86053466796875, "learning_rate": 0.00021295868335534802, "loss": 0.9465, "step": 70 }, { "epoch": 0.14814814814814814, "grad_norm": 245.02352905273438, "learning_rate": 0.0002116617932785172, "loss": 0.8149, "step": 80 }, { "epoch": 0.16666666666666666, "grad_norm": 6.186165809631348, "learning_rate": 0.00020985500146540012, "loss": 0.4509, "step": 90 }, { "epoch": 0.18518518518518517, "grad_norm": 8.681466102600098, "learning_rate": 0.0002075471104240922, "loss": 0.4001, "step": 100 }, { "epoch": 0.18518518518518517, "eval_loss": 0.7028859257698059, "eval_runtime": 24.0444, "eval_samples_per_second": 9.482, "eval_steps_per_second": 2.371, "step": 100 }, { "epoch": 0.2037037037037037, "grad_norm": 12.92205810546875, "learning_rate": 0.00020474936396775828, "loss": 1.4676, "step": 110 }, { "epoch": 0.2222222222222222, "grad_norm": 15.443918228149414, "learning_rate": 0.00020147539243590517, "loss": 1.0882, "step": 120 }, { "epoch": 0.24074074074074073, "grad_norm": 7.393853664398193, "learning_rate": 0.00019774114628873756, "loss": 0.6892, "step": 130 }, { "epoch": 0.25925925925925924, "grad_norm": 17.222841262817383, "learning_rate": 0.00019356481839811937, "loss": 0.3922, "step": 140 }, { "epoch": 0.2777777777777778, "grad_norm": 1.6537044048309326, "learning_rate": 0.00018896675541373064, "loss": 0.2181, "step": 150 }, { "epoch": 0.2777777777777778, "eval_loss": 0.5269023776054382, "eval_runtime": 23.7879, "eval_samples_per_second": 9.585, "eval_steps_per_second": 2.396, "step": 150 }, { "epoch": 0.2962962962962963, "grad_norm": 10.590658187866211, "learning_rate": 0.00018396935863623567, "loss": 1.6471, "step": 160 }, { "epoch": 0.3148148148148148, "grad_norm": 4.128945350646973, "learning_rate": 0.00017859697488039784, "loss": 0.9846, "step": 170 }, { "epoch": 0.3333333333333333, "grad_norm": 9.964460372924805, "learning_rate": 0.00017287577785984542, "loss": 1.0229, "step": 180 }, { "epoch": 0.35185185185185186, "grad_norm": 16.727569580078125, "learning_rate": 0.0001668336406713699, "loss": 0.3647, "step": 190 }, { "epoch": 0.37037037037037035, "grad_norm": 1.3600995540618896, "learning_rate": 0.0001605, "loss": 0.2746, "step": 200 }, { "epoch": 0.37037037037037035, "eval_loss": 0.5395556688308716, "eval_runtime": 23.7998, "eval_samples_per_second": 9.58, "eval_steps_per_second": 2.395, "step": 200 } ], "logging_steps": 10, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.571948497495654e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }