{ "best_metric": 1.1903597116470337, "best_model_checkpoint": "miner_id_24/checkpoint-30", "epoch": 0.0010412147505422993, "eval_steps": 5, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.470715835140998e-05, "eval_loss": 1.3499997854232788, "eval_runtime": 1831.1632, "eval_samples_per_second": 6.625, "eval_steps_per_second": 3.313, "step": 1 }, { "epoch": 0.00010412147505422994, "grad_norm": 3.2817139625549316, "learning_rate": 3e-05, "loss": 5.6987, "step": 3 }, { "epoch": 0.0001735357917570499, "eval_loss": 1.3320451974868774, "eval_runtime": 1837.8637, "eval_samples_per_second": 6.601, "eval_steps_per_second": 3.301, "step": 5 }, { "epoch": 0.00020824295010845987, "grad_norm": 2.2102863788604736, "learning_rate": 6e-05, "loss": 5.5276, "step": 6 }, { "epoch": 0.0003123644251626898, "grad_norm": 4.29848575592041, "learning_rate": 9e-05, "loss": 5.1643, "step": 9 }, { "epoch": 0.0003470715835140998, "eval_loss": 1.2517874240875244, "eval_runtime": 2333.7955, "eval_samples_per_second": 5.198, "eval_steps_per_second": 2.599, "step": 10 }, { "epoch": 0.00041648590021691975, "grad_norm": 3.965546131134033, "learning_rate": 9.755282581475769e-05, "loss": 5.3549, "step": 12 }, { "epoch": 0.0005206073752711496, "grad_norm": 2.184412956237793, "learning_rate": 8.535533905932738e-05, "loss": 4.4727, "step": 15 }, { "epoch": 0.0005206073752711496, "eval_loss": 1.2216558456420898, "eval_runtime": 1479.7059, "eval_samples_per_second": 8.199, "eval_steps_per_second": 4.099, "step": 15 }, { "epoch": 0.0006247288503253796, "grad_norm": 2.2891042232513428, "learning_rate": 6.545084971874738e-05, "loss": 5.0955, "step": 18 }, { "epoch": 0.0006941431670281996, "eval_loss": 1.1996384859085083, "eval_runtime": 1103.7842, "eval_samples_per_second": 10.991, "eval_steps_per_second": 5.496, "step": 20 }, { "epoch": 0.0007288503253796095, "grad_norm": 2.0232975482940674, "learning_rate": 4.2178276747988446e-05, "loss": 4.4981, "step": 21 }, { "epoch": 0.0008329718004338395, "grad_norm": 2.182401418685913, "learning_rate": 2.061073738537635e-05, "loss": 4.6256, "step": 24 }, { "epoch": 0.0008676789587852494, "eval_loss": 1.1918320655822754, "eval_runtime": 1104.7539, "eval_samples_per_second": 10.982, "eval_steps_per_second": 5.491, "step": 25 }, { "epoch": 0.0009370932754880694, "grad_norm": 2.29049015045166, "learning_rate": 5.449673790581611e-06, "loss": 4.8211, "step": 27 }, { "epoch": 0.0010412147505422993, "grad_norm": 4.216933727264404, "learning_rate": 0.0, "loss": 5.218, "step": 30 }, { "epoch": 0.0010412147505422993, "eval_loss": 1.1903597116470337, "eval_runtime": 1104.119, "eval_samples_per_second": 10.988, "eval_steps_per_second": 5.494, "step": 30 } ], "logging_steps": 3, "max_steps": 30, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6063618313420800.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }