{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9933774834437086, "eval_steps": 500, "global_step": 75, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.013245033112582781, "grad_norm": 0.4943664073944092, "learning_rate": 2.5e-05, "loss": 1.4272, "step": 1 }, { "epoch": 0.06622516556291391, "grad_norm": 0.20984387397766113, "learning_rate": 0.000125, "loss": 1.3101, "step": 5 }, { "epoch": 0.13245033112582782, "grad_norm": 0.2290477156639099, "learning_rate": 0.00019956059820218982, "loss": 1.2917, "step": 10 }, { "epoch": 0.1986754966887417, "grad_norm": 0.15163910388946533, "learning_rate": 0.00019466156752904343, "loss": 1.2823, "step": 15 }, { "epoch": 0.26490066225165565, "grad_norm": 0.1627238243818283, "learning_rate": 0.00018458320592590975, "loss": 1.1889, "step": 20 }, { "epoch": 0.33112582781456956, "grad_norm": 0.15383219718933105, "learning_rate": 0.00016987694277788417, "loss": 1.198, "step": 25 }, { "epoch": 0.3973509933774834, "grad_norm": 0.1501755714416504, "learning_rate": 0.0001513474193514842, "loss": 1.1762, "step": 30 }, { "epoch": 0.46357615894039733, "grad_norm": 0.14539840817451477, "learning_rate": 0.0001300084635000341, "loss": 1.2176, "step": 35 }, { "epoch": 0.5298013245033113, "grad_norm": 0.12844280898571014, "learning_rate": 0.0001070276188945293, "loss": 1.1942, "step": 40 }, { "epoch": 0.5960264900662252, "grad_norm": 0.13806107640266418, "learning_rate": 8.366226381814697e-05, "loss": 1.2928, "step": 45 }, { "epoch": 0.6622516556291391, "grad_norm": 0.13188520073890686, "learning_rate": 6.119081473277501e-05, "loss": 1.1959, "step": 50 }, { "epoch": 0.7284768211920529, "grad_norm": 0.12824179232120514, "learning_rate": 4.084277875864776e-05, "loss": 1.1188, "step": 55 }, { "epoch": 0.7947019867549668, "grad_norm": 0.14250224828720093, "learning_rate": 2.3731482188961818e-05, "loss": 1.2076, "step": 60 }, { "epoch": 0.8609271523178808, "grad_norm": 0.14001749455928802, "learning_rate": 1.0793155744261351e-05, "loss": 1.1352, "step": 65 }, { "epoch": 0.9271523178807947, "grad_norm": 0.15154731273651123, "learning_rate": 2.735709467518699e-06, "loss": 1.1486, "step": 70 }, { "epoch": 0.9933774834437086, "grad_norm": 0.14987458288669586, "learning_rate": 0.0, "loss": 1.2385, "step": 75 }, { "epoch": 0.9933774834437086, "step": 75, "total_flos": 5.564154814608179e+16, "train_loss": 0.0, "train_runtime": 0.9326, "train_samples_per_second": 645.502, "train_steps_per_second": 80.42 } ], "logging_steps": 5, "max_steps": 75, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.564154814608179e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }