{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.999822427417207, "eval_steps": 500, "global_step": 33789, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08878629139660836, "grad_norm": 5.894807815551758, "learning_rate": 2.959455460195324e-05, "loss": 2.7622, "step": 1000 }, { "epoch": 0.17757258279321672, "grad_norm": 4.288614749908447, "learning_rate": 5.918910920390648e-05, "loss": 1.8383, "step": 2000 }, { "epoch": 0.26635887418982507, "grad_norm": 3.5310869216918945, "learning_rate": 8.878366380585973e-05, "loss": 1.699, "step": 3000 }, { "epoch": 0.35514516558643344, "grad_norm": 3.1121788024902344, "learning_rate": 9.795790858270306e-05, "loss": 1.6439, "step": 4000 }, { "epoch": 0.4439314569830418, "grad_norm": 2.1338531970977783, "learning_rate": 9.466951660637949e-05, "loss": 1.5719, "step": 5000 }, { "epoch": 0.5327177483796501, "grad_norm": 2.2646570205688477, "learning_rate": 9.138112463005591e-05, "loss": 1.5305, "step": 6000 }, { "epoch": 0.6215040397762586, "grad_norm": 2.2252941131591797, "learning_rate": 8.809273265373233e-05, "loss": 1.5358, "step": 7000 }, { "epoch": 0.7102903311728669, "grad_norm": 1.8011505603790283, "learning_rate": 8.480434067740875e-05, "loss": 1.5985, "step": 8000 }, { "epoch": 0.7990766225694753, "grad_norm": 1.3360363245010376, "learning_rate": 8.151594870108517e-05, "loss": 1.4335, "step": 9000 }, { "epoch": 0.8878629139660836, "grad_norm": 1.613250494003296, "learning_rate": 7.82275567247616e-05, "loss": 1.4029, "step": 10000 }, { "epoch": 0.976649205362692, "grad_norm": 1.398319125175476, "learning_rate": 7.493916474843802e-05, "loss": 1.4044, "step": 11000 }, { "epoch": 1.0653467104679037, "grad_norm": 2.5596816539764404, "learning_rate": 7.165077277211444e-05, "loss": 1.4486, "step": 12000 }, { "epoch": 1.1541330018645122, "grad_norm": 1.8799223899841309, "learning_rate": 6.836238079579086e-05, "loss": 1.3566, "step": 13000 }, { "epoch": 1.2429192932611204, "grad_norm": 1.6905425786972046, "learning_rate": 6.507398881946728e-05, "loss": 1.2982, "step": 14000 }, { "epoch": 1.3317055846577288, "grad_norm": 2.4025771617889404, "learning_rate": 6.17855968431437e-05, "loss": 1.2906, "step": 15000 }, { "epoch": 1.4204918760543372, "grad_norm": 1.450578212738037, "learning_rate": 5.8497204866820135e-05, "loss": 1.3094, "step": 16000 }, { "epoch": 1.5092781674509457, "grad_norm": 1.8370212316513062, "learning_rate": 5.520881289049655e-05, "loss": 1.3086, "step": 17000 }, { "epoch": 1.598064458847554, "grad_norm": 2.5083975791931152, "learning_rate": 5.192042091417297e-05, "loss": 1.3165, "step": 18000 }, { "epoch": 1.6868507502441623, "grad_norm": 2.7560970783233643, "learning_rate": 4.8632028937849395e-05, "loss": 1.3658, "step": 19000 }, { "epoch": 1.7756370416407705, "grad_norm": 2.0161373615264893, "learning_rate": 4.534363696152582e-05, "loss": 1.2169, "step": 20000 }, { "epoch": 1.864423333037379, "grad_norm": 1.949626088142395, "learning_rate": 4.205524498520223e-05, "loss": 1.1678, "step": 21000 }, { "epoch": 1.9532096244339874, "grad_norm": 1.920861840248108, "learning_rate": 3.876685300887866e-05, "loss": 1.185, "step": 22000 }, { "epoch": 2.041907129539199, "grad_norm": 3.2169787883758545, "learning_rate": 3.5478461032555084e-05, "loss": 1.2204, "step": 23000 }, { "epoch": 2.1306934209358075, "grad_norm": 3.190474033355713, "learning_rate": 3.2190069056231506e-05, "loss": 1.1745, "step": 24000 }, { "epoch": 2.219479712332416, "grad_norm": 2.9770030975341797, "learning_rate": 2.8901677079907925e-05, "loss": 1.0429, "step": 25000 }, { "epoch": 2.3082660037290244, "grad_norm": 3.3964896202087402, "learning_rate": 2.561328510358435e-05, "loss": 1.0617, "step": 26000 }, { "epoch": 2.397052295125633, "grad_norm": 2.1896071434020996, "learning_rate": 2.2324893127260772e-05, "loss": 1.1053, "step": 27000 }, { "epoch": 2.4858385865222408, "grad_norm": 2.337308883666992, "learning_rate": 1.903650115093719e-05, "loss": 1.1843, "step": 28000 }, { "epoch": 2.574624877918849, "grad_norm": 3.1749579906463623, "learning_rate": 1.5748109174613617e-05, "loss": 1.173, "step": 29000 }, { "epoch": 2.6634111693154576, "grad_norm": 4.609279155731201, "learning_rate": 1.2459717198290037e-05, "loss": 1.1715, "step": 30000 }, { "epoch": 2.752197460712066, "grad_norm": 3.8842334747314453, "learning_rate": 9.17132522196646e-06, "loss": 1.0078, "step": 31000 }, { "epoch": 2.8409837521086745, "grad_norm": 2.983306884765625, "learning_rate": 5.882933245642881e-06, "loss": 0.9326, "step": 32000 }, { "epoch": 2.929770043505283, "grad_norm": 3.145293712615967, "learning_rate": 2.594541269319303e-06, "loss": 0.9389, "step": 33000 } ], "logging_steps": 1000, "max_steps": 33789, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.811865797575311e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }