|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.999822427417207, |
|
"eval_steps": 500, |
|
"global_step": 33789, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.08878629139660836, |
|
"grad_norm": 5.894807815551758, |
|
"learning_rate": 2.959455460195324e-05, |
|
"loss": 2.7622, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.17757258279321672, |
|
"grad_norm": 4.288614749908447, |
|
"learning_rate": 5.918910920390648e-05, |
|
"loss": 1.8383, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.26635887418982507, |
|
"grad_norm": 3.5310869216918945, |
|
"learning_rate": 8.878366380585973e-05, |
|
"loss": 1.699, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.35514516558643344, |
|
"grad_norm": 3.1121788024902344, |
|
"learning_rate": 9.795790858270306e-05, |
|
"loss": 1.6439, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.4439314569830418, |
|
"grad_norm": 2.1338531970977783, |
|
"learning_rate": 9.466951660637949e-05, |
|
"loss": 1.5719, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.5327177483796501, |
|
"grad_norm": 2.2646570205688477, |
|
"learning_rate": 9.138112463005591e-05, |
|
"loss": 1.5305, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.6215040397762586, |
|
"grad_norm": 2.2252941131591797, |
|
"learning_rate": 8.809273265373233e-05, |
|
"loss": 1.5358, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.7102903311728669, |
|
"grad_norm": 1.8011505603790283, |
|
"learning_rate": 8.480434067740875e-05, |
|
"loss": 1.5985, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.7990766225694753, |
|
"grad_norm": 1.3360363245010376, |
|
"learning_rate": 8.151594870108517e-05, |
|
"loss": 1.4335, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.8878629139660836, |
|
"grad_norm": 1.613250494003296, |
|
"learning_rate": 7.82275567247616e-05, |
|
"loss": 1.4029, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.976649205362692, |
|
"grad_norm": 1.398319125175476, |
|
"learning_rate": 7.493916474843802e-05, |
|
"loss": 1.4044, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.0653467104679037, |
|
"grad_norm": 2.5596816539764404, |
|
"learning_rate": 7.165077277211444e-05, |
|
"loss": 1.4486, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.1541330018645122, |
|
"grad_norm": 1.8799223899841309, |
|
"learning_rate": 6.836238079579086e-05, |
|
"loss": 1.3566, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.2429192932611204, |
|
"grad_norm": 1.6905425786972046, |
|
"learning_rate": 6.507398881946728e-05, |
|
"loss": 1.2982, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.3317055846577288, |
|
"grad_norm": 2.4025771617889404, |
|
"learning_rate": 6.17855968431437e-05, |
|
"loss": 1.2906, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.4204918760543372, |
|
"grad_norm": 1.450578212738037, |
|
"learning_rate": 5.8497204866820135e-05, |
|
"loss": 1.3094, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.5092781674509457, |
|
"grad_norm": 1.8370212316513062, |
|
"learning_rate": 5.520881289049655e-05, |
|
"loss": 1.3086, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.598064458847554, |
|
"grad_norm": 2.5083975791931152, |
|
"learning_rate": 5.192042091417297e-05, |
|
"loss": 1.3165, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.6868507502441623, |
|
"grad_norm": 2.7560970783233643, |
|
"learning_rate": 4.8632028937849395e-05, |
|
"loss": 1.3658, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.7756370416407705, |
|
"grad_norm": 2.0161373615264893, |
|
"learning_rate": 4.534363696152582e-05, |
|
"loss": 1.2169, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.864423333037379, |
|
"grad_norm": 1.949626088142395, |
|
"learning_rate": 4.205524498520223e-05, |
|
"loss": 1.1678, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.9532096244339874, |
|
"grad_norm": 1.920861840248108, |
|
"learning_rate": 3.876685300887866e-05, |
|
"loss": 1.185, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.041907129539199, |
|
"grad_norm": 3.2169787883758545, |
|
"learning_rate": 3.5478461032555084e-05, |
|
"loss": 1.2204, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.1306934209358075, |
|
"grad_norm": 3.190474033355713, |
|
"learning_rate": 3.2190069056231506e-05, |
|
"loss": 1.1745, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.219479712332416, |
|
"grad_norm": 2.9770030975341797, |
|
"learning_rate": 2.8901677079907925e-05, |
|
"loss": 1.0429, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.3082660037290244, |
|
"grad_norm": 3.3964896202087402, |
|
"learning_rate": 2.561328510358435e-05, |
|
"loss": 1.0617, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.397052295125633, |
|
"grad_norm": 2.1896071434020996, |
|
"learning_rate": 2.2324893127260772e-05, |
|
"loss": 1.1053, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.4858385865222408, |
|
"grad_norm": 2.337308883666992, |
|
"learning_rate": 1.903650115093719e-05, |
|
"loss": 1.1843, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.574624877918849, |
|
"grad_norm": 3.1749579906463623, |
|
"learning_rate": 1.5748109174613617e-05, |
|
"loss": 1.173, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.6634111693154576, |
|
"grad_norm": 4.609279155731201, |
|
"learning_rate": 1.2459717198290037e-05, |
|
"loss": 1.1715, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.752197460712066, |
|
"grad_norm": 3.8842334747314453, |
|
"learning_rate": 9.17132522196646e-06, |
|
"loss": 1.0078, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 2.8409837521086745, |
|
"grad_norm": 2.983306884765625, |
|
"learning_rate": 5.882933245642881e-06, |
|
"loss": 0.9326, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.929770043505283, |
|
"grad_norm": 3.145293712615967, |
|
"learning_rate": 2.594541269319303e-06, |
|
"loss": 0.9389, |
|
"step": 33000 |
|
} |
|
], |
|
"logging_steps": 1000, |
|
"max_steps": 33789, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.811865797575311e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|