oldiday's picture
Training in progress, step 100, checkpoint
bcdfd16 verified
raw
history blame
9.03 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.031203682034480068,
"eval_steps": 9,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0003120368203448007,
"eval_loss": 2.0700910091400146,
"eval_runtime": 99.8915,
"eval_samples_per_second": 54.039,
"eval_steps_per_second": 6.757,
"step": 1
},
{
"epoch": 0.000936110461034402,
"grad_norm": 0.7371235489845276,
"learning_rate": 1.5e-05,
"loss": 2.0984,
"step": 3
},
{
"epoch": 0.001872220922068804,
"grad_norm": 0.8994637131690979,
"learning_rate": 3e-05,
"loss": 2.0438,
"step": 6
},
{
"epoch": 0.002808331383103206,
"grad_norm": 0.9350093007087708,
"learning_rate": 4.5e-05,
"loss": 1.981,
"step": 9
},
{
"epoch": 0.002808331383103206,
"eval_loss": 2.015099048614502,
"eval_runtime": 100.6931,
"eval_samples_per_second": 53.608,
"eval_steps_per_second": 6.704,
"step": 9
},
{
"epoch": 0.003744441844137608,
"grad_norm": 1.0670744180679321,
"learning_rate": 4.993910125649561e-05,
"loss": 1.9626,
"step": 12
},
{
"epoch": 0.00468055230517201,
"grad_norm": 0.9438199996948242,
"learning_rate": 4.962019382530521e-05,
"loss": 1.8207,
"step": 15
},
{
"epoch": 0.005616662766206412,
"grad_norm": 0.8364858031272888,
"learning_rate": 4.9031542398457974e-05,
"loss": 1.8106,
"step": 18
},
{
"epoch": 0.005616662766206412,
"eval_loss": 1.6422984600067139,
"eval_runtime": 99.4187,
"eval_samples_per_second": 54.296,
"eval_steps_per_second": 6.789,
"step": 18
},
{
"epoch": 0.006552773227240814,
"grad_norm": 0.6442909240722656,
"learning_rate": 4.817959636416969e-05,
"loss": 1.5737,
"step": 21
},
{
"epoch": 0.007488883688275216,
"grad_norm": 0.6347518563270569,
"learning_rate": 4.707368982147318e-05,
"loss": 1.6011,
"step": 24
},
{
"epoch": 0.008424994149309618,
"grad_norm": 0.7162268161773682,
"learning_rate": 4.572593931387604e-05,
"loss": 1.4523,
"step": 27
},
{
"epoch": 0.008424994149309618,
"eval_loss": 1.364453673362732,
"eval_runtime": 100.9594,
"eval_samples_per_second": 53.467,
"eval_steps_per_second": 6.686,
"step": 27
},
{
"epoch": 0.00936110461034402,
"grad_norm": 0.586948812007904,
"learning_rate": 4.415111107797445e-05,
"loss": 1.315,
"step": 30
},
{
"epoch": 0.010297215071378422,
"grad_norm": 0.5078171491622925,
"learning_rate": 4.2366459261474933e-05,
"loss": 1.404,
"step": 33
},
{
"epoch": 0.011233325532412824,
"grad_norm": 0.42401692271232605,
"learning_rate": 4.039153688314145e-05,
"loss": 1.2328,
"step": 36
},
{
"epoch": 0.011233325532412824,
"eval_loss": 1.2021371126174927,
"eval_runtime": 101.4814,
"eval_samples_per_second": 53.192,
"eval_steps_per_second": 6.651,
"step": 36
},
{
"epoch": 0.012169435993447227,
"grad_norm": 0.4761180579662323,
"learning_rate": 3.824798160583012e-05,
"loss": 1.2958,
"step": 39
},
{
"epoch": 0.013105546454481629,
"grad_norm": 0.4139047861099243,
"learning_rate": 3.5959278669726935e-05,
"loss": 1.2068,
"step": 42
},
{
"epoch": 0.01404165691551603,
"grad_norm": 0.45918866991996765,
"learning_rate": 3.355050358314172e-05,
"loss": 1.0381,
"step": 45
},
{
"epoch": 0.01404165691551603,
"eval_loss": 1.118904948234558,
"eval_runtime": 98.9835,
"eval_samples_per_second": 54.534,
"eval_steps_per_second": 6.819,
"step": 45
},
{
"epoch": 0.014977767376550433,
"grad_norm": 0.4343072772026062,
"learning_rate": 3.104804738999169e-05,
"loss": 1.0742,
"step": 48
},
{
"epoch": 0.015913877837584835,
"grad_norm": 0.428012490272522,
"learning_rate": 2.8479327524001636e-05,
"loss": 1.092,
"step": 51
},
{
"epoch": 0.016849988298619237,
"grad_norm": 0.3722698986530304,
"learning_rate": 2.587248741756253e-05,
"loss": 0.974,
"step": 54
},
{
"epoch": 0.016849988298619237,
"eval_loss": 1.0687353610992432,
"eval_runtime": 99.0201,
"eval_samples_per_second": 54.514,
"eval_steps_per_second": 6.817,
"step": 54
},
{
"epoch": 0.01778609875965364,
"grad_norm": 0.40713074803352356,
"learning_rate": 2.3256088156396868e-05,
"loss": 1.1488,
"step": 57
},
{
"epoch": 0.01872220922068804,
"grad_norm": 0.4211384057998657,
"learning_rate": 2.0658795558326743e-05,
"loss": 1.0259,
"step": 60
},
{
"epoch": 0.019658319681722443,
"grad_norm": 0.3947371244430542,
"learning_rate": 1.8109066104575023e-05,
"loss": 1.0927,
"step": 63
},
{
"epoch": 0.019658319681722443,
"eval_loss": 1.0422518253326416,
"eval_runtime": 98.9882,
"eval_samples_per_second": 54.532,
"eval_steps_per_second": 6.819,
"step": 63
},
{
"epoch": 0.020594430142756845,
"grad_norm": 0.3636433780193329,
"learning_rate": 1.56348351646022e-05,
"loss": 0.9972,
"step": 66
},
{
"epoch": 0.021530540603791247,
"grad_norm": 0.40044713020324707,
"learning_rate": 1.3263210930352737e-05,
"loss": 1.0174,
"step": 69
},
{
"epoch": 0.02246665106482565,
"grad_norm": 0.40297916531562805,
"learning_rate": 1.1020177413231334e-05,
"loss": 1.0661,
"step": 72
},
{
"epoch": 0.02246665106482565,
"eval_loss": 1.0281081199645996,
"eval_runtime": 98.8829,
"eval_samples_per_second": 54.59,
"eval_steps_per_second": 6.826,
"step": 72
},
{
"epoch": 0.02340276152586005,
"grad_norm": 0.37431633472442627,
"learning_rate": 8.930309757836517e-06,
"loss": 1.0517,
"step": 75
},
{
"epoch": 0.024338871986894453,
"grad_norm": 0.41993048787117004,
"learning_rate": 7.016504991533726e-06,
"loss": 1.0507,
"step": 78
},
{
"epoch": 0.025274982447928855,
"grad_norm": 0.3987724781036377,
"learning_rate": 5.299731159831953e-06,
"loss": 1.0236,
"step": 81
},
{
"epoch": 0.025274982447928855,
"eval_loss": 1.021734356880188,
"eval_runtime": 98.832,
"eval_samples_per_second": 54.618,
"eval_steps_per_second": 6.83,
"step": 81
},
{
"epoch": 0.026211092908963257,
"grad_norm": 0.42730259895324707,
"learning_rate": 3.798797596089351e-06,
"loss": 1.0945,
"step": 84
},
{
"epoch": 0.02714720336999766,
"grad_norm": 0.3776666820049286,
"learning_rate": 2.5301488425208296e-06,
"loss": 0.9927,
"step": 87
},
{
"epoch": 0.02808331383103206,
"grad_norm": 0.3661386966705322,
"learning_rate": 1.5076844803522922e-06,
"loss": 0.976,
"step": 90
},
{
"epoch": 0.02808331383103206,
"eval_loss": 1.0193862915039062,
"eval_runtime": 98.9139,
"eval_samples_per_second": 54.573,
"eval_steps_per_second": 6.824,
"step": 90
},
{
"epoch": 0.029019424292066463,
"grad_norm": 0.4045335054397583,
"learning_rate": 7.426068431000882e-07,
"loss": 0.987,
"step": 93
},
{
"epoch": 0.029955534753100865,
"grad_norm": 0.3902008831501007,
"learning_rate": 2.4329828146074095e-07,
"loss": 1.0379,
"step": 96
},
{
"epoch": 0.030891645214135267,
"grad_norm": 0.37765341997146606,
"learning_rate": 1.522932452260595e-08,
"loss": 1.033,
"step": 99
},
{
"epoch": 0.030891645214135267,
"eval_loss": 1.0190349817276,
"eval_runtime": 98.9222,
"eval_samples_per_second": 54.568,
"eval_steps_per_second": 6.824,
"step": 99
}
],
"logging_steps": 3,
"max_steps": 100,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 9,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.05873754406912e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}