robiual-awal's picture
Training in progress, step 200, checkpoint
223ddff verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.004024934469035676,
"eval_steps": 50,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 2.012467234517838e-05,
"eval_loss": 2.1559741497039795,
"eval_runtime": 857.823,
"eval_samples_per_second": 24.391,
"eval_steps_per_second": 12.196,
"step": 1
},
{
"epoch": 0.0002012467234517838,
"grad_norm": 6.170618534088135,
"learning_rate": 0.0002,
"loss": 1.4469,
"step": 10
},
{
"epoch": 0.0004024934469035676,
"grad_norm": 1.0294487476348877,
"learning_rate": 0.0002,
"loss": 1.2263,
"step": 20
},
{
"epoch": 0.0006037401703553514,
"grad_norm": 0.6725739240646362,
"learning_rate": 0.0002,
"loss": 1.2202,
"step": 30
},
{
"epoch": 0.0008049868938071352,
"grad_norm": 1.0152796506881714,
"learning_rate": 0.0002,
"loss": 1.29,
"step": 40
},
{
"epoch": 0.001006233617258919,
"grad_norm": 0.8606889843940735,
"learning_rate": 0.0002,
"loss": 1.3785,
"step": 50
},
{
"epoch": 0.001006233617258919,
"eval_loss": 1.4589308500289917,
"eval_runtime": 858.7739,
"eval_samples_per_second": 24.364,
"eval_steps_per_second": 12.182,
"step": 50
},
{
"epoch": 0.0012074803407107028,
"grad_norm": 0.7305423021316528,
"learning_rate": 0.0002,
"loss": 1.2205,
"step": 60
},
{
"epoch": 0.0014087270641624865,
"grad_norm": 1.4533841609954834,
"learning_rate": 0.0002,
"loss": 1.2138,
"step": 70
},
{
"epoch": 0.0016099737876142705,
"grad_norm": 1.0131192207336426,
"learning_rate": 0.0002,
"loss": 1.3712,
"step": 80
},
{
"epoch": 0.0018112205110660542,
"grad_norm": 0.9000447988510132,
"learning_rate": 0.0002,
"loss": 1.1396,
"step": 90
},
{
"epoch": 0.002012467234517838,
"grad_norm": 1.9898265600204468,
"learning_rate": 0.0002,
"loss": 1.229,
"step": 100
},
{
"epoch": 0.002012467234517838,
"eval_loss": 1.4349274635314941,
"eval_runtime": 858.4486,
"eval_samples_per_second": 24.373,
"eval_steps_per_second": 12.187,
"step": 100
},
{
"epoch": 0.002213713957969622,
"grad_norm": 1.271390676498413,
"learning_rate": 0.0002,
"loss": 1.3306,
"step": 110
},
{
"epoch": 0.0024149606814214057,
"grad_norm": 0.9250841736793518,
"learning_rate": 0.0002,
"loss": 1.1564,
"step": 120
},
{
"epoch": 0.0026162074048731894,
"grad_norm": 1.0612225532531738,
"learning_rate": 0.0002,
"loss": 1.2448,
"step": 130
},
{
"epoch": 0.002817454128324973,
"grad_norm": 0.643467366695404,
"learning_rate": 0.0002,
"loss": 1.1702,
"step": 140
},
{
"epoch": 0.003018700851776757,
"grad_norm": 1.3284869194030762,
"learning_rate": 0.0002,
"loss": 1.2826,
"step": 150
},
{
"epoch": 0.003018700851776757,
"eval_loss": 1.407882809638977,
"eval_runtime": 858.6574,
"eval_samples_per_second": 24.367,
"eval_steps_per_second": 12.184,
"step": 150
},
{
"epoch": 0.003219947575228541,
"grad_norm": 1.3226466178894043,
"learning_rate": 0.0002,
"loss": 1.3085,
"step": 160
},
{
"epoch": 0.0034211942986803246,
"grad_norm": 1.9192092418670654,
"learning_rate": 0.0002,
"loss": 1.1933,
"step": 170
},
{
"epoch": 0.0036224410221321083,
"grad_norm": 1.0368775129318237,
"learning_rate": 0.0002,
"loss": 1.2231,
"step": 180
},
{
"epoch": 0.003823687745583892,
"grad_norm": 1.5248116254806519,
"learning_rate": 0.0002,
"loss": 1.2734,
"step": 190
},
{
"epoch": 0.004024934469035676,
"grad_norm": 0.9554014801979065,
"learning_rate": 0.0002,
"loss": 1.1498,
"step": 200
},
{
"epoch": 0.004024934469035676,
"eval_loss": 1.397660255432129,
"eval_runtime": 858.6574,
"eval_samples_per_second": 24.367,
"eval_steps_per_second": 12.184,
"step": 200
}
],
"logging_steps": 10,
"max_steps": 200,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.68217396985856e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}