redPajama-3b-zAgile-base / trainer_state.json
dtorres-zAgile's picture
Upload 25 files
0be3778
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 100.0,
"global_step": 300,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 3.33,
"learning_rate": 4.061954955173073e-06,
"loss": 2.0922,
"step": 10
},
{
"epoch": 6.67,
"learning_rate": 5.28472523771611e-06,
"loss": 1.5474,
"step": 20
},
{
"epoch": 6.67,
"eval_loss": 2.08203125,
"eval_runtime": 2.8721,
"eval_samples_per_second": 3.83,
"eval_steps_per_second": 0.696,
"step": 20
},
{
"epoch": 10.0,
"learning_rate": 6e-06,
"loss": 0.8877,
"step": 30
},
{
"epoch": 13.33,
"learning_rate": 6e-06,
"loss": 0.2622,
"step": 40
},
{
"epoch": 13.33,
"eval_loss": 2.9765625,
"eval_runtime": 2.8096,
"eval_samples_per_second": 3.915,
"eval_steps_per_second": 0.712,
"step": 40
},
{
"epoch": 16.67,
"learning_rate": 6e-06,
"loss": 0.0353,
"step": 50
},
{
"epoch": 20.0,
"learning_rate": 6e-06,
"loss": 0.0066,
"step": 60
},
{
"epoch": 20.0,
"eval_loss": 3.96484375,
"eval_runtime": 2.8073,
"eval_samples_per_second": 3.918,
"eval_steps_per_second": 0.712,
"step": 60
},
{
"epoch": 23.33,
"learning_rate": 6e-06,
"loss": 0.0022,
"step": 70
},
{
"epoch": 26.67,
"learning_rate": 6e-06,
"loss": 0.001,
"step": 80
},
{
"epoch": 26.67,
"eval_loss": 4.140625,
"eval_runtime": 2.8092,
"eval_samples_per_second": 3.916,
"eval_steps_per_second": 0.712,
"step": 80
},
{
"epoch": 30.0,
"learning_rate": 6e-06,
"loss": 0.0009,
"step": 90
},
{
"epoch": 33.33,
"learning_rate": 6e-06,
"loss": 0.0006,
"step": 100
},
{
"epoch": 33.33,
"eval_loss": 4.22265625,
"eval_runtime": 2.8076,
"eval_samples_per_second": 3.918,
"eval_steps_per_second": 0.712,
"step": 100
},
{
"epoch": 36.67,
"learning_rate": 6e-06,
"loss": 0.0005,
"step": 110
},
{
"epoch": 40.0,
"learning_rate": 6e-06,
"loss": 0.0004,
"step": 120
},
{
"epoch": 40.0,
"eval_loss": 4.2890625,
"eval_runtime": 2.8093,
"eval_samples_per_second": 3.916,
"eval_steps_per_second": 0.712,
"step": 120
},
{
"epoch": 43.33,
"learning_rate": 6e-06,
"loss": 0.0003,
"step": 130
},
{
"epoch": 46.67,
"learning_rate": 6e-06,
"loss": 0.0003,
"step": 140
},
{
"epoch": 46.67,
"eval_loss": 4.3515625,
"eval_runtime": 2.8091,
"eval_samples_per_second": 3.916,
"eval_steps_per_second": 0.712,
"step": 140
},
{
"epoch": 50.0,
"learning_rate": 6e-06,
"loss": 0.0003,
"step": 150
},
{
"epoch": 53.33,
"learning_rate": 6e-06,
"loss": 0.0003,
"step": 160
},
{
"epoch": 53.33,
"eval_loss": 4.390625,
"eval_runtime": 2.8088,
"eval_samples_per_second": 3.916,
"eval_steps_per_second": 0.712,
"step": 160
},
{
"epoch": 56.67,
"learning_rate": 6e-06,
"loss": 0.0003,
"step": 170
},
{
"epoch": 60.0,
"learning_rate": 6e-06,
"loss": 0.0003,
"step": 180
},
{
"epoch": 60.0,
"eval_loss": 4.42578125,
"eval_runtime": 2.808,
"eval_samples_per_second": 3.917,
"eval_steps_per_second": 0.712,
"step": 180
},
{
"epoch": 63.33,
"learning_rate": 6e-06,
"loss": 0.0002,
"step": 190
},
{
"epoch": 66.67,
"learning_rate": 6e-06,
"loss": 0.0002,
"step": 200
},
{
"epoch": 66.67,
"eval_loss": 4.453125,
"eval_runtime": 2.8089,
"eval_samples_per_second": 3.916,
"eval_steps_per_second": 0.712,
"step": 200
},
{
"epoch": 70.0,
"learning_rate": 6e-06,
"loss": 0.0003,
"step": 210
},
{
"epoch": 73.33,
"learning_rate": 6e-06,
"loss": 0.0003,
"step": 220
},
{
"epoch": 73.33,
"eval_loss": 4.47265625,
"eval_runtime": 2.8074,
"eval_samples_per_second": 3.918,
"eval_steps_per_second": 0.712,
"step": 220
},
{
"epoch": 76.67,
"learning_rate": 6e-06,
"loss": 0.0002,
"step": 230
},
{
"epoch": 80.0,
"learning_rate": 6e-06,
"loss": 0.0002,
"step": 240
},
{
"epoch": 80.0,
"eval_loss": 4.49609375,
"eval_runtime": 2.8069,
"eval_samples_per_second": 3.919,
"eval_steps_per_second": 0.713,
"step": 240
},
{
"epoch": 83.33,
"learning_rate": 6e-06,
"loss": 0.0002,
"step": 250
},
{
"epoch": 86.67,
"learning_rate": 6e-06,
"loss": 0.0002,
"step": 260
},
{
"epoch": 86.67,
"eval_loss": 4.51953125,
"eval_runtime": 2.8067,
"eval_samples_per_second": 3.919,
"eval_steps_per_second": 0.713,
"step": 260
},
{
"epoch": 90.0,
"learning_rate": 6e-06,
"loss": 0.0002,
"step": 270
},
{
"epoch": 93.33,
"learning_rate": 6e-06,
"loss": 0.0002,
"step": 280
},
{
"epoch": 93.33,
"eval_loss": 4.5390625,
"eval_runtime": 2.8074,
"eval_samples_per_second": 3.918,
"eval_steps_per_second": 0.712,
"step": 280
},
{
"epoch": 96.67,
"learning_rate": 6e-06,
"loss": 0.0002,
"step": 290
},
{
"epoch": 100.0,
"learning_rate": 6e-06,
"loss": 0.0003,
"step": 300
},
{
"epoch": 100.0,
"eval_loss": 4.5546875,
"eval_runtime": 2.8081,
"eval_samples_per_second": 3.917,
"eval_steps_per_second": 0.712,
"step": 300
},
{
"epoch": 100.0,
"step": 300,
"total_flos": 28875518115840.0,
"train_loss": 0.16138767729202907,
"train_runtime": 5518.2957,
"train_samples_per_second": 0.797,
"train_steps_per_second": 0.054
}
],
"max_steps": 300,
"num_train_epochs": 100,
"total_flos": 28875518115840.0,
"trial_name": null,
"trial_params": null
}