lesso14's picture
Training in progress, step 200, checkpoint
a9d8dba verified
{
"best_metric": 0.5168118476867676,
"best_model_checkpoint": "miner_id_24/checkpoint-50",
"epoch": 0.37037037037037035,
"eval_steps": 50,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.001851851851851852,
"eval_loss": 1.665893316268921,
"eval_runtime": 23.8147,
"eval_samples_per_second": 9.574,
"eval_steps_per_second": 2.393,
"step": 1
},
{
"epoch": 0.018518518518518517,
"grad_norm": 21.659528732299805,
"learning_rate": 4.2800000000000004e-05,
"loss": 2.1673,
"step": 10
},
{
"epoch": 0.037037037037037035,
"grad_norm": 16.18787956237793,
"learning_rate": 8.560000000000001e-05,
"loss": 1.4786,
"step": 20
},
{
"epoch": 0.05555555555555555,
"grad_norm": 7.465191841125488,
"learning_rate": 0.0001284,
"loss": 0.9593,
"step": 30
},
{
"epoch": 0.07407407407407407,
"grad_norm": 3.2875595092773438,
"learning_rate": 0.00017120000000000001,
"loss": 0.4504,
"step": 40
},
{
"epoch": 0.09259259259259259,
"grad_norm": 9.41701602935791,
"learning_rate": 0.000214,
"loss": 0.3461,
"step": 50
},
{
"epoch": 0.09259259259259259,
"eval_loss": 0.5168118476867676,
"eval_runtime": 23.7827,
"eval_samples_per_second": 9.587,
"eval_steps_per_second": 2.397,
"step": 50
},
{
"epoch": 0.1111111111111111,
"grad_norm": 10.893043518066406,
"learning_rate": 0.00021373935337780118,
"loss": 1.3228,
"step": 60
},
{
"epoch": 0.12962962962962962,
"grad_norm": 11.86053466796875,
"learning_rate": 0.00021295868335534802,
"loss": 0.9465,
"step": 70
},
{
"epoch": 0.14814814814814814,
"grad_norm": 245.02352905273438,
"learning_rate": 0.0002116617932785172,
"loss": 0.8149,
"step": 80
},
{
"epoch": 0.16666666666666666,
"grad_norm": 6.186165809631348,
"learning_rate": 0.00020985500146540012,
"loss": 0.4509,
"step": 90
},
{
"epoch": 0.18518518518518517,
"grad_norm": 8.681466102600098,
"learning_rate": 0.0002075471104240922,
"loss": 0.4001,
"step": 100
},
{
"epoch": 0.18518518518518517,
"eval_loss": 0.7028859257698059,
"eval_runtime": 24.0444,
"eval_samples_per_second": 9.482,
"eval_steps_per_second": 2.371,
"step": 100
},
{
"epoch": 0.2037037037037037,
"grad_norm": 12.92205810546875,
"learning_rate": 0.00020474936396775828,
"loss": 1.4676,
"step": 110
},
{
"epoch": 0.2222222222222222,
"grad_norm": 15.443918228149414,
"learning_rate": 0.00020147539243590517,
"loss": 1.0882,
"step": 120
},
{
"epoch": 0.24074074074074073,
"grad_norm": 7.393853664398193,
"learning_rate": 0.00019774114628873756,
"loss": 0.6892,
"step": 130
},
{
"epoch": 0.25925925925925924,
"grad_norm": 17.222841262817383,
"learning_rate": 0.00019356481839811937,
"loss": 0.3922,
"step": 140
},
{
"epoch": 0.2777777777777778,
"grad_norm": 1.6537044048309326,
"learning_rate": 0.00018896675541373064,
"loss": 0.2181,
"step": 150
},
{
"epoch": 0.2777777777777778,
"eval_loss": 0.5269023776054382,
"eval_runtime": 23.7879,
"eval_samples_per_second": 9.585,
"eval_steps_per_second": 2.396,
"step": 150
},
{
"epoch": 0.2962962962962963,
"grad_norm": 10.590658187866211,
"learning_rate": 0.00018396935863623567,
"loss": 1.6471,
"step": 160
},
{
"epoch": 0.3148148148148148,
"grad_norm": 4.128945350646973,
"learning_rate": 0.00017859697488039784,
"loss": 0.9846,
"step": 170
},
{
"epoch": 0.3333333333333333,
"grad_norm": 9.964460372924805,
"learning_rate": 0.00017287577785984542,
"loss": 1.0229,
"step": 180
},
{
"epoch": 0.35185185185185186,
"grad_norm": 16.727569580078125,
"learning_rate": 0.0001668336406713699,
"loss": 0.3647,
"step": 190
},
{
"epoch": 0.37037037037037035,
"grad_norm": 1.3600995540618896,
"learning_rate": 0.0001605,
"loss": 0.2746,
"step": 200
},
{
"epoch": 0.37037037037037035,
"eval_loss": 0.5395556688308716,
"eval_runtime": 23.7998,
"eval_samples_per_second": 9.58,
"eval_steps_per_second": 2.395,
"step": 200
}
],
"logging_steps": 10,
"max_steps": 500,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 3
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.571948497495654e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}