|
{ |
|
"best_metric": 0.5168118476867676, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-50", |
|
"epoch": 0.37037037037037035, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.001851851851851852, |
|
"eval_loss": 1.665893316268921, |
|
"eval_runtime": 23.8147, |
|
"eval_samples_per_second": 9.574, |
|
"eval_steps_per_second": 2.393, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.018518518518518517, |
|
"grad_norm": 21.659528732299805, |
|
"learning_rate": 4.2800000000000004e-05, |
|
"loss": 2.1673, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.037037037037037035, |
|
"grad_norm": 16.18787956237793, |
|
"learning_rate": 8.560000000000001e-05, |
|
"loss": 1.4786, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05555555555555555, |
|
"grad_norm": 7.465191841125488, |
|
"learning_rate": 0.0001284, |
|
"loss": 0.9593, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07407407407407407, |
|
"grad_norm": 3.2875595092773438, |
|
"learning_rate": 0.00017120000000000001, |
|
"loss": 0.4504, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09259259259259259, |
|
"grad_norm": 9.41701602935791, |
|
"learning_rate": 0.000214, |
|
"loss": 0.3461, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.09259259259259259, |
|
"eval_loss": 0.5168118476867676, |
|
"eval_runtime": 23.7827, |
|
"eval_samples_per_second": 9.587, |
|
"eval_steps_per_second": 2.397, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1111111111111111, |
|
"grad_norm": 10.893043518066406, |
|
"learning_rate": 0.00021373935337780118, |
|
"loss": 1.3228, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.12962962962962962, |
|
"grad_norm": 11.86053466796875, |
|
"learning_rate": 0.00021295868335534802, |
|
"loss": 0.9465, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.14814814814814814, |
|
"grad_norm": 245.02352905273438, |
|
"learning_rate": 0.0002116617932785172, |
|
"loss": 0.8149, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.16666666666666666, |
|
"grad_norm": 6.186165809631348, |
|
"learning_rate": 0.00020985500146540012, |
|
"loss": 0.4509, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.18518518518518517, |
|
"grad_norm": 8.681466102600098, |
|
"learning_rate": 0.0002075471104240922, |
|
"loss": 0.4001, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.18518518518518517, |
|
"eval_loss": 0.7028859257698059, |
|
"eval_runtime": 24.0444, |
|
"eval_samples_per_second": 9.482, |
|
"eval_steps_per_second": 2.371, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2037037037037037, |
|
"grad_norm": 12.92205810546875, |
|
"learning_rate": 0.00020474936396775828, |
|
"loss": 1.4676, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2222222222222222, |
|
"grad_norm": 15.443918228149414, |
|
"learning_rate": 0.00020147539243590517, |
|
"loss": 1.0882, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.24074074074074073, |
|
"grad_norm": 7.393853664398193, |
|
"learning_rate": 0.00019774114628873756, |
|
"loss": 0.6892, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.25925925925925924, |
|
"grad_norm": 17.222841262817383, |
|
"learning_rate": 0.00019356481839811937, |
|
"loss": 0.3922, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2777777777777778, |
|
"grad_norm": 1.6537044048309326, |
|
"learning_rate": 0.00018896675541373064, |
|
"loss": 0.2181, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2777777777777778, |
|
"eval_loss": 0.5269023776054382, |
|
"eval_runtime": 23.7879, |
|
"eval_samples_per_second": 9.585, |
|
"eval_steps_per_second": 2.396, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2962962962962963, |
|
"grad_norm": 10.590658187866211, |
|
"learning_rate": 0.00018396935863623567, |
|
"loss": 1.6471, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3148148148148148, |
|
"grad_norm": 4.128945350646973, |
|
"learning_rate": 0.00017859697488039784, |
|
"loss": 0.9846, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 9.964460372924805, |
|
"learning_rate": 0.00017287577785984542, |
|
"loss": 1.0229, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.35185185185185186, |
|
"grad_norm": 16.727569580078125, |
|
"learning_rate": 0.0001668336406713699, |
|
"loss": 0.3647, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.37037037037037035, |
|
"grad_norm": 1.3600995540618896, |
|
"learning_rate": 0.0001605, |
|
"loss": 0.2746, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.37037037037037035, |
|
"eval_loss": 0.5395556688308716, |
|
"eval_runtime": 23.7998, |
|
"eval_samples_per_second": 9.58, |
|
"eval_steps_per_second": 2.395, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 3 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.571948497495654e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|