|
{ |
|
"best_metric": 1.3385324478149414, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-50", |
|
"epoch": 0.027018811847748996, |
|
"eval_steps": 25, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0005403762369549799, |
|
"grad_norm": 0.7579950094223022, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2971, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0005403762369549799, |
|
"eval_loss": 2.174750566482544, |
|
"eval_runtime": 2.9534, |
|
"eval_samples_per_second": 16.929, |
|
"eval_steps_per_second": 4.402, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0010807524739099598, |
|
"grad_norm": 1.1134428977966309, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5831, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0016211287108649397, |
|
"grad_norm": 1.063336730003357, |
|
"learning_rate": 9.990365154573717e-05, |
|
"loss": 1.536, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0021615049478199196, |
|
"grad_norm": 0.9375065565109253, |
|
"learning_rate": 9.961501876182148e-05, |
|
"loss": 1.599, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0027018811847748995, |
|
"grad_norm": 0.774001955986023, |
|
"learning_rate": 9.913533761814537e-05, |
|
"loss": 1.5445, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0032422574217298794, |
|
"grad_norm": 1.1132839918136597, |
|
"learning_rate": 9.846666218300807e-05, |
|
"loss": 1.4067, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0037826336586848593, |
|
"grad_norm": 0.8962889313697815, |
|
"learning_rate": 9.761185582727977e-05, |
|
"loss": 1.3983, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.004323009895639839, |
|
"grad_norm": 0.5397719144821167, |
|
"learning_rate": 9.657457896300791e-05, |
|
"loss": 1.3102, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.004863386132594819, |
|
"grad_norm": 0.37479284405708313, |
|
"learning_rate": 9.535927336897098e-05, |
|
"loss": 1.3458, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.005403762369549799, |
|
"grad_norm": 0.43835580348968506, |
|
"learning_rate": 9.397114317029975e-05, |
|
"loss": 1.4246, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0059441386065047785, |
|
"grad_norm": 0.4866955578327179, |
|
"learning_rate": 9.241613255361455e-05, |
|
"loss": 1.3123, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.006484514843459759, |
|
"grad_norm": 0.5045744180679321, |
|
"learning_rate": 9.070090031310558e-05, |
|
"loss": 1.3961, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.007024891080414738, |
|
"grad_norm": 0.3350974917411804, |
|
"learning_rate": 8.883279133655399e-05, |
|
"loss": 1.1432, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.007565267317369719, |
|
"grad_norm": 0.344038724899292, |
|
"learning_rate": 8.681980515339464e-05, |
|
"loss": 1.2358, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.008105643554324698, |
|
"grad_norm": 0.3136480152606964, |
|
"learning_rate": 8.467056167950311e-05, |
|
"loss": 1.2589, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.008646019791279679, |
|
"grad_norm": 0.2970927655696869, |
|
"learning_rate": 8.239426430539243e-05, |
|
"loss": 1.2068, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.009186396028234659, |
|
"grad_norm": 0.333193302154541, |
|
"learning_rate": 8.000066048588211e-05, |
|
"loss": 1.2867, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.009726772265189637, |
|
"grad_norm": 0.3563210070133209, |
|
"learning_rate": 7.75e-05, |
|
"loss": 1.2791, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.010267148502144618, |
|
"grad_norm": 0.3321801424026489, |
|
"learning_rate": 7.490299105985507e-05, |
|
"loss": 1.308, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.010807524739099598, |
|
"grad_norm": 0.3614809811115265, |
|
"learning_rate": 7.222075445642904e-05, |
|
"loss": 1.2864, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.011347900976054578, |
|
"grad_norm": 0.35129594802856445, |
|
"learning_rate": 6.946477593864228e-05, |
|
"loss": 1.3312, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.011888277213009557, |
|
"grad_norm": 0.356954425573349, |
|
"learning_rate": 6.664685702961344e-05, |
|
"loss": 1.3324, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.012428653449964537, |
|
"grad_norm": 0.37991097569465637, |
|
"learning_rate": 6.377906449072578e-05, |
|
"loss": 1.3666, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.012969029686919518, |
|
"grad_norm": 0.37282314896583557, |
|
"learning_rate": 6.087367864990233e-05, |
|
"loss": 1.3366, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.013509405923874498, |
|
"grad_norm": 0.39541906118392944, |
|
"learning_rate": 5.794314081535644e-05, |
|
"loss": 1.3103, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.013509405923874498, |
|
"eval_loss": 1.3474552631378174, |
|
"eval_runtime": 2.3537, |
|
"eval_samples_per_second": 21.243, |
|
"eval_steps_per_second": 5.523, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.014049782160829477, |
|
"grad_norm": 0.21152514219284058, |
|
"learning_rate": 5.500000000000001e-05, |
|
"loss": 1.2167, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.014590158397784457, |
|
"grad_norm": 0.28132620453834534, |
|
"learning_rate": 5.205685918464356e-05, |
|
"loss": 1.2485, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.015130534634739437, |
|
"grad_norm": 0.28846096992492676, |
|
"learning_rate": 4.912632135009769e-05, |
|
"loss": 1.3281, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.015670910871694418, |
|
"grad_norm": 0.2903560698032379, |
|
"learning_rate": 4.6220935509274235e-05, |
|
"loss": 1.2298, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.016211287108649396, |
|
"grad_norm": 0.30449891090393066, |
|
"learning_rate": 4.3353142970386564e-05, |
|
"loss": 1.2678, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01675166334560438, |
|
"grad_norm": 0.3175998032093048, |
|
"learning_rate": 4.053522406135775e-05, |
|
"loss": 1.3251, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.017292039582559357, |
|
"grad_norm": 0.3025440573692322, |
|
"learning_rate": 3.777924554357096e-05, |
|
"loss": 1.2648, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.017832415819514336, |
|
"grad_norm": 0.3220917582511902, |
|
"learning_rate": 3.509700894014496e-05, |
|
"loss": 1.3114, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.018372792056469318, |
|
"grad_norm": 0.30278536677360535, |
|
"learning_rate": 3.250000000000001e-05, |
|
"loss": 1.2781, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.018913168293424296, |
|
"grad_norm": 0.3035091459751129, |
|
"learning_rate": 2.9999339514117912e-05, |
|
"loss": 1.3578, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.019453544530379275, |
|
"grad_norm": 0.3576919138431549, |
|
"learning_rate": 2.760573569460757e-05, |
|
"loss": 1.2516, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.019993920767334257, |
|
"grad_norm": 0.359345406293869, |
|
"learning_rate": 2.53294383204969e-05, |
|
"loss": 1.2206, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.020534297004289236, |
|
"grad_norm": 0.23424789309501648, |
|
"learning_rate": 2.3180194846605367e-05, |
|
"loss": 1.203, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.021074673241244218, |
|
"grad_norm": 0.20766839385032654, |
|
"learning_rate": 2.1167208663446025e-05, |
|
"loss": 1.1936, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.021615049478199196, |
|
"grad_norm": 0.2421371042728424, |
|
"learning_rate": 1.9299099686894423e-05, |
|
"loss": 1.2389, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.022155425715154175, |
|
"grad_norm": 0.24859775602817535, |
|
"learning_rate": 1.758386744638546e-05, |
|
"loss": 1.2361, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.022695801952109157, |
|
"grad_norm": 0.2736833393573761, |
|
"learning_rate": 1.602885682970026e-05, |
|
"loss": 1.3148, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.023236178189064136, |
|
"grad_norm": 0.26563286781311035, |
|
"learning_rate": 1.464072663102903e-05, |
|
"loss": 1.2846, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.023776554426019114, |
|
"grad_norm": 0.3035804033279419, |
|
"learning_rate": 1.3425421036992098e-05, |
|
"loss": 1.2743, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.024316930662974096, |
|
"grad_norm": 0.2828158140182495, |
|
"learning_rate": 1.2388144172720251e-05, |
|
"loss": 1.2914, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.024857306899929075, |
|
"grad_norm": 0.2898053526878357, |
|
"learning_rate": 1.1533337816991932e-05, |
|
"loss": 1.2546, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.025397683136884057, |
|
"grad_norm": 0.30576568841934204, |
|
"learning_rate": 1.0864662381854632e-05, |
|
"loss": 1.2922, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.025938059373839036, |
|
"grad_norm": 0.3181474208831787, |
|
"learning_rate": 1.0384981238178534e-05, |
|
"loss": 1.3652, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.026478435610794014, |
|
"grad_norm": 0.34133440256118774, |
|
"learning_rate": 1.0096348454262845e-05, |
|
"loss": 1.3907, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.027018811847748996, |
|
"grad_norm": 0.3962940573692322, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3401, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.027018811847748996, |
|
"eval_loss": 1.3385324478149414, |
|
"eval_runtime": 2.3472, |
|
"eval_samples_per_second": 21.302, |
|
"eval_steps_per_second": 5.538, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 50, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 1, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.061338191368028e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|