|
{ |
|
"best_metric": 0.35316547751426697, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-100", |
|
"epoch": 0.2188183807439825, |
|
"eval_steps": 50, |
|
"global_step": 250, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00087527352297593, |
|
"eval_loss": 0.5409302115440369, |
|
"eval_runtime": 48.1638, |
|
"eval_samples_per_second": 9.987, |
|
"eval_steps_per_second": 2.512, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0087527352297593, |
|
"grad_norm": 0.6199236512184143, |
|
"learning_rate": 4.22e-05, |
|
"loss": 1.1012, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0175054704595186, |
|
"grad_norm": 0.603871762752533, |
|
"learning_rate": 8.44e-05, |
|
"loss": 0.7497, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0262582056892779, |
|
"grad_norm": 0.17278185486793518, |
|
"learning_rate": 0.0001266, |
|
"loss": 0.2424, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0350109409190372, |
|
"grad_norm": 0.2742425203323364, |
|
"learning_rate": 0.0001688, |
|
"loss": 0.0084, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0437636761487965, |
|
"grad_norm": 0.21153666079044342, |
|
"learning_rate": 0.000211, |
|
"loss": 0.0076, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0437636761487965, |
|
"eval_loss": 0.4160047173500061, |
|
"eval_runtime": 48.0642, |
|
"eval_samples_per_second": 10.007, |
|
"eval_steps_per_second": 2.517, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0525164113785558, |
|
"grad_norm": 0.5463985204696655, |
|
"learning_rate": 0.00021074300730241147, |
|
"loss": 0.6921, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.061269146608315096, |
|
"grad_norm": 0.8098825812339783, |
|
"learning_rate": 0.00020997328125223568, |
|
"loss": 0.5244, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0700218818380744, |
|
"grad_norm": 0.38064104318618774, |
|
"learning_rate": 0.0002086945718774165, |
|
"loss": 0.2607, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0787746170678337, |
|
"grad_norm": 0.050267886370420456, |
|
"learning_rate": 0.00020691310892149265, |
|
"loss": 0.0061, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.087527352297593, |
|
"grad_norm": 0.3279358148574829, |
|
"learning_rate": 0.00020463757149291335, |
|
"loss": 0.0024, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.087527352297593, |
|
"eval_loss": 0.35316547751426697, |
|
"eval_runtime": 48.4486, |
|
"eval_samples_per_second": 9.928, |
|
"eval_steps_per_second": 2.497, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0962800875273523, |
|
"grad_norm": 0.5827049612998962, |
|
"learning_rate": 0.0002018790457812944, |
|
"loss": 0.8235, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.1050328227571116, |
|
"grad_norm": 0.36756134033203125, |
|
"learning_rate": 0.0001986509710466168, |
|
"loss": 0.6406, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.1137855579868709, |
|
"grad_norm": 0.10399194061756134, |
|
"learning_rate": 0.00019496907414450293, |
|
"loss": 0.1631, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.12253829321663019, |
|
"grad_norm": 0.05342819169163704, |
|
"learning_rate": 0.00019085129290655697, |
|
"loss": 0.0033, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.13129102844638948, |
|
"grad_norm": 0.01769879087805748, |
|
"learning_rate": 0.00018631768874905217, |
|
"loss": 0.001, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.13129102844638948, |
|
"eval_loss": 0.3601878583431244, |
|
"eval_runtime": 48.1446, |
|
"eval_samples_per_second": 9.991, |
|
"eval_steps_per_second": 2.513, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1400437636761488, |
|
"grad_norm": 0.5799025893211365, |
|
"learning_rate": 0.0001813903489357277, |
|
"loss": 0.9458, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.1487964989059081, |
|
"grad_norm": 0.33210673928260803, |
|
"learning_rate": 0.00017609327897085954, |
|
"loss": 0.4653, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.1575492341356674, |
|
"grad_norm": 0.07417658716440201, |
|
"learning_rate": 0.00017045228564685694, |
|
"loss": 0.101, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.16630196936542668, |
|
"grad_norm": 0.005981265101581812, |
|
"learning_rate": 0.0001644948513161638, |
|
"loss": 0.0118, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.175054704595186, |
|
"grad_norm": 0.0026285904459655285, |
|
"learning_rate": 0.00015825, |
|
"loss": 0.001, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.175054704595186, |
|
"eval_loss": 0.3552694618701935, |
|
"eval_runtime": 47.8669, |
|
"eval_samples_per_second": 10.049, |
|
"eval_steps_per_second": 2.528, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1838074398249453, |
|
"grad_norm": 0.5865310430526733, |
|
"learning_rate": 0.00015174815598624768, |
|
"loss": 0.7926, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.1925601750547046, |
|
"grad_norm": 0.6336154341697693, |
|
"learning_rate": 0.00014502099560537873, |
|
"loss": 0.7418, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.2013129102844639, |
|
"grad_norm": 0.0030132222454994917, |
|
"learning_rate": 0.00013810129290655696, |
|
"loss": 0.1631, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.2100656455142232, |
|
"grad_norm": 0.004187281243503094, |
|
"learning_rate": 0.00013102275998576495, |
|
"loss": 0.001, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.2188183807439825, |
|
"grad_norm": 0.03684316575527191, |
|
"learning_rate": 0.00012381988274386116, |
|
"loss": 0.0009, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.2188183807439825, |
|
"eval_loss": 0.3651936948299408, |
|
"eval_runtime": 48.1017, |
|
"eval_samples_per_second": 10.0, |
|
"eval_steps_per_second": 2.516, |
|
"step": 250 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 3 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.17585994072064e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|