|
{ |
|
"best_metric": 0.3523136377334595, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-250", |
|
"epoch": 0.2188183807439825, |
|
"eval_steps": 50, |
|
"global_step": 250, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00087527352297593, |
|
"eval_loss": 0.5409302115440369, |
|
"eval_runtime": 47.9717, |
|
"eval_samples_per_second": 10.027, |
|
"eval_steps_per_second": 2.522, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0087527352297593, |
|
"grad_norm": 0.6175008416175842, |
|
"learning_rate": 4.0600000000000004e-05, |
|
"loss": 1.102, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0175054704595186, |
|
"grad_norm": 0.5922743082046509, |
|
"learning_rate": 8.120000000000001e-05, |
|
"loss": 0.7527, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0262582056892779, |
|
"grad_norm": 0.18080410361289978, |
|
"learning_rate": 0.00012179999999999999, |
|
"loss": 0.2428, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0350109409190372, |
|
"grad_norm": 0.2408462017774582, |
|
"learning_rate": 0.00016240000000000002, |
|
"loss": 0.0084, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0437636761487965, |
|
"grad_norm": 0.16170749068260193, |
|
"learning_rate": 0.000203, |
|
"loss": 0.0081, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0437636761487965, |
|
"eval_loss": 0.41450178623199463, |
|
"eval_runtime": 48.0897, |
|
"eval_samples_per_second": 10.002, |
|
"eval_steps_per_second": 2.516, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0525164113785558, |
|
"grad_norm": 0.523928165435791, |
|
"learning_rate": 0.00020275275110137215, |
|
"loss": 0.6924, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.061269146608315096, |
|
"grad_norm": 0.6150461435317993, |
|
"learning_rate": 0.00020201220897726938, |
|
"loss": 0.5178, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0700218818380744, |
|
"grad_norm": 0.3789159655570984, |
|
"learning_rate": 0.00020078198147448128, |
|
"loss": 0.2702, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0787746170678337, |
|
"grad_norm": 1.880570411682129, |
|
"learning_rate": 0.00019906806213773937, |
|
"loss": 0.0048, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.087527352297593, |
|
"grad_norm": 0.6373544335365295, |
|
"learning_rate": 0.0001968788010097697, |
|
"loss": 0.0049, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.087527352297593, |
|
"eval_loss": 0.3669174313545227, |
|
"eval_runtime": 48.1037, |
|
"eval_samples_per_second": 9.999, |
|
"eval_steps_per_second": 2.515, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0962800875273523, |
|
"grad_norm": 0.6095497608184814, |
|
"learning_rate": 0.00019422486395072398, |
|
"loss": 0.8385, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.1050328227571116, |
|
"grad_norm": 0.44535937905311584, |
|
"learning_rate": 0.0001911191806751811, |
|
"loss": 0.5802, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.1137855579868709, |
|
"grad_norm": 0.10592010617256165, |
|
"learning_rate": 0.00018757688175987723, |
|
"loss": 0.1586, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.12253829321663019, |
|
"grad_norm": 0.025220032781362534, |
|
"learning_rate": 0.00018361522492905716, |
|
"loss": 0.0018, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.13129102844638948, |
|
"grad_norm": 0.014781222678720951, |
|
"learning_rate": 0.00017925351097657625, |
|
"loss": 0.002, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.13129102844638948, |
|
"eval_loss": 0.36381667852401733, |
|
"eval_runtime": 48.1198, |
|
"eval_samples_per_second": 9.996, |
|
"eval_steps_per_second": 2.515, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1400437636761488, |
|
"grad_norm": 0.5591445565223694, |
|
"learning_rate": 0.00017451298973437308, |
|
"loss": 0.9412, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.1487964989059081, |
|
"grad_norm": 0.30725646018981934, |
|
"learning_rate": 0.0001694167565454241, |
|
"loss": 0.4974, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.1575492341356674, |
|
"grad_norm": 0.11813530325889587, |
|
"learning_rate": 0.0001639896397455543, |
|
"loss": 0.0992, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.16630196936542668, |
|
"grad_norm": 0.009045140817761421, |
|
"learning_rate": 0.0001582580797022808, |
|
"loss": 0.0109, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.175054704595186, |
|
"grad_norm": 0.017792249098420143, |
|
"learning_rate": 0.00015225, |
|
"loss": 0.0007, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.175054704595186, |
|
"eval_loss": 0.35675832629203796, |
|
"eval_runtime": 47.8961, |
|
"eval_samples_per_second": 10.043, |
|
"eval_steps_per_second": 2.526, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1838074398249453, |
|
"grad_norm": 0.5258771777153015, |
|
"learning_rate": 0.00014599467139909136, |
|
"loss": 0.7887, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.1925601750547046, |
|
"grad_norm": 0.5954610109329224, |
|
"learning_rate": 0.0001395225692317151, |
|
"loss": 0.7499, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.2013129102844639, |
|
"grad_norm": 0.0030906260944902897, |
|
"learning_rate": 0.00013286522492905717, |
|
"loss": 0.1606, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.2100656455142232, |
|
"grad_norm": 0.0038282345049083233, |
|
"learning_rate": 0.00012605507240336626, |
|
"loss": 0.0009, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.2188183807439825, |
|
"grad_norm": 0.01913132704794407, |
|
"learning_rate": 0.00011912529003319345, |
|
"loss": 0.0004, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.2188183807439825, |
|
"eval_loss": 0.3523136377334595, |
|
"eval_runtime": 48.1668, |
|
"eval_samples_per_second": 9.986, |
|
"eval_steps_per_second": 2.512, |
|
"step": 250 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.17585994072064e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|