|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.5196850393700787, |
|
"eval_steps": 10.0, |
|
"global_step": 40, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06299212598425197, |
|
"grad_norm": 2.361685037612915, |
|
"learning_rate": 5.000000000000001e-07, |
|
"loss": 0.357, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.12598425196850394, |
|
"grad_norm": 2.3121159076690674, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.3512, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.1889763779527559, |
|
"grad_norm": 2.452298879623413, |
|
"learning_rate": 1.5e-06, |
|
"loss": 0.3768, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.25196850393700787, |
|
"grad_norm": 2.3367598056793213, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.361, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.31496062992125984, |
|
"grad_norm": 2.326319932937622, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.3624, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.3779527559055118, |
|
"grad_norm": 2.1740145683288574, |
|
"learning_rate": 3e-06, |
|
"loss": 0.3504, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.4409448818897638, |
|
"grad_norm": 2.296905279159546, |
|
"learning_rate": 3.5e-06, |
|
"loss": 0.3547, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.5039370078740157, |
|
"grad_norm": 2.190321445465088, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.3387, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.5669291338582677, |
|
"grad_norm": 2.1358914375305176, |
|
"learning_rate": 4.5e-06, |
|
"loss": 0.3321, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.6299212598425197, |
|
"grad_norm": 53.26506042480469, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3143, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.6929133858267716, |
|
"grad_norm": 44.34261703491211, |
|
"learning_rate": 4.99847706754774e-06, |
|
"loss": 0.3209, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.7559055118110236, |
|
"grad_norm": 39.54725646972656, |
|
"learning_rate": 4.993910125649561e-06, |
|
"loss": 0.3071, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.8188976377952756, |
|
"grad_norm": 9.289456367492676, |
|
"learning_rate": 4.986304738420684e-06, |
|
"loss": 0.2997, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.8818897637795275, |
|
"grad_norm": 3.2083795070648193, |
|
"learning_rate": 4.975670171853926e-06, |
|
"loss": 0.3367, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.9448818897637795, |
|
"grad_norm": 3.201711654663086, |
|
"learning_rate": 4.962019382530521e-06, |
|
"loss": 0.3471, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 1.0078740157480315, |
|
"grad_norm": 2.9849319458007812, |
|
"learning_rate": 4.9453690018345144e-06, |
|
"loss": 0.3337, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 1.0708661417322836, |
|
"grad_norm": 2.7130520343780518, |
|
"learning_rate": 4.925739315689991e-06, |
|
"loss": 0.3253, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 1.1338582677165354, |
|
"grad_norm": 2.2203686237335205, |
|
"learning_rate": 4.903154239845798e-06, |
|
"loss": 0.3085, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 1.1968503937007875, |
|
"grad_norm": 1.8168455362319946, |
|
"learning_rate": 4.8776412907378845e-06, |
|
"loss": 0.2976, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 1.2598425196850394, |
|
"grad_norm": 1.3243907690048218, |
|
"learning_rate": 4.849231551964771e-06, |
|
"loss": 0.2939, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.3228346456692912, |
|
"grad_norm": 0.9819900989532471, |
|
"learning_rate": 4.817959636416969e-06, |
|
"loss": 0.2918, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 1.3858267716535433, |
|
"grad_norm": 0.7295715808868408, |
|
"learning_rate": 4.783863644106502e-06, |
|
"loss": 0.28, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 1.4488188976377954, |
|
"grad_norm": 0.6359255909919739, |
|
"learning_rate": 4.746985115747918e-06, |
|
"loss": 0.2858, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 1.5118110236220472, |
|
"grad_norm": 0.7184382677078247, |
|
"learning_rate": 4.707368982147318e-06, |
|
"loss": 0.2666, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 1.574803149606299, |
|
"grad_norm": 0.743729293346405, |
|
"learning_rate": 4.665063509461098e-06, |
|
"loss": 0.2723, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.6377952755905512, |
|
"grad_norm": 0.7132835984230042, |
|
"learning_rate": 4.620120240391065e-06, |
|
"loss": 0.274, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 1.7007874015748032, |
|
"grad_norm": 0.6352850198745728, |
|
"learning_rate": 4.572593931387604e-06, |
|
"loss": 0.2717, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 1.763779527559055, |
|
"grad_norm": 0.5060824155807495, |
|
"learning_rate": 4.522542485937369e-06, |
|
"loss": 0.2568, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 1.826771653543307, |
|
"grad_norm": 0.44254282116889954, |
|
"learning_rate": 4.470026884016805e-06, |
|
"loss": 0.2691, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 1.889763779527559, |
|
"grad_norm": 0.49223262071609497, |
|
"learning_rate": 4.415111107797445e-06, |
|
"loss": 0.2636, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.952755905511811, |
|
"grad_norm": 0.35744452476501465, |
|
"learning_rate": 4.357862063693486e-06, |
|
"loss": 0.2584, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 2.015748031496063, |
|
"grad_norm": 0.44493842124938965, |
|
"learning_rate": 4.2983495008466285e-06, |
|
"loss": 0.2663, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 2.078740157480315, |
|
"grad_norm": 0.5151119232177734, |
|
"learning_rate": 4.236645926147493e-06, |
|
"loss": 0.259, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 2.141732283464567, |
|
"grad_norm": 0.5001265406608582, |
|
"learning_rate": 4.172826515897146e-06, |
|
"loss": 0.2575, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 2.204724409448819, |
|
"grad_norm": 0.4636068344116211, |
|
"learning_rate": 4.106969024216348e-06, |
|
"loss": 0.2472, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 2.267716535433071, |
|
"grad_norm": 0.375308632850647, |
|
"learning_rate": 4.039153688314146e-06, |
|
"loss": 0.2495, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 2.3307086614173227, |
|
"grad_norm": 0.31411102414131165, |
|
"learning_rate": 3.969463130731183e-06, |
|
"loss": 0.2505, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 2.393700787401575, |
|
"grad_norm": 0.24844396114349365, |
|
"learning_rate": 3.897982258676867e-06, |
|
"loss": 0.2438, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 2.456692913385827, |
|
"grad_norm": 0.239442840218544, |
|
"learning_rate": 3.824798160583012e-06, |
|
"loss": 0.2392, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 2.5196850393700787, |
|
"grad_norm": 0.1867786943912506, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"loss": 0.2363, |
|
"step": 40 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 7, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.7790197780656947e+18, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|