{
  "best_metric": 4.55251932144165,
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
  "epoch": 0.2557544757033248,
  "eval_steps": 50,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.005115089514066497,
      "grad_norm": 9.48886775970459,
      "learning_rate": 1e-05,
      "loss": 5.5047,
      "step": 1
    },
    {
      "epoch": 0.005115089514066497,
      "eval_loss": 7.651984214782715,
      "eval_runtime": 23.2283,
      "eval_samples_per_second": 14.207,
      "eval_steps_per_second": 3.573,
      "step": 1
    },
    {
      "epoch": 0.010230179028132993,
      "grad_norm": 7.729719638824463,
      "learning_rate": 2e-05,
      "loss": 5.8985,
      "step": 2
    },
    {
      "epoch": 0.015345268542199489,
      "grad_norm": 8.052858352661133,
      "learning_rate": 3e-05,
      "loss": 6.0288,
      "step": 3
    },
    {
      "epoch": 0.020460358056265986,
      "grad_norm": 6.788303852081299,
      "learning_rate": 4e-05,
      "loss": 5.5813,
      "step": 4
    },
    {
      "epoch": 0.02557544757033248,
      "grad_norm": 4.8062262535095215,
      "learning_rate": 5e-05,
      "loss": 5.5641,
      "step": 5
    },
    {
      "epoch": 0.030690537084398978,
      "grad_norm": 3.997166872024536,
      "learning_rate": 6e-05,
      "loss": 5.9237,
      "step": 6
    },
    {
      "epoch": 0.03580562659846547,
      "grad_norm": 3.928126811981201,
      "learning_rate": 7e-05,
      "loss": 5.4307,
      "step": 7
    },
    {
      "epoch": 0.04092071611253197,
      "grad_norm": 5.754164695739746,
      "learning_rate": 8e-05,
      "loss": 4.9893,
      "step": 8
    },
    {
      "epoch": 0.04603580562659847,
      "grad_norm": 4.404843807220459,
      "learning_rate": 9e-05,
      "loss": 5.0629,
      "step": 9
    },
    {
      "epoch": 0.05115089514066496,
      "grad_norm": 4.053828716278076,
      "learning_rate": 0.0001,
      "loss": 4.5917,
      "step": 10
    },
    {
      "epoch": 0.056265984654731455,
      "grad_norm": 3.8155884742736816,
      "learning_rate": 9.999316524962345e-05,
      "loss": 4.2516,
      "step": 11
    },
    {
      "epoch": 0.061381074168797956,
      "grad_norm": 3.4340007305145264,
      "learning_rate": 9.997266286704631e-05,
      "loss": 4.174,
      "step": 12
    },
    {
      "epoch": 0.06649616368286446,
      "grad_norm": 2.701197385787964,
      "learning_rate": 9.993849845741524e-05,
      "loss": 3.8316,
      "step": 13
    },
    {
      "epoch": 0.07161125319693094,
      "grad_norm": 3.004122734069824,
      "learning_rate": 9.989068136093873e-05,
      "loss": 3.987,
      "step": 14
    },
    {
      "epoch": 0.07672634271099744,
      "grad_norm": 2.845054864883423,
      "learning_rate": 9.98292246503335e-05,
      "loss": 3.6807,
      "step": 15
    },
    {
      "epoch": 0.08184143222506395,
      "grad_norm": 3.3714535236358643,
      "learning_rate": 9.975414512725057e-05,
      "loss": 4.0097,
      "step": 16
    },
    {
      "epoch": 0.08695652173913043,
      "grad_norm": 2.416005849838257,
      "learning_rate": 9.966546331768191e-05,
      "loss": 3.2052,
      "step": 17
    },
    {
      "epoch": 0.09207161125319693,
      "grad_norm": 2.8669612407684326,
      "learning_rate": 9.956320346634876e-05,
      "loss": 3.2715,
      "step": 18
    },
    {
      "epoch": 0.09718670076726342,
      "grad_norm": 2.7934770584106445,
      "learning_rate": 9.944739353007344e-05,
      "loss": 3.0802,
      "step": 19
    },
    {
      "epoch": 0.10230179028132992,
      "grad_norm": 2.592581272125244,
      "learning_rate": 9.931806517013612e-05,
      "loss": 3.1131,
      "step": 20
    },
    {
      "epoch": 0.10741687979539642,
      "grad_norm": 3.4438283443450928,
      "learning_rate": 9.917525374361912e-05,
      "loss": 3.175,
      "step": 21
    },
    {
      "epoch": 0.11253196930946291,
      "grad_norm": 3.016294479370117,
      "learning_rate": 9.901899829374047e-05,
      "loss": 3.5276,
      "step": 22
    },
    {
      "epoch": 0.11764705882352941,
      "grad_norm": 2.802727222442627,
      "learning_rate": 9.884934153917997e-05,
      "loss": 2.9135,
      "step": 23
    },
    {
      "epoch": 0.12276214833759591,
      "grad_norm": 2.832228422164917,
      "learning_rate": 9.86663298624003e-05,
      "loss": 3.1054,
      "step": 24
    },
    {
      "epoch": 0.1278772378516624,
      "grad_norm": 2.6756863594055176,
      "learning_rate": 9.847001329696653e-05,
      "loss": 2.9628,
      "step": 25
    },
    {
      "epoch": 0.1329923273657289,
      "grad_norm": 2.733081817626953,
      "learning_rate": 9.826044551386744e-05,
      "loss": 2.9537,
      "step": 26
    },
    {
      "epoch": 0.13810741687979539,
      "grad_norm": 2.7606492042541504,
      "learning_rate": 9.803768380684242e-05,
      "loss": 3.0507,
      "step": 27
    },
    {
      "epoch": 0.1432225063938619,
      "grad_norm": 2.96380877494812,
      "learning_rate": 9.780178907671789e-05,
      "loss": 2.7823,
      "step": 28
    },
    {
      "epoch": 0.1483375959079284,
      "grad_norm": 3.0976133346557617,
      "learning_rate": 9.755282581475769e-05,
      "loss": 2.9072,
      "step": 29
    },
    {
      "epoch": 0.1534526854219949,
      "grad_norm": 3.3771605491638184,
      "learning_rate": 9.729086208503174e-05,
      "loss": 3.009,
      "step": 30
    },
    {
      "epoch": 0.1585677749360614,
      "grad_norm": 3.442176580429077,
      "learning_rate": 9.701596950580806e-05,
      "loss": 2.603,
      "step": 31
    },
    {
      "epoch": 0.1636828644501279,
      "grad_norm": 3.1501970291137695,
      "learning_rate": 9.672822322997305e-05,
      "loss": 3.0066,
      "step": 32
    },
    {
      "epoch": 0.16879795396419436,
      "grad_norm": 3.4222052097320557,
      "learning_rate": 9.642770192448536e-05,
      "loss": 2.802,
      "step": 33
    },
    {
      "epoch": 0.17391304347826086,
      "grad_norm": 3.5420730113983154,
      "learning_rate": 9.611448774886924e-05,
      "loss": 2.795,
      "step": 34
    },
    {
      "epoch": 0.17902813299232737,
      "grad_norm": 3.4514718055725098,
      "learning_rate": 9.578866633275288e-05,
      "loss": 2.4177,
      "step": 35
    },
    {
      "epoch": 0.18414322250639387,
      "grad_norm": 4.618533134460449,
      "learning_rate": 9.545032675245813e-05,
      "loss": 1.9886,
      "step": 36
    },
    {
      "epoch": 0.18925831202046037,
      "grad_norm": 4.481081485748291,
      "learning_rate": 9.509956150664796e-05,
      "loss": 2.0961,
      "step": 37
    },
    {
      "epoch": 0.19437340153452684,
      "grad_norm": 4.901217460632324,
      "learning_rate": 9.473646649103818e-05,
      "loss": 1.8523,
      "step": 38
    },
    {
      "epoch": 0.19948849104859334,
      "grad_norm": 3.705735921859741,
      "learning_rate": 9.43611409721806e-05,
      "loss": 0.8015,
      "step": 39
    },
    {
      "epoch": 0.20460358056265984,
      "grad_norm": 1.5407553911209106,
      "learning_rate": 9.397368756032445e-05,
      "loss": 0.1171,
      "step": 40
    },
    {
      "epoch": 0.20971867007672634,
      "grad_norm": 2.0642812252044678,
      "learning_rate": 9.357421218136386e-05,
      "loss": 0.3588,
      "step": 41
    },
    {
      "epoch": 0.21483375959079284,
      "grad_norm": 2.8660378456115723,
      "learning_rate": 9.316282404787871e-05,
      "loss": 0.2585,
      "step": 42
    },
    {
      "epoch": 0.21994884910485935,
      "grad_norm": 2.2530696392059326,
      "learning_rate": 9.273963562927695e-05,
      "loss": 0.3681,
      "step": 43
    },
    {
      "epoch": 0.22506393861892582,
      "grad_norm": 2.2244601249694824,
      "learning_rate": 9.230476262104677e-05,
      "loss": 0.0182,
      "step": 44
    },
    {
      "epoch": 0.23017902813299232,
      "grad_norm": 0.11490941047668457,
      "learning_rate": 9.185832391312644e-05,
      "loss": 0.0033,
      "step": 45
    },
    {
      "epoch": 0.23529411764705882,
      "grad_norm": 0.13940373063087463,
      "learning_rate": 9.140044155740101e-05,
      "loss": 0.0032,
      "step": 46
    },
    {
      "epoch": 0.24040920716112532,
      "grad_norm": 3.6119070053100586,
      "learning_rate": 9.093124073433463e-05,
      "loss": 0.1022,
      "step": 47
    },
    {
      "epoch": 0.24552429667519182,
      "grad_norm": 1.867747187614441,
      "learning_rate": 9.045084971874738e-05,
      "loss": 0.0344,
      "step": 48
    },
    {
      "epoch": 0.2506393861892583,
      "grad_norm": 56.889617919921875,
      "learning_rate": 8.995939984474624e-05,
      "loss": 12.0252,
      "step": 49
    },
    {
      "epoch": 0.2557544757033248,
      "grad_norm": 51.18042755126953,
      "learning_rate": 8.945702546981969e-05,
      "loss": 10.018,
      "step": 50
    },
    {
      "epoch": 0.2557544757033248,
      "eval_loss": 4.55251932144165,
      "eval_runtime": 23.5332,
      "eval_samples_per_second": 14.023,
      "eval_steps_per_second": 3.527,
      "step": 50
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 6.71020323176448e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}