|
{ |
|
"best_metric": 0.7105076909065247, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-400", |
|
"epoch": 0.2170138888888889, |
|
"eval_steps": 50, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00043402777777777775, |
|
"eval_loss": 1.0486223697662354, |
|
"eval_runtime": 49.5892, |
|
"eval_samples_per_second": 9.78, |
|
"eval_steps_per_second": 2.46, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.004340277777777778, |
|
"grad_norm": 1.30818772315979, |
|
"learning_rate": 4.2600000000000005e-05, |
|
"loss": 0.912, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.008680555555555556, |
|
"grad_norm": 0.8930862545967102, |
|
"learning_rate": 8.520000000000001e-05, |
|
"loss": 0.8451, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.013020833333333334, |
|
"grad_norm": 1.0381461381912231, |
|
"learning_rate": 0.0001278, |
|
"loss": 0.5786, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.017361111111111112, |
|
"grad_norm": 1.2415528297424316, |
|
"learning_rate": 0.00017040000000000002, |
|
"loss": 0.6245, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.021701388888888888, |
|
"grad_norm": 0.8442565202713013, |
|
"learning_rate": 0.000213, |
|
"loss": 0.4496, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.021701388888888888, |
|
"eval_loss": 0.7272957563400269, |
|
"eval_runtime": 49.5625, |
|
"eval_samples_per_second": 9.786, |
|
"eval_steps_per_second": 2.462, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.026041666666666668, |
|
"grad_norm": 1.2658860683441162, |
|
"learning_rate": 0.00021274057135267128, |
|
"loss": 0.8503, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.030381944444444444, |
|
"grad_norm": 1.3917545080184937, |
|
"learning_rate": 0.00021196354932097723, |
|
"loss": 0.6671, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.034722222222222224, |
|
"grad_norm": 1.3726558685302734, |
|
"learning_rate": 0.0002106727194781503, |
|
"loss": 0.7479, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0390625, |
|
"grad_norm": 1.26426100730896, |
|
"learning_rate": 0.00020887437061743096, |
|
"loss": 0.8955, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.043402777777777776, |
|
"grad_norm": 0.8687089085578918, |
|
"learning_rate": 0.00020657726411369925, |
|
"loss": 0.8438, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.043402777777777776, |
|
"eval_loss": 0.7392935752868652, |
|
"eval_runtime": 49.5607, |
|
"eval_samples_per_second": 9.786, |
|
"eval_steps_per_second": 2.462, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.04774305555555555, |
|
"grad_norm": 0.9549110531806946, |
|
"learning_rate": 0.000203792591238937, |
|
"loss": 0.7712, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.052083333333333336, |
|
"grad_norm": 1.3070204257965088, |
|
"learning_rate": 0.0002005339186394757, |
|
"loss": 0.669, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.05642361111111111, |
|
"grad_norm": 1.2052979469299316, |
|
"learning_rate": 0.00019681712224065936, |
|
"loss": 0.616, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.06076388888888889, |
|
"grad_norm": 0.6207852363586426, |
|
"learning_rate": 0.0001926603099009319, |
|
"loss": 0.7878, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.06510416666666667, |
|
"grad_norm": 0.7814576029777527, |
|
"learning_rate": 0.00018808373319217114, |
|
"loss": 0.4888, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.06510416666666667, |
|
"eval_loss": 0.7211872339248657, |
|
"eval_runtime": 49.5742, |
|
"eval_samples_per_second": 9.783, |
|
"eval_steps_per_second": 2.461, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.06944444444444445, |
|
"grad_norm": 0.8724411129951477, |
|
"learning_rate": 0.00018310968873606635, |
|
"loss": 0.7598, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.07378472222222222, |
|
"grad_norm": 1.6653738021850586, |
|
"learning_rate": 0.0001777624095772184, |
|
"loss": 0.8254, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.078125, |
|
"grad_norm": 1.2747048139572144, |
|
"learning_rate": 0.0001720679471221826, |
|
"loss": 0.8594, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.08246527777777778, |
|
"grad_norm": 1.4110088348388672, |
|
"learning_rate": 0.00016605404421963453, |
|
"loss": 0.7506, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.08680555555555555, |
|
"grad_norm": 2.366544485092163, |
|
"learning_rate": 0.00015975, |
|
"loss": 0.9703, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.08680555555555555, |
|
"eval_loss": 0.7333279252052307, |
|
"eval_runtime": 49.6093, |
|
"eval_samples_per_second": 9.776, |
|
"eval_steps_per_second": 2.459, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.09114583333333333, |
|
"grad_norm": 1.0888848304748535, |
|
"learning_rate": 0.00015318652713303674, |
|
"loss": 0.5779, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.0954861111111111, |
|
"grad_norm": 0.9510017037391663, |
|
"learning_rate": 0.00014639560219879464, |
|
"loss": 0.7727, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.0998263888888889, |
|
"grad_norm": 0.347385048866272, |
|
"learning_rate": 0.0001394103099009319, |
|
"loss": 0.6551, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.10416666666666667, |
|
"grad_norm": 1.4552265405654907, |
|
"learning_rate": 0.0001322646818813646, |
|
"loss": 0.6279, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.10850694444444445, |
|
"grad_norm": 1.388659119606018, |
|
"learning_rate": 0.0001249935309215281, |
|
"loss": 0.7938, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.10850694444444445, |
|
"eval_loss": 0.7187796831130981, |
|
"eval_runtime": 49.5717, |
|
"eval_samples_per_second": 9.784, |
|
"eval_steps_per_second": 2.461, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.11284722222222222, |
|
"grad_norm": 0.9789396524429321, |
|
"learning_rate": 0.0001176322813380051, |
|
"loss": 0.8391, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.1171875, |
|
"grad_norm": 1.4175513982772827, |
|
"learning_rate": 0.00011021679639881638, |
|
"loss": 0.8921, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.12152777777777778, |
|
"grad_norm": 1.268831729888916, |
|
"learning_rate": 0.00010278320360118368, |
|
"loss": 0.5002, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.12586805555555555, |
|
"grad_norm": 0.8849596977233887, |
|
"learning_rate": 9.536771866199493e-05, |
|
"loss": 0.602, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.13020833333333334, |
|
"grad_norm": 1.0034514665603638, |
|
"learning_rate": 8.800646907847192e-05, |
|
"loss": 0.8343, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.13020833333333334, |
|
"eval_loss": 0.7189058065414429, |
|
"eval_runtime": 49.5602, |
|
"eval_samples_per_second": 9.786, |
|
"eval_steps_per_second": 2.462, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.1345486111111111, |
|
"grad_norm": 0.5106523036956787, |
|
"learning_rate": 8.07353181186354e-05, |
|
"loss": 0.715, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.1388888888888889, |
|
"grad_norm": 0.9988264441490173, |
|
"learning_rate": 7.35896900990681e-05, |
|
"loss": 0.8572, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.14322916666666666, |
|
"grad_norm": 1.3042094707489014, |
|
"learning_rate": 6.660439780120536e-05, |
|
"loss": 0.7248, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.14756944444444445, |
|
"grad_norm": 0.9319302439689636, |
|
"learning_rate": 5.981347286696324e-05, |
|
"loss": 0.873, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.1519097222222222, |
|
"grad_norm": 1.1595097780227661, |
|
"learning_rate": 5.325000000000002e-05, |
|
"loss": 0.5117, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.1519097222222222, |
|
"eval_loss": 0.7169974446296692, |
|
"eval_runtime": 49.5441, |
|
"eval_samples_per_second": 9.789, |
|
"eval_steps_per_second": 2.462, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.15625, |
|
"grad_norm": 1.3601690530776978, |
|
"learning_rate": 4.6945955780365475e-05, |
|
"loss": 0.8535, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1605902777777778, |
|
"grad_norm": 0.8528502583503723, |
|
"learning_rate": 4.0932052877817393e-05, |
|
"loss": 0.7758, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.16493055555555555, |
|
"grad_norm": 0.9841234683990479, |
|
"learning_rate": 3.523759042278163e-05, |
|
"loss": 0.6716, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.16927083333333334, |
|
"grad_norm": 0.7331548929214478, |
|
"learning_rate": 2.989031126393367e-05, |
|
"loss": 0.9835, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.1736111111111111, |
|
"grad_norm": 1.9498828649520874, |
|
"learning_rate": 2.4916266807828855e-05, |
|
"loss": 0.9712, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1736111111111111, |
|
"eval_loss": 0.7105076909065247, |
|
"eval_runtime": 49.5867, |
|
"eval_samples_per_second": 9.781, |
|
"eval_steps_per_second": 2.46, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1779513888888889, |
|
"grad_norm": 1.3422185182571411, |
|
"learning_rate": 2.033969009906811e-05, |
|
"loss": 0.8554, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.18229166666666666, |
|
"grad_norm": 1.4816110134124756, |
|
"learning_rate": 1.6182877759340637e-05, |
|
"loss": 0.6037, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.18663194444444445, |
|
"grad_norm": 0.8355005383491516, |
|
"learning_rate": 1.2466081360524275e-05, |
|
"loss": 0.6468, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.1909722222222222, |
|
"grad_norm": 0.657798171043396, |
|
"learning_rate": 9.207408761062996e-06, |
|
"loss": 0.48, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.1953125, |
|
"grad_norm": 1.6586601734161377, |
|
"learning_rate": 6.422735886300764e-06, |
|
"loss": 0.6231, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.1953125, |
|
"eval_loss": 0.712680459022522, |
|
"eval_runtime": 49.6689, |
|
"eval_samples_per_second": 9.765, |
|
"eval_steps_per_second": 2.456, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.1996527777777778, |
|
"grad_norm": 0.8355949521064758, |
|
"learning_rate": 4.125629382569038e-06, |
|
"loss": 0.6722, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.20399305555555555, |
|
"grad_norm": 1.7075914144515991, |
|
"learning_rate": 2.327280521849694e-06, |
|
"loss": 0.66, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.20833333333333334, |
|
"grad_norm": 0.7151721119880676, |
|
"learning_rate": 1.0364506790227565e-06, |
|
"loss": 0.6677, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.2126736111111111, |
|
"grad_norm": 0.9590463042259216, |
|
"learning_rate": 2.5942864732872295e-07, |
|
"loss": 0.9534, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.2170138888888889, |
|
"grad_norm": 2.80539870262146, |
|
"learning_rate": 0.0, |
|
"loss": 0.8418, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2170138888888889, |
|
"eval_loss": 0.7122297286987305, |
|
"eval_runtime": 49.691, |
|
"eval_samples_per_second": 9.76, |
|
"eval_steps_per_second": 2.455, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 2 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.64727392017449e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|