|
{
  "best_metric": 0.33410218358039856,
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
  "epoch": 0.012825445684237527,
  "eval_steps": 25,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00025650891368475054,
      "grad_norm": 0.2452760636806488,
      "learning_rate": 2.9999999999999997e-05,
      "loss": 1.4425,
      "step": 1
    },
    {
      "epoch": 0.00025650891368475054,
      "eval_loss": 0.39530715346336365,
      "eval_runtime": 31.6895,
      "eval_samples_per_second": 1.578,
      "eval_steps_per_second": 0.221,
      "step": 1
    },
    {
      "epoch": 0.0005130178273695011,
      "grad_norm": 0.17752642929553986,
      "learning_rate": 5.9999999999999995e-05,
      "loss": 1.299,
      "step": 2
    },
    {
      "epoch": 0.0007695267410542517,
      "grad_norm": 0.24689655005931854,
      "learning_rate": 8.999999999999999e-05,
      "loss": 1.5197,
      "step": 3
    },
    {
      "epoch": 0.0010260356547390022,
      "grad_norm": 0.19504807889461517,
      "learning_rate": 0.00011999999999999999,
      "loss": 1.3453,
      "step": 4
    },
    {
      "epoch": 0.0012825445684237528,
      "grad_norm": 0.1986263245344162,
      "learning_rate": 0.00015,
      "loss": 1.3429,
      "step": 5
    },
    {
      "epoch": 0.0015390534821085034,
      "grad_norm": 0.310422420501709,
      "learning_rate": 0.00017999999999999998,
      "loss": 1.3507,
      "step": 6
    },
    {
      "epoch": 0.0017955623957932538,
      "grad_norm": 0.2197422981262207,
      "learning_rate": 0.00020999999999999998,
      "loss": 1.3089,
      "step": 7
    },
    {
      "epoch": 0.0020520713094780044,
      "grad_norm": 0.22613106667995453,
      "learning_rate": 0.00023999999999999998,
      "loss": 1.161,
      "step": 8
    },
    {
      "epoch": 0.002308580223162755,
      "grad_norm": 0.2758508026599884,
      "learning_rate": 0.00027,
      "loss": 1.3218,
      "step": 9
    },
    {
      "epoch": 0.0025650891368475055,
      "grad_norm": 0.24372389912605286,
      "learning_rate": 0.0003,
      "loss": 1.2277,
      "step": 10
    },
    {
      "epoch": 0.002821598050532256,
      "grad_norm": 0.24940873682498932,
      "learning_rate": 0.0002999794957488703,
      "loss": 1.1953,
      "step": 11
    },
    {
      "epoch": 0.0030781069642170067,
      "grad_norm": 0.23299287259578705,
      "learning_rate": 0.0002999179886011389,
      "loss": 1.1864,
      "step": 12
    },
    {
      "epoch": 0.003334615877901757,
      "grad_norm": 0.20366379618644714,
      "learning_rate": 0.0002998154953722457,
      "loss": 1.2584,
      "step": 13
    },
    {
      "epoch": 0.0035911247915865075,
      "grad_norm": 0.2235558032989502,
      "learning_rate": 0.00029967204408281613,
      "loss": 1.1625,
      "step": 14
    },
    {
      "epoch": 0.003847633705271258,
      "grad_norm": 0.198708176612854,
      "learning_rate": 0.00029948767395100045,
      "loss": 1.1748,
      "step": 15
    },
    {
      "epoch": 0.004104142618956009,
      "grad_norm": 0.1967567503452301,
      "learning_rate": 0.0002992624353817517,
      "loss": 1.1347,
      "step": 16
    },
    {
      "epoch": 0.004360651532640759,
      "grad_norm": 0.15366947650909424,
      "learning_rate": 0.0002989963899530457,
      "loss": 1.0114,
      "step": 17
    },
    {
      "epoch": 0.00461716044632551,
      "grad_norm": 0.18661633133888245,
      "learning_rate": 0.00029868961039904624,
      "loss": 1.2625,
      "step": 18
    },
    {
      "epoch": 0.00487366936001026,
      "grad_norm": 3.7823309898376465,
      "learning_rate": 0.00029834218059022024,
      "loss": 1.2615,
      "step": 19
    },
    {
      "epoch": 0.005130178273695011,
      "grad_norm": 0.20137062668800354,
      "learning_rate": 0.00029795419551040833,
      "loss": 1.27,
      "step": 20
    },
    {
      "epoch": 0.005386687187379761,
      "grad_norm": 0.18843530118465424,
      "learning_rate": 0.00029752576123085736,
      "loss": 1.0947,
      "step": 21
    },
    {
      "epoch": 0.005643196101064512,
      "grad_norm": 0.17115682363510132,
      "learning_rate": 0.0002970569948812214,
      "loss": 1.1604,
      "step": 22
    },
    {
      "epoch": 0.0058997050147492625,
      "grad_norm": 0.17423115670681,
      "learning_rate": 0.0002965480246175399,
      "loss": 1.2279,
      "step": 23
    },
    {
      "epoch": 0.0061562139284340135,
      "grad_norm": 0.20533309876918793,
      "learning_rate": 0.0002959989895872009,
      "loss": 1.3852,
      "step": 24
    },
    {
      "epoch": 0.006412722842118764,
      "grad_norm": 0.19516155123710632,
      "learning_rate": 0.0002954100398908995,
      "loss": 1.3035,
      "step": 25
    },
    {
      "epoch": 0.006412722842118764,
      "eval_loss": 0.3231082558631897,
      "eval_runtime": 31.8559,
      "eval_samples_per_second": 1.57,
      "eval_steps_per_second": 0.22,
      "step": 25
    },
    {
      "epoch": 0.006669231755803514,
      "grad_norm": 0.3001914322376251,
      "learning_rate": 0.0002947813365416023,
      "loss": 1.3519,
      "step": 26
    },
    {
      "epoch": 0.006925740669488265,
      "grad_norm": 0.2448321133852005,
      "learning_rate": 0.0002941130514205272,
      "loss": 1.3134,
      "step": 27
    },
    {
      "epoch": 0.007182249583173015,
      "grad_norm": 0.21013857424259186,
      "learning_rate": 0.0002934053672301536,
      "loss": 1.1908,
      "step": 28
    },
    {
      "epoch": 0.007438758496857766,
      "grad_norm": 0.2920572757720947,
      "learning_rate": 0.00029265847744427303,
      "loss": 1.2764,
      "step": 29
    },
    {
      "epoch": 0.007695267410542516,
      "grad_norm": 0.23325499892234802,
      "learning_rate": 0.00029187258625509513,
      "loss": 1.2081,
      "step": 30
    },
    {
      "epoch": 0.007951776324227267,
      "grad_norm": 0.24203185737133026,
      "learning_rate": 0.00029104790851742417,
      "loss": 1.2996,
      "step": 31
    },
    {
      "epoch": 0.008208285237912017,
      "grad_norm": 0.20244275033473969,
      "learning_rate": 0.0002901846696899191,
      "loss": 1.2741,
      "step": 32
    },
    {
      "epoch": 0.008464794151596768,
      "grad_norm": 0.24007591605186462,
      "learning_rate": 0.00028928310577345606,
      "loss": 1.2926,
      "step": 33
    },
    {
      "epoch": 0.008721303065281518,
      "grad_norm": 2.9329335689544678,
      "learning_rate": 0.0002883434632466077,
      "loss": 1.3904,
      "step": 34
    },
    {
      "epoch": 0.00897781197896627,
      "grad_norm": 0.2622990906238556,
      "learning_rate": 0.00028736599899825856,
      "loss": 1.3224,
      "step": 35
    },
    {
      "epoch": 0.00923432089265102,
      "grad_norm": 0.3389492630958557,
      "learning_rate": 0.00028635098025737434,
      "loss": 1.3022,
      "step": 36
    },
    {
      "epoch": 0.00949082980633577,
      "grad_norm": 0.3291040062904358,
      "learning_rate": 0.00028529868451994384,
      "loss": 1.3772,
      "step": 37
    },
    {
      "epoch": 0.00974733872002052,
      "grad_norm": 0.29008948802948,
      "learning_rate": 0.0002842093994731145,
      "loss": 1.3874,
      "step": 38
    },
    {
      "epoch": 0.010003847633705272,
      "grad_norm": 0.2444349229335785,
      "learning_rate": 0.00028308342291654174,
      "loss": 1.3194,
      "step": 39
    },
    {
      "epoch": 0.010260356547390022,
      "grad_norm": 0.2349006086587906,
      "learning_rate": 0.00028192106268097334,
      "loss": 1.2226,
      "step": 40
    },
    {
      "epoch": 0.010516865461074772,
      "grad_norm": 0.3519050180912018,
      "learning_rate": 0.00028072263654409154,
      "loss": 1.2642,
      "step": 41
    },
    {
      "epoch": 0.010773374374759523,
      "grad_norm": 3.122650384902954,
      "learning_rate": 0.0002794884721436361,
      "loss": 1.3993,
      "step": 42
    },
    {
      "epoch": 0.011029883288444273,
      "grad_norm": 0.4444164037704468,
      "learning_rate": 0.00027821890688783083,
      "loss": 1.2325,
      "step": 43
    },
    {
      "epoch": 0.011286392202129025,
      "grad_norm": 0.3635028302669525,
      "learning_rate": 0.0002769142878631403,
      "loss": 1.2689,
      "step": 44
    },
    {
      "epoch": 0.011542901115813775,
      "grad_norm": 0.5229223370552063,
      "learning_rate": 0.00027557497173937923,
      "loss": 1.3902,
      "step": 45
    },
    {
      "epoch": 0.011799410029498525,
      "grad_norm": 0.6191115379333496,
      "learning_rate": 0.000274201324672203,
      "loss": 1.4242,
      "step": 46
    },
    {
      "epoch": 0.012055918943183275,
      "grad_norm": 1.497909665107727,
      "learning_rate": 0.00027279372220300385,
      "loss": 1.7142,
      "step": 47
    },
    {
      "epoch": 0.012312427856868027,
      "grad_norm": 1.673071026802063,
      "learning_rate": 0.0002713525491562421,
      "loss": 1.6996,
      "step": 48
    },
    {
      "epoch": 0.012568936770552777,
      "grad_norm": 7.249523639678955,
      "learning_rate": 0.00026987819953423867,
      "loss": 1.9288,
      "step": 49
    },
    {
      "epoch": 0.012825445684237527,
      "grad_norm": 6.95615816116333,
      "learning_rate": 0.00026837107640945905,
      "loss": 2.1629,
      "step": 50
    },
    {
      "epoch": 0.012825445684237527,
      "eval_loss": 0.33410218358039856,
      "eval_runtime": 31.8447,
      "eval_samples_per_second": 1.57,
      "eval_steps_per_second": 0.22,
      "step": 50
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.7850713268224e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
|
|