{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.3325020781379884,
  "eval_steps": 9,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0033250207813798837,
      "eval_loss": 2.4745399951934814,
      "eval_runtime": 35.0849,
      "eval_samples_per_second": 14.451,
      "eval_steps_per_second": 1.824,
      "step": 1
    },
    {
      "epoch": 0.00997506234413965,
      "grad_norm": 1.8293392658233643,
      "learning_rate": 1.5e-05,
      "loss": 2.4184,
      "step": 3
    },
    {
      "epoch": 0.0199501246882793,
      "grad_norm": 2.3942174911499023,
      "learning_rate": 3e-05,
      "loss": 2.4593,
      "step": 6
    },
    {
      "epoch": 0.029925187032418952,
      "grad_norm": 1.915959358215332,
      "learning_rate": 4.5e-05,
      "loss": 2.2337,
      "step": 9
    },
    {
      "epoch": 0.029925187032418952,
      "eval_loss": 2.385331153869629,
      "eval_runtime": 35.4941,
      "eval_samples_per_second": 14.284,
      "eval_steps_per_second": 1.803,
      "step": 9
    },
    {
      "epoch": 0.0399002493765586,
      "grad_norm": 2.4679598808288574,
      "learning_rate": 4.993910125649561e-05,
      "loss": 2.4138,
      "step": 12
    },
    {
      "epoch": 0.04987531172069826,
      "grad_norm": 2.2216885089874268,
      "learning_rate": 4.962019382530521e-05,
      "loss": 2.1899,
      "step": 15
    },
    {
      "epoch": 0.059850374064837904,
      "grad_norm": 2.1264395713806152,
      "learning_rate": 4.9031542398457974e-05,
      "loss": 1.9607,
      "step": 18
    },
    {
      "epoch": 0.059850374064837904,
      "eval_loss": 1.7929750680923462,
      "eval_runtime": 35.5582,
      "eval_samples_per_second": 14.258,
      "eval_steps_per_second": 1.8,
      "step": 18
    },
    {
      "epoch": 0.06982543640897755,
      "grad_norm": 1.875053882598877,
      "learning_rate": 4.817959636416969e-05,
      "loss": 1.7054,
      "step": 21
    },
    {
      "epoch": 0.0798004987531172,
      "grad_norm": 1.6136735677719116,
      "learning_rate": 4.707368982147318e-05,
      "loss": 1.4407,
      "step": 24
    },
    {
      "epoch": 0.08977556109725686,
      "grad_norm": 1.430875301361084,
      "learning_rate": 4.572593931387604e-05,
      "loss": 1.4597,
      "step": 27
    },
    {
      "epoch": 0.08977556109725686,
      "eval_loss": 1.4339165687561035,
      "eval_runtime": 35.58,
      "eval_samples_per_second": 14.25,
      "eval_steps_per_second": 1.799,
      "step": 27
    },
    {
      "epoch": 0.09975062344139651,
      "grad_norm": 1.4442172050476074,
      "learning_rate": 4.415111107797445e-05,
      "loss": 1.4351,
      "step": 30
    },
    {
      "epoch": 0.10972568578553615,
      "grad_norm": 1.886985421180725,
      "learning_rate": 4.2366459261474933e-05,
      "loss": 1.4478,
      "step": 33
    },
    {
      "epoch": 0.11970074812967581,
      "grad_norm": 1.4151262044906616,
      "learning_rate": 4.039153688314145e-05,
      "loss": 1.2824,
      "step": 36
    },
    {
      "epoch": 0.11970074812967581,
      "eval_loss": 1.3912534713745117,
      "eval_runtime": 35.581,
      "eval_samples_per_second": 14.249,
      "eval_steps_per_second": 1.799,
      "step": 36
    },
    {
      "epoch": 0.12967581047381546,
      "grad_norm": 1.5250979661941528,
      "learning_rate": 3.824798160583012e-05,
      "loss": 1.3491,
      "step": 39
    },
    {
      "epoch": 0.1396508728179551,
      "grad_norm": 1.3472334146499634,
      "learning_rate": 3.5959278669726935e-05,
      "loss": 1.4616,
      "step": 42
    },
    {
      "epoch": 0.14962593516209477,
      "grad_norm": 1.2380613088607788,
      "learning_rate": 3.355050358314172e-05,
      "loss": 1.2844,
      "step": 45
    },
    {
      "epoch": 0.14962593516209477,
      "eval_loss": 1.3702603578567505,
      "eval_runtime": 35.6209,
      "eval_samples_per_second": 14.233,
      "eval_steps_per_second": 1.797,
      "step": 45
    },
    {
      "epoch": 0.1596009975062344,
      "grad_norm": 1.376523494720459,
      "learning_rate": 3.104804738999169e-05,
      "loss": 1.3452,
      "step": 48
    },
    {
      "epoch": 0.16957605985037408,
      "grad_norm": 1.2102608680725098,
      "learning_rate": 2.8479327524001636e-05,
      "loss": 1.3209,
      "step": 51
    },
    {
      "epoch": 0.17955112219451372,
      "grad_norm": 1.7671351432800293,
      "learning_rate": 2.587248741756253e-05,
      "loss": 1.4073,
      "step": 54
    },
    {
      "epoch": 0.17955112219451372,
      "eval_loss": 1.3600367307662964,
      "eval_runtime": 35.6092,
      "eval_samples_per_second": 14.238,
      "eval_steps_per_second": 1.797,
      "step": 54
    },
    {
      "epoch": 0.18952618453865336,
      "grad_norm": 1.6414278745651245,
      "learning_rate": 2.3256088156396868e-05,
      "loss": 1.4529,
      "step": 57
    },
    {
      "epoch": 0.19950124688279303,
      "grad_norm": 1.6693403720855713,
      "learning_rate": 2.0658795558326743e-05,
      "loss": 1.263,
      "step": 60
    },
    {
      "epoch": 0.20947630922693267,
      "grad_norm": 1.2293144464492798,
      "learning_rate": 1.8109066104575023e-05,
      "loss": 1.374,
      "step": 63
    },
    {
      "epoch": 0.20947630922693267,
      "eval_loss": 1.3520565032958984,
      "eval_runtime": 35.64,
      "eval_samples_per_second": 14.226,
      "eval_steps_per_second": 1.796,
      "step": 63
    },
    {
      "epoch": 0.2194513715710723,
      "grad_norm": 1.280704379081726,
      "learning_rate": 1.56348351646022e-05,
      "loss": 1.2475,
      "step": 66
    },
    {
      "epoch": 0.22942643391521197,
      "grad_norm": 1.503631591796875,
      "learning_rate": 1.3263210930352737e-05,
      "loss": 1.3387,
      "step": 69
    },
    {
      "epoch": 0.23940149625935161,
      "grad_norm": 1.318060278892517,
      "learning_rate": 1.1020177413231334e-05,
      "loss": 1.2855,
      "step": 72
    },
    {
      "epoch": 0.23940149625935161,
      "eval_loss": 1.346817970275879,
      "eval_runtime": 35.6,
      "eval_samples_per_second": 14.242,
      "eval_steps_per_second": 1.798,
      "step": 72
    },
    {
      "epoch": 0.24937655860349128,
      "grad_norm": 1.3991254568099976,
      "learning_rate": 8.930309757836517e-06,
      "loss": 1.2667,
      "step": 75
    },
    {
      "epoch": 0.2593516209476309,
      "grad_norm": 1.3369468450546265,
      "learning_rate": 7.016504991533726e-06,
      "loss": 1.2339,
      "step": 78
    },
    {
      "epoch": 0.26932668329177056,
      "grad_norm": 1.3982964754104614,
      "learning_rate": 5.299731159831953e-06,
      "loss": 1.3598,
      "step": 81
    },
    {
      "epoch": 0.26932668329177056,
      "eval_loss": 1.3438166379928589,
      "eval_runtime": 35.6426,
      "eval_samples_per_second": 14.225,
      "eval_steps_per_second": 1.796,
      "step": 81
    },
    {
      "epoch": 0.2793017456359102,
      "grad_norm": 1.6268279552459717,
      "learning_rate": 3.798797596089351e-06,
      "loss": 1.258,
      "step": 84
    },
    {
      "epoch": 0.2892768079800499,
      "grad_norm": 1.3860325813293457,
      "learning_rate": 2.5301488425208296e-06,
      "loss": 1.3495,
      "step": 87
    },
    {
      "epoch": 0.29925187032418954,
      "grad_norm": 1.7704390287399292,
      "learning_rate": 1.5076844803522922e-06,
      "loss": 1.312,
      "step": 90
    },
    {
      "epoch": 0.29925187032418954,
      "eval_loss": 1.3430734872817993,
      "eval_runtime": 35.5853,
      "eval_samples_per_second": 14.247,
      "eval_steps_per_second": 1.798,
      "step": 90
    },
    {
      "epoch": 0.3092269326683292,
      "grad_norm": 1.516430377960205,
      "learning_rate": 7.426068431000882e-07,
      "loss": 1.2602,
      "step": 93
    },
    {
      "epoch": 0.3192019950124688,
      "grad_norm": 1.294288158416748,
      "learning_rate": 2.4329828146074095e-07,
      "loss": 1.3084,
      "step": 96
    },
    {
      "epoch": 0.32917705735660846,
      "grad_norm": 1.539898157119751,
      "learning_rate": 1.522932452260595e-08,
      "loss": 1.4346,
      "step": 99
    },
    {
      "epoch": 0.32917705735660846,
      "eval_loss": 1.3427472114562988,
      "eval_runtime": 35.5324,
      "eval_samples_per_second": 14.269,
      "eval_steps_per_second": 1.801,
      "step": 99
    }
  ],
  "logging_steps": 3,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 9,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.483774567120896e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}