{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9933774834437086,
  "eval_steps": 500,
  "global_step": 75,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.013245033112582781,
      "grad_norm": 0.4943664073944092,
      "learning_rate": 2.5e-05,
      "loss": 1.4272,
      "step": 1
    },
    {
      "epoch": 0.06622516556291391,
      "grad_norm": 0.20984387397766113,
      "learning_rate": 0.000125,
      "loss": 1.3101,
      "step": 5
    },
    {
      "epoch": 0.13245033112582782,
      "grad_norm": 0.2290477156639099,
      "learning_rate": 0.00019956059820218982,
      "loss": 1.2917,
      "step": 10
    },
    {
      "epoch": 0.1986754966887417,
      "grad_norm": 0.15163910388946533,
      "learning_rate": 0.00019466156752904343,
      "loss": 1.2823,
      "step": 15
    },
    {
      "epoch": 0.26490066225165565,
      "grad_norm": 0.1627238243818283,
      "learning_rate": 0.00018458320592590975,
      "loss": 1.1889,
      "step": 20
    },
    {
      "epoch": 0.33112582781456956,
      "grad_norm": 0.15383219718933105,
      "learning_rate": 0.00016987694277788417,
      "loss": 1.198,
      "step": 25
    },
    {
      "epoch": 0.3973509933774834,
      "grad_norm": 0.1501755714416504,
      "learning_rate": 0.0001513474193514842,
      "loss": 1.1762,
      "step": 30
    },
    {
      "epoch": 0.46357615894039733,
      "grad_norm": 0.14539840817451477,
      "learning_rate": 0.0001300084635000341,
      "loss": 1.2176,
      "step": 35
    },
    {
      "epoch": 0.5298013245033113,
      "grad_norm": 0.12844280898571014,
      "learning_rate": 0.0001070276188945293,
      "loss": 1.1942,
      "step": 40
    },
    {
      "epoch": 0.5960264900662252,
      "grad_norm": 0.13806107640266418,
      "learning_rate": 8.366226381814697e-05,
      "loss": 1.2928,
      "step": 45
    },
    {
      "epoch": 0.6622516556291391,
      "grad_norm": 0.13188520073890686,
      "learning_rate": 6.119081473277501e-05,
      "loss": 1.1959,
      "step": 50
    },
    {
      "epoch": 0.7284768211920529,
      "grad_norm": 0.12824179232120514,
      "learning_rate": 4.084277875864776e-05,
      "loss": 1.1188,
      "step": 55
    },
    {
      "epoch": 0.7947019867549668,
      "grad_norm": 0.14250224828720093,
      "learning_rate": 2.3731482188961818e-05,
      "loss": 1.2076,
      "step": 60
    },
    {
      "epoch": 0.8609271523178808,
      "grad_norm": 0.14001749455928802,
      "learning_rate": 1.0793155744261351e-05,
      "loss": 1.1352,
      "step": 65
    },
    {
      "epoch": 0.9271523178807947,
      "grad_norm": 0.15154731273651123,
      "learning_rate": 2.735709467518699e-06,
      "loss": 1.1486,
      "step": 70
    },
    {
      "epoch": 0.9933774834437086,
      "grad_norm": 0.14987458288669586,
      "learning_rate": 0.0,
      "loss": 1.2385,
      "step": 75
    },
    {
      "epoch": 0.9933774834437086,
      "step": 75,
      "total_flos": 5.564154814608179e+16,
      "train_loss": 0.0,
      "train_runtime": 0.9326,
      "train_samples_per_second": 645.502,
      "train_steps_per_second": 80.42
    }
  ],
  "logging_steps": 5,
  "max_steps": 75,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.564154814608179e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}