|
{ |
|
"best_metric": 0.6946045049764275, |
|
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-1088", |
|
"epoch": 9.967845659163988, |
|
"eval_steps": 500, |
|
"global_step": 1550, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9967845659163987, |
|
"eval_accuracy": 0.9435978055118868, |
|
"eval_f1": 0.593056643174106, |
|
"eval_loss": 0.14849668741226196, |
|
"eval_precision": 0.5668662674650699, |
|
"eval_recall": 0.6217843459222769, |
|
"eval_runtime": 5.8633, |
|
"eval_samples_per_second": 429.618, |
|
"eval_steps_per_second": 53.724, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9408546953704001, |
|
"eval_f1": 0.6299060919817, |
|
"eval_loss": 0.16094118356704712, |
|
"eval_precision": 0.5623387790197765, |
|
"eval_recall": 0.715927750410509, |
|
"eval_runtime": 5.9155, |
|
"eval_samples_per_second": 425.831, |
|
"eval_steps_per_second": 53.25, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 2.996784565916399, |
|
"eval_accuracy": 0.9486669447207161, |
|
"eval_f1": 0.6676790685902303, |
|
"eval_loss": 0.1634686440229416, |
|
"eval_precision": 0.6209981167608286, |
|
"eval_recall": 0.7219485495347564, |
|
"eval_runtime": 5.9009, |
|
"eval_samples_per_second": 426.884, |
|
"eval_steps_per_second": 53.382, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 3.215434083601286, |
|
"grad_norm": 0.9288749098777771, |
|
"learning_rate": 3.387096774193548e-05, |
|
"loss": 0.1246, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9492604831723828, |
|
"eval_f1": 0.6711919630735812, |
|
"eval_loss": 0.20466655492782593, |
|
"eval_precision": 0.665948275862069, |
|
"eval_recall": 0.6765188834154351, |
|
"eval_runtime": 5.9155, |
|
"eval_samples_per_second": 425.831, |
|
"eval_steps_per_second": 53.25, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 4.996784565916399, |
|
"eval_accuracy": 0.9479771567904007, |
|
"eval_f1": 0.6827731092436975, |
|
"eval_loss": 0.2133806049823761, |
|
"eval_precision": 0.6562342251388188, |
|
"eval_recall": 0.7115489874110563, |
|
"eval_runtime": 5.8759, |
|
"eval_samples_per_second": 428.701, |
|
"eval_steps_per_second": 53.609, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.949388815810581, |
|
"eval_f1": 0.6795913020696882, |
|
"eval_loss": 0.2258971482515335, |
|
"eval_precision": 0.6517587939698493, |
|
"eval_recall": 0.7099069512862616, |
|
"eval_runtime": 5.9106, |
|
"eval_samples_per_second": 426.187, |
|
"eval_steps_per_second": 53.294, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 6.430868167202572, |
|
"grad_norm": 0.18494442105293274, |
|
"learning_rate": 1.774193548387097e-05, |
|
"loss": 0.0242, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 6.996784565916399, |
|
"eval_accuracy": 0.9496615226667522, |
|
"eval_f1": 0.6946045049764275, |
|
"eval_loss": 0.24502409994602203, |
|
"eval_precision": 0.6659969864389754, |
|
"eval_recall": 0.7257799671592775, |
|
"eval_runtime": 5.8918, |
|
"eval_samples_per_second": 427.542, |
|
"eval_steps_per_second": 53.464, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9490679842150855, |
|
"eval_f1": 0.6841015018125324, |
|
"eval_loss": 0.26501980423927307, |
|
"eval_precision": 0.6491400491400492, |
|
"eval_recall": 0.7230432402846196, |
|
"eval_runtime": 5.9184, |
|
"eval_samples_per_second": 425.624, |
|
"eval_steps_per_second": 53.224, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 8.996784565916398, |
|
"eval_accuracy": 0.9497577721454009, |
|
"eval_f1": 0.687797147385103, |
|
"eval_loss": 0.27453720569610596, |
|
"eval_precision": 0.664624808575804, |
|
"eval_recall": 0.7126436781609196, |
|
"eval_runtime": 5.9283, |
|
"eval_samples_per_second": 424.913, |
|
"eval_steps_per_second": 53.135, |
|
"step": 1399 |
|
}, |
|
{ |
|
"epoch": 9.646302250803858, |
|
"grad_norm": 0.35425594449043274, |
|
"learning_rate": 1.6129032258064516e-06, |
|
"loss": 0.0083, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 9.967845659163988, |
|
"eval_accuracy": 0.950255061118419, |
|
"eval_f1": 0.6896008403361344, |
|
"eval_loss": 0.27744925022125244, |
|
"eval_precision": 0.662796567390207, |
|
"eval_recall": 0.7186644772851669, |
|
"eval_runtime": 6.1493, |
|
"eval_samples_per_second": 409.643, |
|
"eval_steps_per_second": 51.226, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 9.967845659163988, |
|
"step": 1550, |
|
"total_flos": 4644619911314910.0, |
|
"train_loss": 0.050868147861573, |
|
"train_runtime": 855.1929, |
|
"train_samples_per_second": 116.102, |
|
"train_steps_per_second": 1.812 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 1550, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4644619911314910.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|