|
{ |
|
"best_metric": 0.04691193997859955, |
|
"best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_yem_aragpt2-large/checkpoint-610", |
|
"epoch": 7.0, |
|
"eval_steps": 500, |
|
"global_step": 2135, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.40213119983673096, |
|
"learning_rate": 3.05e-05, |
|
"loss": 0.8644, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 0.03103545034655718, |
|
"eval_loss": 0.049881722778081894, |
|
"eval_rouge1": 0.2976310465047741, |
|
"eval_rouge2": 0.0825114524566873, |
|
"eval_rougeL": 0.2931870351050212, |
|
"eval_runtime": 101.6572, |
|
"eval_samples_per_second": 2.99, |
|
"eval_steps_per_second": 0.374, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.45833268761634827, |
|
"learning_rate": 4.901785714285714e-05, |
|
"loss": 0.0439, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 0.07334464750125981, |
|
"eval_loss": 0.04691193997859955, |
|
"eval_rouge1": 0.39556592669658314, |
|
"eval_rouge2": 0.1676736409423039, |
|
"eval_rougeL": 0.3901085723006601, |
|
"eval_runtime": 41.076, |
|
"eval_samples_per_second": 7.401, |
|
"eval_steps_per_second": 0.925, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.3850250840187073, |
|
"learning_rate": 4.629464285714286e-05, |
|
"loss": 0.0307, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 0.09008751782596823, |
|
"eval_loss": 0.0473761111497879, |
|
"eval_rouge1": 0.44112670621442845, |
|
"eval_rouge2": 0.20925026290023724, |
|
"eval_rougeL": 0.43611440413552455, |
|
"eval_runtime": 41.095, |
|
"eval_samples_per_second": 7.397, |
|
"eval_steps_per_second": 0.925, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.4408362805843353, |
|
"learning_rate": 4.3571428571428576e-05, |
|
"loss": 0.0212, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 0.10385450068202749, |
|
"eval_loss": 0.04973715916275978, |
|
"eval_rouge1": 0.4643434470122675, |
|
"eval_rouge2": 0.23150126678662394, |
|
"eval_rougeL": 0.45914976519742434, |
|
"eval_runtime": 41.0763, |
|
"eval_samples_per_second": 7.401, |
|
"eval_steps_per_second": 0.925, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.4677392840385437, |
|
"learning_rate": 4.084821428571429e-05, |
|
"loss": 0.016, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 0.09227592943056641, |
|
"eval_loss": 0.054129794239997864, |
|
"eval_rouge1": 0.464136474952609, |
|
"eval_rouge2": 0.22291426741508574, |
|
"eval_rougeL": 0.4599627344232192, |
|
"eval_runtime": 41.1781, |
|
"eval_samples_per_second": 7.383, |
|
"eval_steps_per_second": 0.923, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.30016008019447327, |
|
"learning_rate": 3.8125e-05, |
|
"loss": 0.0134, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 0.11604582902883592, |
|
"eval_loss": 0.05314570292830467, |
|
"eval_rouge1": 0.47459996105255675, |
|
"eval_rouge2": 0.25028361013073314, |
|
"eval_rougeL": 0.46988615577124576, |
|
"eval_runtime": 61.9343, |
|
"eval_samples_per_second": 4.908, |
|
"eval_steps_per_second": 0.614, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.24085062742233276, |
|
"learning_rate": 3.5401785714285716e-05, |
|
"loss": 0.0118, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bleu": 0.11125669152362899, |
|
"eval_loss": 0.057827215641736984, |
|
"eval_rouge1": 0.4975937747836845, |
|
"eval_rouge2": 0.27138037997797015, |
|
"eval_rougeL": 0.4939744686596661, |
|
"eval_runtime": 101.6221, |
|
"eval_samples_per_second": 2.991, |
|
"eval_steps_per_second": 0.374, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 2135, |
|
"total_flos": 3.71386078199808e+16, |
|
"train_loss": 0.1430642572443156, |
|
"train_runtime": 4921.8294, |
|
"train_samples_per_second": 4.953, |
|
"train_steps_per_second": 1.239 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 6100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.71386078199808e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|