{ "best_metric": 0.04691193997859955, "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_yem_aragpt2-large/checkpoint-610", "epoch": 7.0, "eval_steps": 500, "global_step": 2135, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.40213119983673096, "learning_rate": 3.05e-05, "loss": 0.8644, "step": 305 }, { "epoch": 1.0, "eval_bleu": 0.03103545034655718, "eval_loss": 0.049881722778081894, "eval_rouge1": 0.2976310465047741, "eval_rouge2": 0.0825114524566873, "eval_rougeL": 0.2931870351050212, "eval_runtime": 101.6572, "eval_samples_per_second": 2.99, "eval_steps_per_second": 0.374, "step": 305 }, { "epoch": 2.0, "grad_norm": 0.45833268761634827, "learning_rate": 4.901785714285714e-05, "loss": 0.0439, "step": 610 }, { "epoch": 2.0, "eval_bleu": 0.07334464750125981, "eval_loss": 0.04691193997859955, "eval_rouge1": 0.39556592669658314, "eval_rouge2": 0.1676736409423039, "eval_rougeL": 0.3901085723006601, "eval_runtime": 41.076, "eval_samples_per_second": 7.401, "eval_steps_per_second": 0.925, "step": 610 }, { "epoch": 3.0, "grad_norm": 0.3850250840187073, "learning_rate": 4.629464285714286e-05, "loss": 0.0307, "step": 915 }, { "epoch": 3.0, "eval_bleu": 0.09008751782596823, "eval_loss": 0.0473761111497879, "eval_rouge1": 0.44112670621442845, "eval_rouge2": 0.20925026290023724, "eval_rougeL": 0.43611440413552455, "eval_runtime": 41.095, "eval_samples_per_second": 7.397, "eval_steps_per_second": 0.925, "step": 915 }, { "epoch": 4.0, "grad_norm": 0.4408362805843353, "learning_rate": 4.3571428571428576e-05, "loss": 0.0212, "step": 1220 }, { "epoch": 4.0, "eval_bleu": 0.10385450068202749, "eval_loss": 0.04973715916275978, "eval_rouge1": 0.4643434470122675, "eval_rouge2": 0.23150126678662394, "eval_rougeL": 0.45914976519742434, "eval_runtime": 41.0763, "eval_samples_per_second": 7.401, "eval_steps_per_second": 0.925, "step": 1220 }, { "epoch": 5.0, "grad_norm": 0.4677392840385437, "learning_rate": 4.084821428571429e-05, "loss": 0.016, "step": 1525 }, { "epoch": 5.0, "eval_bleu": 0.09227592943056641, "eval_loss": 0.054129794239997864, "eval_rouge1": 0.464136474952609, "eval_rouge2": 0.22291426741508574, "eval_rougeL": 0.4599627344232192, "eval_runtime": 41.1781, "eval_samples_per_second": 7.383, "eval_steps_per_second": 0.923, "step": 1525 }, { "epoch": 6.0, "grad_norm": 0.30016008019447327, "learning_rate": 3.8125e-05, "loss": 0.0134, "step": 1830 }, { "epoch": 6.0, "eval_bleu": 0.11604582902883592, "eval_loss": 0.05314570292830467, "eval_rouge1": 0.47459996105255675, "eval_rouge2": 0.25028361013073314, "eval_rougeL": 0.46988615577124576, "eval_runtime": 61.9343, "eval_samples_per_second": 4.908, "eval_steps_per_second": 0.614, "step": 1830 }, { "epoch": 7.0, "grad_norm": 0.24085062742233276, "learning_rate": 3.5401785714285716e-05, "loss": 0.0118, "step": 2135 }, { "epoch": 7.0, "eval_bleu": 0.11125669152362899, "eval_loss": 0.057827215641736984, "eval_rouge1": 0.4975937747836845, "eval_rouge2": 0.27138037997797015, "eval_rougeL": 0.4939744686596661, "eval_runtime": 101.6221, "eval_samples_per_second": 2.991, "eval_steps_per_second": 0.374, "step": 2135 }, { "epoch": 7.0, "step": 2135, "total_flos": 3.71386078199808e+16, "train_loss": 0.1430642572443156, "train_runtime": 4921.8294, "train_samples_per_second": 4.953, "train_steps_per_second": 1.239 } ], "logging_steps": 500, "max_steps": 6100, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.71386078199808e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }