{
  "best_metric": 0.10316114127635956,
  "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_eg_aragpt2-base/checkpoint-35525",
  "epoch": 10.0,
  "eval_steps": 500,
  "global_step": 71050,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 0.1684599220752716,
      "learning_rate": 4.766772598870057e-05,
      "loss": 0.2542,
      "step": 7105
    },
    {
      "epoch": 1.0,
      "eval_bleu": 0.07285026466195073,
      "eval_loss": 0.11986471712589264,
      "eval_rouge1": 0.31870746501286634,
      "eval_rouge2": 0.11025143574709506,
      "eval_rougeL": 0.30940400851788263,
      "eval_runtime": 445.2495,
      "eval_samples_per_second": 31.912,
      "eval_steps_per_second": 3.991,
      "step": 7105
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.16581259667873383,
      "learning_rate": 4.515889830508475e-05,
      "loss": 0.1078,
      "step": 14210
    },
    {
      "epoch": 2.0,
      "eval_bleu": 0.104385167515318,
      "eval_loss": 0.1118762344121933,
      "eval_rouge1": 0.38033922316590574,
      "eval_rouge2": 0.16357204386475152,
      "eval_rougeL": 0.37196606997888715,
      "eval_runtime": 445.4751,
      "eval_samples_per_second": 31.896,
      "eval_steps_per_second": 3.989,
      "step": 14210
    },
    {
      "epoch": 3.0,
      "grad_norm": 0.18644841015338898,
      "learning_rate": 4.265007062146893e-05,
      "loss": 0.0972,
      "step": 21315
    },
    {
      "epoch": 3.0,
      "eval_bleu": 0.12224033944483013,
      "eval_loss": 0.10767202824354172,
      "eval_rouge1": 0.4109049665753015,
      "eval_rouge2": 0.19329040527739555,
      "eval_rougeL": 0.40326193036349967,
      "eval_runtime": 383.8172,
      "eval_samples_per_second": 37.02,
      "eval_steps_per_second": 4.63,
      "step": 21315
    },
    {
      "epoch": 4.0,
      "grad_norm": 0.2712990939617157,
      "learning_rate": 4.014124293785311e-05,
      "loss": 0.0902,
      "step": 28420
    },
    {
      "epoch": 4.0,
      "eval_bleu": 0.13124006032752472,
      "eval_loss": 0.10514508932828903,
      "eval_rouge1": 0.42940503923153844,
      "eval_rouge2": 0.2090469696784658,
      "eval_rougeL": 0.4223466730872162,
      "eval_runtime": 323.7733,
      "eval_samples_per_second": 43.886,
      "eval_steps_per_second": 5.488,
      "step": 28420
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.16391794383525848,
      "learning_rate": 3.763241525423729e-05,
      "loss": 0.0846,
      "step": 35525
    },
    {
      "epoch": 5.0,
      "eval_bleu": 0.14049162127130865,
      "eval_loss": 0.10316114127635956,
      "eval_rouge1": 0.44551198545007975,
      "eval_rouge2": 0.22506890852974587,
      "eval_rougeL": 0.4382572142917238,
      "eval_runtime": 340.0971,
      "eval_samples_per_second": 41.779,
      "eval_steps_per_second": 5.225,
      "step": 35525
    },
    {
      "epoch": 6.0,
      "grad_norm": 0.1612280309200287,
      "learning_rate": 3.5123587570621466e-05,
      "loss": 0.0799,
      "step": 42630
    },
    {
      "epoch": 6.0,
      "eval_bleu": 0.14535221713569074,
      "eval_loss": 0.10411085933446884,
      "eval_rouge1": 0.45365603658460285,
      "eval_rouge2": 0.23383881662198475,
      "eval_rougeL": 0.4465500235966283,
      "eval_runtime": 384.6849,
      "eval_samples_per_second": 36.937,
      "eval_steps_per_second": 4.619,
      "step": 42630
    },
    {
      "epoch": 7.0,
      "grad_norm": 0.252353310585022,
      "learning_rate": 3.261475988700565e-05,
      "loss": 0.0759,
      "step": 49735
    },
    {
      "epoch": 7.0,
      "eval_bleu": 0.1493733281238007,
      "eval_loss": 0.10441984981298447,
      "eval_rouge1": 0.4622737132167348,
      "eval_rouge2": 0.24252644756195563,
      "eval_rougeL": 0.45529247029346154,
      "eval_runtime": 324.6182,
      "eval_samples_per_second": 43.771,
      "eval_steps_per_second": 5.474,
      "step": 49735
    },
    {
      "epoch": 8.0,
      "grad_norm": 0.21136653423309326,
      "learning_rate": 3.010593220338983e-05,
      "loss": 0.0722,
      "step": 56840
    },
    {
      "epoch": 8.0,
      "eval_bleu": 0.1526924816405305,
      "eval_loss": 0.10443145781755447,
      "eval_rouge1": 0.4655122846049303,
      "eval_rouge2": 0.24695475478185897,
      "eval_rougeL": 0.45872867266974005,
      "eval_runtime": 324.8018,
      "eval_samples_per_second": 43.747,
      "eval_steps_per_second": 5.471,
      "step": 56840
    },
    {
      "epoch": 9.0,
      "grad_norm": 0.26080313324928284,
      "learning_rate": 2.7597104519774014e-05,
      "loss": 0.069,
      "step": 63945
    },
    {
      "epoch": 9.0,
      "eval_bleu": 0.153555473627703,
      "eval_loss": 0.10583677142858505,
      "eval_rouge1": 0.4688625732021522,
      "eval_rouge2": 0.24885496454985231,
      "eval_rougeL": 0.46205637495730445,
      "eval_runtime": 324.9582,
      "eval_samples_per_second": 43.726,
      "eval_steps_per_second": 5.468,
      "step": 63945
    },
    {
      "epoch": 10.0,
      "grad_norm": 0.27384456992149353,
      "learning_rate": 2.5088276836158192e-05,
      "loss": 0.066,
      "step": 71050
    },
    {
      "epoch": 10.0,
      "eval_bleu": 0.1549549290214687,
      "eval_loss": 0.10621096938848495,
      "eval_rouge1": 0.4724064043822283,
      "eval_rouge2": 0.25225313492301393,
      "eval_rougeL": 0.46574710538787245,
      "eval_runtime": 327.3388,
      "eval_samples_per_second": 43.408,
      "eval_steps_per_second": 5.429,
      "step": 71050
    },
    {
      "epoch": 10.0,
      "step": 71050,
      "total_flos": 2.9701587861504e+17,
      "train_loss": 0.09969830163342276,
      "train_runtime": 40778.4516,
      "train_samples_per_second": 27.876,
      "train_steps_per_second": 3.485
    }
  ],
  "logging_steps": 500,
  "max_steps": 142100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.9701587861504e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}