{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.879792873874985,
  "global_step": 5000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.02, "learning_rate": 5.555555555555555e-05, "loss": 13.7427, "step": 10 },
    { "epoch": 0.04, "learning_rate": 0.0001111111111111111, "loss": 8.8821, "step": 20 },
    { "epoch": 0.06, "learning_rate": 0.00016666666666666666, "loss": 2.5857, "step": 30 },
    { "epoch": 0.08, "learning_rate": 0.0002222222222222222, "loss": 1.3518, "step": 40 },
    { "epoch": 0.1, "learning_rate": 0.0002777777777777778, "loss": 1.0054, "step": 50 },
    { "epoch": 0.12, "learning_rate": 0.0003333333333333333, "loss": 0.864, "step": 60 },
    { "epoch": 0.14, "learning_rate": 0.0003888888888888889, "loss": 0.8669, "step": 70 },
    { "epoch": 0.16, "learning_rate": 0.0004444444444444444, "loss": 0.9395, "step": 80 },
    { "epoch": 0.18, "learning_rate": 0.0005, "loss": 1.0424, "step": 90 },
    { "epoch": 0.2, "learning_rate": 0.0004985515643105447, "loss": 0.6298, "step": 100 },
    { "epoch": 0.2, "eval_loss": 1.61441171169281, "eval_rouge1": 0.07727272727272727, "eval_rouge2": 0.061111111111111116, "eval_rougeL": 0.06893939393939394, "eval_rougeLsum": 0.08333333333333333, "eval_runtime": 91.9317, "eval_samples_per_second": 0.218, "eval_steps_per_second": 0.218, "step": 100 },
    { "epoch": 0.22, "learning_rate": 0.0004971031286210893, "loss": 0.5699, "step": 110 },
    { "epoch": 0.24, "learning_rate": 0.0004956546929316338, "loss": 0.6074, "step": 120 },
    { "epoch": 0.26, "learning_rate": 0.0004942062572421785, "loss": 0.6291, "step": 130 },
    { "epoch": 0.28, "learning_rate": 0.0004927578215527231, "loss": 0.5098, "step": 140 },
    { "epoch": 0.3, "learning_rate": 0.0004913093858632677, "loss": 0.4448, "step": 150 },
    { "epoch": 0.32, "learning_rate": 0.0004898609501738123, "loss": 0.4508, "step": 160 },
    { "epoch": 0.34, "learning_rate": 0.0004884125144843569, "loss": 0.4255, "step": 170 },
    { "epoch": 0.36, "learning_rate": 0.00048696407879490153, "loss": 0.3705, "step": 180 },
    { "epoch": 0.37, "learning_rate": 0.0004855156431054461, "loss": 0.4009, "step": 190 },
    { "epoch": 0.39, "learning_rate": 0.00048406720741599077, "loss": 0.4317, "step": 200 },
    { "epoch": 0.39, "eval_loss": 1.176011323928833, "eval_rouge1": 0.16153846153846152, "eval_rouge2": 0.06988636363636364, "eval_rougeL": 0.15999999999999998, "eval_rougeLsum": 0.15999999999999998, "eval_runtime": 88.2692, "eval_samples_per_second": 0.227, "eval_steps_per_second": 0.227, "step": 200 },
    { "epoch": 0.41, "learning_rate": 0.00048261877172653536, "loss": 0.3679, "step": 210 },
    { "epoch": 0.43, "learning_rate": 0.00048117033603707995, "loss": 0.3252, "step": 220 },
    { "epoch": 0.45, "learning_rate": 0.0004797219003476246, "loss": 0.3707, "step": 230 },
    { "epoch": 0.47, "learning_rate": 0.0004782734646581692, "loss": 0.3543, "step": 240 },
    { "epoch": 0.49, "learning_rate": 0.0004768250289687138, "loss": 0.3434, "step": 250 },
    { "epoch": 0.51, "learning_rate": 0.0004753765932792584, "loss": 0.4007, "step": 260 },
    { "epoch": 0.53, "learning_rate": 0.000473928157589803, "loss": 0.3552, "step": 270 },
    { "epoch": 0.55, "learning_rate": 0.00047247972190034765, "loss": 0.388, "step": 280 },
    { "epoch": 0.57, "learning_rate": 0.00047103128621089224, "loss": 0.307, "step": 290 },
    { "epoch": 0.59, "learning_rate": 0.0004695828505214368, "loss": 0.2588, "step": 300 },
    { "epoch": 0.59, "eval_loss": 1.0633952617645264, "eval_rouge1": 0.08, "eval_rouge2": 0.05555555555555556, "eval_rougeL": 0.08414141414141416, "eval_rougeLsum": 0.08383838383838385, "eval_runtime": 88.5138, "eval_samples_per_second": 0.226, "eval_steps_per_second": 0.226, "step": 300 },
    { "epoch": 0.61, "learning_rate": 0.00046813441483198147, "loss": 0.2965, "step": 310 },
    { "epoch": 0.63, "learning_rate": 0.00046668597914252606, "loss": 0.3096, "step": 320 },
    { "epoch": 0.65, "learning_rate": 0.0004652375434530707, "loss": 0.301, "step": 330 },
    { "epoch": 0.67, "learning_rate": 0.00046378910776361535, "loss": 0.3153, "step": 340 },
    { "epoch": 0.69, "learning_rate": 0.00046234067207415994, "loss": 0.3549, "step": 350 },
    { "epoch": 0.71, "learning_rate": 0.0004608922363847045, "loss": 0.356, "step": 360 },
    { "epoch": 0.73, "learning_rate": 0.00045944380069524917, "loss": 0.3278, "step": 370 },
    { "epoch": 0.75, "learning_rate": 0.00045799536500579376, "loss": 0.2985, "step": 380 },
    { "epoch": 0.77, "learning_rate": 0.00045654692931633835, "loss": 0.3152, "step": 390 },
    { "epoch": 0.79, "learning_rate": 0.000455098493626883, "loss": 0.2665, "step": 400 },
    { "epoch": 0.79, "eval_loss": 1.044259786605835, "eval_rouge1": 0.06307692307692307, "eval_rouge2": 0.021590909090909088, "eval_rougeL": 0.06307692307692307, "eval_rougeLsum": 0.06307692307692307, "eval_runtime": 80.5878, "eval_samples_per_second": 0.248, "eval_steps_per_second": 0.248, "step": 400 },
    { "epoch": 0.81, "learning_rate": 0.0004536500579374276, "loss": 0.2323, "step": 410 },
    { "epoch": 0.83, "learning_rate": 0.00045220162224797217, "loss": 0.2222, "step": 420 },
    { "epoch": 0.85, "learning_rate": 0.0004507531865585168, "loss": 0.2516, "step": 430 },
    { "epoch": 0.87, "learning_rate": 0.0004493047508690614, "loss": 0.2851, "step": 440 },
    { "epoch": 0.89, "learning_rate": 0.000447856315179606, "loss": 0.2677, "step": 450 },
    { "epoch": 0.91, "learning_rate": 0.00044640787949015064, "loss": 0.2447, "step": 460 },
    { "epoch": 0.93, "learning_rate": 0.00044495944380069523, "loss": 0.3186, "step": 470 },
    { "epoch": 0.95, "learning_rate": 0.0004435110081112398, "loss": 0.3035, "step": 480 },
    { "epoch": 0.97, "learning_rate": 0.0004420625724217845, "loss": 0.3036, "step": 490 },
    { "epoch": 0.99, "learning_rate": 0.0004406141367323291, "loss": 0.1972, "step": 500 },
    { "epoch": 0.99, "eval_loss": 1.0465357303619385, "eval_rouge1": 0.1908791208791209, "eval_rouge2": 0.10681818181818181, "eval_rougeL": 0.17934065934065935, "eval_rougeLsum": 0.19159340659340657, "eval_runtime": 84.3482, "eval_samples_per_second": 0.237, "eval_steps_per_second": 0.237, "step": 500 },
    { "epoch": 1.01, "learning_rate": 0.00043916570104287375, "loss": 0.279, "step": 510 },
    { "epoch": 1.03, "learning_rate": 0.00043771726535341834, "loss": 0.272, "step": 520 },
    { "epoch": 1.05, "learning_rate": 0.00043626882966396293, "loss": 0.2272, "step": 530 },
    { "epoch": 1.07, "learning_rate": 0.0004348203939745076, "loss": 0.2495, "step": 540 },
    { "epoch": 1.09, "learning_rate": 0.00043337195828505216, "loss": 0.1965, "step": 550 },
    { "epoch": 1.11, "learning_rate": 0.00043192352259559675, "loss": 0.2364, "step": 560 },
    { "epoch": 1.13, "learning_rate": 0.0004304750869061414, "loss": 0.2478, "step": 570 },
    { "epoch": 1.15, "learning_rate": 0.000429026651216686, "loss": 0.2046, "step": 580 },
    { "epoch": 1.17, "learning_rate": 0.0004275782155272306, "loss": 0.2661, "step": 590 },
    { "epoch": 1.19, "learning_rate": 0.0004261297798377752, "loss": 0.2041, "step": 600 },
    { "epoch": 1.19, "eval_loss": 0.9551488757133484, "eval_rouge1": 0.09038461538461538, "eval_rouge2": 0.05051948051948052, "eval_rougeL": 0.09679487179487178, "eval_rougeLsum": 0.09871794871794871, "eval_runtime": 89.0139, "eval_samples_per_second": 0.225, "eval_steps_per_second": 0.225, "step": 600 },
    { "epoch": 1.21, "learning_rate": 0.0004246813441483198, "loss": 0.2816, "step": 610 },
    { "epoch": 1.22, "learning_rate": 0.0004232329084588644, "loss": 0.1904, "step": 620 },
    { "epoch": 1.24, "learning_rate": 0.00042178447276940904, "loss": 0.21, "step": 630 },
    { "epoch": 1.26, "learning_rate": 0.0004203360370799537, "loss": 0.1662, "step": 640 },
    { "epoch": 1.28, "learning_rate": 0.0004188876013904983, "loss": 0.3052, "step": 650 },
    { "epoch": 1.3, "learning_rate": 0.0004174391657010429, "loss": 0.1744, "step": 660 },
    { "epoch": 1.32, "learning_rate": 0.0004159907300115875, "loss": 0.288, "step": 670 },
    { "epoch": 1.34, "learning_rate": 0.0004145422943221321, "loss": 0.2303, "step": 680 },
    { "epoch": 1.36, "learning_rate": 0.00041309385863267674, "loss": 0.2866, "step": 690 },
    { "epoch": 1.38, "learning_rate": 0.00041164542294322133, "loss": 0.238, "step": 700 },
    { "epoch": 1.38, "eval_loss": 0.9423090219497681, "eval_rouge1": 0.1, "eval_rouge2": 0.07291666666666667, "eval_rougeL": 0.1, "eval_rougeLsum": 0.10333333333333335, "eval_runtime": 88.0456, "eval_samples_per_second": 0.227, "eval_steps_per_second": 0.227, "step": 700 },
    { "epoch": 1.4, "learning_rate": 0.0004101969872537659, "loss": 0.2342, "step": 710 },
    { "epoch": 1.42, "learning_rate": 0.00040874855156431057, "loss": 0.2716, "step": 720 },
    { "epoch": 1.44, "learning_rate": 0.00040730011587485516, "loss": 0.2453, "step": 730 },
    { "epoch": 1.46, "learning_rate": 0.00040585168018539974, "loss": 0.2313, "step": 740 },
    { "epoch": 1.48, "learning_rate": 0.0004044032444959444, "loss": 0.2306, "step": 750 },
    { "epoch": 1.5, "learning_rate": 0.000402954808806489, "loss": 0.1773, "step": 760 },
    { "epoch": 1.52, "learning_rate": 0.00040150637311703357, "loss": 0.1957, "step": 770 },
    { "epoch": 1.54, "learning_rate": 0.0004000579374275782, "loss": 0.2758, "step": 780 },
    { "epoch": 1.56, "learning_rate": 0.0003986095017381228, "loss": 0.2649, "step": 790 },
    { "epoch": 1.58, "learning_rate": 0.0003971610660486675, "loss": 0.275, "step": 800 },
    { "epoch": 1.58, "eval_loss": 0.9273136258125305, "eval_rouge1": 0.14666666666666667, "eval_rouge2": 0.10977272727272727, "eval_rougeL": 0.15038461538461537, "eval_rougeLsum": 0.15153846153846154, "eval_runtime": 87.8017, "eval_samples_per_second": 0.228, "eval_steps_per_second": 0.228, "step": 800 },
    { "epoch": 1.6, "learning_rate": 0.0003957126303592121, "loss": 0.2102, "step": 810 },
    { "epoch": 1.62, "learning_rate": 0.0003942641946697567, "loss": 0.2146, "step": 820 },
    { "epoch": 1.64, "learning_rate": 0.0003928157589803013, "loss": 0.1918, "step": 830 },
    { "epoch": 1.66, "learning_rate": 0.0003913673232908459, "loss": 0.2512, "step": 840 },
    { "epoch": 1.68, "learning_rate": 0.0003899188876013905, "loss": 0.2499, "step": 850 },
    { "epoch": 1.7, "learning_rate": 0.00038847045191193515, "loss": 0.228, "step": 860 },
    { "epoch": 1.72, "learning_rate": 0.00038702201622247974, "loss": 0.2507, "step": 870 },
    { "epoch": 1.74, "learning_rate": 0.0003855735805330243, "loss": 0.1735, "step": 880 },
    { "epoch": 1.76, "learning_rate": 0.00038412514484356897, "loss": 0.2752, "step": 890 },
    { "epoch": 1.78, "learning_rate": 0.00038267670915411356, "loss": 0.2379, "step": 900 },
    { "epoch": 1.78, "eval_loss": 0.9023244976997375, "eval_rouge1": 0.1, "eval_rouge2": 0.08333333333333333, "eval_rougeL": 0.1, "eval_rougeLsum": 0.1, "eval_runtime": 80.7798, "eval_samples_per_second": 0.248, "eval_steps_per_second": 0.248, "step": 900 },
    { "epoch": 1.8, "learning_rate": 0.00038122827346465815, "loss": 0.1993, "step": 910 },
    { "epoch": 1.82, "learning_rate": 0.0003797798377752028, "loss": 0.2058, "step": 920 },
    { "epoch": 1.84, "learning_rate": 0.0003783314020857474, "loss": 0.2675, "step": 930 },
    { "epoch": 1.86, "learning_rate": 0.00037688296639629197, "loss": 0.1928, "step": 940 },
    { "epoch": 1.88, "learning_rate": 0.0003754345307068366, "loss": 0.1903, "step": 950 },
    { "epoch": 1.9, "learning_rate": 0.00037398609501738126, "loss": 0.1967, "step": 960 },
    { "epoch": 1.92, "learning_rate": 0.00037253765932792585, "loss": 0.2044, "step": 970 },
    { "epoch": 1.94, "learning_rate": 0.0003710892236384705, "loss": 0.2027, "step": 980 },
    { "epoch": 1.95, "learning_rate": 0.0003696407879490151, "loss": 0.25, "step": 990 },
    { "epoch": 1.97, "learning_rate": 0.00036819235225955967, "loss": 0.2896, "step": 1000 },
    { "epoch": 1.97, "eval_loss": 0.9184179306030273, "eval_rouge1": 0.19, "eval_rouge2": 0.1, "eval_rougeL": 0.18893939393939393, "eval_rougeLsum": 0.19848484848484846, "eval_runtime": 81.9559, "eval_samples_per_second": 0.244, "eval_steps_per_second": 0.244, "step": 1000 },
    { "epoch": 1.99, "learning_rate": 0.0003667439165701043, "loss": 0.1462, "step": 1010 },
    { "epoch": 2.02, "learning_rate": 0.0003652954808806489, "loss": 0.1944, "step": 1020 },
    { "epoch": 2.04, "learning_rate": 0.0003638470451911935, "loss": 0.2296, "step": 1030 },
    { "epoch": 2.06, "learning_rate": 0.00036239860950173814, "loss": 0.2446, "step": 1040 },
    { "epoch": 2.07, "learning_rate": 0.00036095017381228273, "loss": 0.1448, "step": 1050 },
    { "epoch": 2.09, "learning_rate": 0.0003595017381228273, "loss": 0.1507, "step": 1060 },
    { "epoch": 2.11, "learning_rate": 0.00035805330243337196, "loss": 0.1636, "step": 1070 },
    { "epoch": 2.13, "learning_rate": 0.00035660486674391655, "loss": 0.1909, "step": 1080 },
    { "epoch": 2.15, "learning_rate": 0.0003551564310544612, "loss": 0.1895, "step": 1090 },
    { "epoch": 2.17, "learning_rate": 0.0003537079953650058, "loss": 0.2663, "step": 1100 },
    { "epoch": 2.17, "eval_loss": 0.9002671241760254, "eval_rouge1": 0.07948717948717948, "eval_rouge2": 0.06779220779220778, "eval_rougeL": 0.08782051282051281, "eval_rougeLsum": 0.08333333333333333, "eval_runtime": 84.9078, "eval_samples_per_second": 0.236, "eval_steps_per_second": 0.236, "step": 1100 },
    { "epoch": 2.19, "learning_rate": 0.0003522595596755504, "loss": 0.1672, "step": 1110 },
    { "epoch": 2.21, "learning_rate": 0.00035081112398609507, "loss": 0.1909, "step": 1120 },
    { "epoch": 2.23, "learning_rate": 0.00034936268829663966, "loss": 0.1675, "step": 1130 },
    { "epoch": 2.25, "learning_rate": 0.00034791425260718425, "loss": 0.2636, "step": 1140 },
    { "epoch": 2.27, "learning_rate": 0.0003464658169177289, "loss": 0.2119, "step": 1150 },
    { "epoch": 2.29, "learning_rate": 0.0003450173812282735, "loss": 0.2114, "step": 1160 },
    { "epoch": 2.31, "learning_rate": 0.0003435689455388181, "loss": 0.1456, "step": 1170 },
    { "epoch": 2.33, "learning_rate": 0.0003421205098493627, "loss": 0.1993, "step": 1180 },
    { "epoch": 2.35, "learning_rate": 0.0003406720741599073, "loss": 0.1467, "step": 1190 },
    { "epoch": 2.37, "learning_rate": 0.0003392236384704519, "loss": 0.237, "step": 1200 },
    { "epoch": 2.37, "eval_loss": 0.9139176607131958, "eval_rouge1": 0.19904761904761903, "eval_rouge2": 0.10285714285714284, "eval_rougeL": 0.19511904761904764, "eval_rougeLsum": 0.20619047619047615, "eval_runtime": 85.9903, "eval_samples_per_second": 0.233, "eval_steps_per_second": 0.233, "step": 1200 },
    { "epoch": 2.39, "learning_rate": 0.00033777520278099654, "loss": 0.1996, "step": 1210 },
    { "epoch": 2.41, "learning_rate": 0.00033632676709154113, "loss": 0.1725, "step": 1220 },
    { "epoch": 2.43, "learning_rate": 0.0003348783314020857, "loss": 0.2089, "step": 1230 },
    { "epoch": 2.45, "learning_rate": 0.00033342989571263036, "loss": 0.183, "step": 1240 },
    { "epoch": 2.47, "learning_rate": 0.00033198146002317495, "loss": 0.1631, "step": 1250 },
    { "epoch": 2.49, "learning_rate": 0.00033053302433371954, "loss": 0.1889, "step": 1260 },
    { "epoch": 2.51, "learning_rate": 0.00032908458864426424, "loss": 0.172, "step": 1270 },
    { "epoch": 2.53, "learning_rate": 0.00032763615295480883, "loss": 0.1236, "step": 1280 },
    { "epoch": 2.55, "learning_rate": 0.0003261877172653534, "loss": 0.1682, "step": 1290 },
    { "epoch": 2.57, "learning_rate": 0.00032473928157589806, "loss": 0.2019, "step": 1300 },
    { "epoch": 2.57, "eval_loss": 0.920964241027832, "eval_rouge1": 0.11282051282051282, "eval_rouge2": 0.03636363636363636, "eval_rougeL": 0.1128205128205128, "eval_rougeLsum": 0.11607142857142858, "eval_runtime": 82.9262, "eval_samples_per_second": 0.241, "eval_steps_per_second": 0.241, "step": 1300 },
    { "epoch": 2.59, "learning_rate": 0.00032329084588644265, "loss": 0.1681, "step": 1310 },
    { "epoch": 2.61, "learning_rate": 0.00032184241019698724, "loss": 0.2372, "step": 1320 },
    { "epoch": 2.63, "learning_rate": 0.0003203939745075319, "loss": 0.1343, "step": 1330 },
    { "epoch": 2.65, "learning_rate": 0.0003189455388180765, "loss": 0.2125, "step": 1340 },
    { "epoch": 2.67, "learning_rate": 0.0003174971031286211, "loss": 0.2217, "step": 1350 },
    { "epoch": 2.69, "learning_rate": 0.0003160486674391657, "loss": 0.1542, "step": 1360 },
    { "epoch": 2.71, "learning_rate": 0.0003146002317497103, "loss": 0.171, "step": 1370 },
    { "epoch": 2.73, "learning_rate": 0.00031315179606025494, "loss": 0.1808, "step": 1380 },
    { "epoch": 2.75, "learning_rate": 0.00031170336037079953, "loss": 0.1423, "step": 1390 },
    { "epoch": 2.77, "learning_rate": 0.0003102549246813441, "loss": 0.1794, "step": 1400 },
    { "epoch": 2.77, "eval_loss": 0.9037507772445679, "eval_rouge1": 0.11666666666666667, "eval_rouge2": 0.08636363636363635, "eval_rougeL": 0.11833333333333333, "eval_rougeLsum": 0.12064102564102563, "eval_runtime": 84.5851, "eval_samples_per_second": 0.236, "eval_steps_per_second": 0.236, "step": 1400 },
    { "epoch": 2.79, "learning_rate": 0.00030880648899188877, "loss": 0.2313, "step": 1410 },
    { "epoch": 2.8, "learning_rate": 0.00030735805330243336, "loss": 0.1548, "step": 1420 },
    { "epoch": 2.82, "learning_rate": 0.000305909617612978, "loss": 0.2318, "step": 1430 },
    { "epoch": 2.84, "learning_rate": 0.00030446118192352264, "loss": 0.1959, "step": 1440 },
    { "epoch": 2.86, "learning_rate": 0.00030301274623406723, "loss": 0.1438, "step": 1450 },
    { "epoch": 2.88, "learning_rate": 0.0003015643105446118, "loss": 0.1953, "step": 1460 },
    { "epoch": 2.9, "learning_rate": 0.00030011587485515647, "loss": 0.1542, "step": 1470 },
    { "epoch": 2.92, "learning_rate": 0.00029866743916570106, "loss": 0.1693, "step": 1480 },
    { "epoch": 2.94, "learning_rate": 0.00029721900347624565, "loss": 0.1836, "step": 1490 },
    { "epoch": 2.96, "learning_rate": 0.0002957705677867903, "loss": 0.1847, "step": 1500 },
    { "epoch": 2.96, "eval_loss": 0.8892697095870972, "eval_rouge1": 0.14335664335664336, "eval_rouge2": 0.13131313131313133, "eval_rougeL": 0.14375624375624377, "eval_rougeLsum": 0.14725274725274726, "eval_runtime": 86.985, "eval_samples_per_second": 0.23, "eval_steps_per_second": 0.23, "step": 1500 },
    { "epoch": 2.98, "learning_rate": 0.0002943221320973349, "loss": 0.1245, "step": 1510 },
    { "epoch": 3.0, "learning_rate": 0.00029287369640787947, "loss": 0.1917, "step": 1520 },
    { "epoch": 3.02, "learning_rate": 0.0002914252607184241, "loss": 0.205, "step": 1530 },
    { "epoch": 3.04, "learning_rate": 0.0002899768250289687, "loss": 0.1493, "step": 1540 },
    { "epoch": 3.06, "learning_rate": 0.0002885283893395133, "loss": 0.1596, "step": 1550 },
    { "epoch": 3.08, "learning_rate": 0.00028707995365005794, "loss": 0.1689, "step": 1560 },
    { "epoch": 3.1, "learning_rate": 0.0002856315179606025, "loss": 0.1371, "step": 1570 },
    { "epoch": 3.12, "learning_rate": 0.0002841830822711471, "loss": 0.1676, "step": 1580 },
    { "epoch": 3.14, "learning_rate": 0.0002827346465816918, "loss": 0.1441, "step": 1590 },
    { "epoch": 3.16, "learning_rate": 0.0002812862108922364, "loss": 0.1436, "step": 1600 },
    { "epoch": 3.16, "eval_loss": 0.8872199058532715, "eval_rouge1": 0.16825396825396824, "eval_rouge2": 0.05833333333333333, "eval_rougeL": 0.1650793650793651, "eval_rougeLsum": 0.17285714285714288, "eval_runtime": 83.5131, "eval_samples_per_second": 0.239, "eval_steps_per_second": 0.239, "step": 1600 },
    { "epoch": 3.18, "learning_rate": 0.000279837775202781, "loss": 0.2173, "step": 1610 },
    { "epoch": 3.2, "learning_rate": 0.00027838933951332564, "loss": 0.1457, "step": 1620 },
    { "epoch": 3.22, "learning_rate": 0.0002769409038238702, "loss": 0.1503, "step": 1630 },
    { "epoch": 3.24, "learning_rate": 0.00027549246813441487, "loss": 0.1371, "step": 1640 },
    { "epoch": 3.26, "learning_rate": 0.00027404403244495946, "loss": 0.1331, "step": 1650 },
    { "epoch": 3.28, "learning_rate": 0.00027259559675550405, "loss": 0.2068, "step": 1660 },
    { "epoch": 3.3, "learning_rate": 0.0002711471610660487, "loss": 0.2001, "step": 1670 },
    { "epoch": 3.32, "learning_rate": 0.0002696987253765933, "loss": 0.177, "step": 1680 },
    { "epoch": 3.34, "learning_rate": 0.00026825028968713787, "loss": 0.1772, "step": 1690 },
    { "epoch": 3.36, "learning_rate": 0.0002668018539976825, "loss": 0.138, "step": 1700 },
    { "epoch": 3.36, "eval_loss": 0.8929020762443542, "eval_rouge1": 0.22999999999999998, "eval_rouge2": 0.12491883116883117, "eval_rougeL": 0.22615384615384615, "eval_rougeLsum": 0.23115384615384618, "eval_runtime": 86.6494, "eval_samples_per_second": 0.231, "eval_steps_per_second": 0.231, "step": 1700 },
    { "epoch": 3.38, "learning_rate": 0.0002653534183082271, "loss": 0.164, "step": 1710 },
    { "epoch": 3.4, "learning_rate": 0.0002639049826187717, "loss": 0.1249, "step": 1720 },
    { "epoch": 3.42, "learning_rate": 0.00026245654692931634, "loss": 0.1356, "step": 1730 },
    { "epoch": 3.44, "learning_rate": 0.000261008111239861, "loss": 0.1374, "step": 1740 },
    { "epoch": 3.46, "learning_rate": 0.00025955967555040557, "loss": 0.2013, "step": 1750 },
    { "epoch": 3.48, "learning_rate": 0.0002581112398609502, "loss": 0.1337, "step": 1760 },
    { "epoch": 3.5, "learning_rate": 0.0002566628041714948, "loss": 0.1226, "step": 1770 },
    { "epoch": 3.52, "learning_rate": 0.0002552143684820394, "loss": 0.1166, "step": 1780 },
    { "epoch": 3.54, "learning_rate": 0.00025376593279258404, "loss": 0.2308, "step": 1790 },
    { "epoch": 3.56, "learning_rate": 0.00025231749710312863, "loss": 0.1265, "step": 1800 },
    { "epoch": 3.56, "eval_loss": 0.9203845858573914, "eval_rouge1": 0.17454545454545453, "eval_rouge2": 0.07291666666666667, "eval_rougeL": 0.16999999999999998, "eval_rougeLsum": 0.17727272727272728, "eval_runtime": 87.9704, "eval_samples_per_second": 0.227, "eval_steps_per_second": 0.227, "step": 1800 },
    { "epoch": 3.58, "learning_rate": 0.0002508690614136732, "loss": 0.1526, "step": 1810 },
    { "epoch": 3.6, "learning_rate": 0.00024942062572421786, "loss": 0.2201, "step": 1820 },
    { "epoch": 3.62, "learning_rate": 0.00024797219003476245, "loss": 0.1271, "step": 1830 },
    { "epoch": 3.64, "learning_rate": 0.0002465237543453071, "loss": 0.1749, "step": 1840 },
    { "epoch": 3.65, "learning_rate": 0.0002450753186558517, "loss": 0.133, "step": 1850 },
    { "epoch": 3.67, "learning_rate": 0.0002436268829663963, "loss": 0.2259, "step": 1860 },
    { "epoch": 3.69, "learning_rate": 0.00024217844727694092, "loss": 0.1549, "step": 1870 },
    { "epoch": 3.71, "learning_rate": 0.00024073001158748554, "loss": 0.1173, "step": 1880 },
    { "epoch": 3.73, "learning_rate": 0.00023928157589803013, "loss": 0.1337, "step": 1890 },
    { "epoch": 3.75, "learning_rate": 0.00023783314020857474, "loss": 0.1828, "step": 1900 },
    { "epoch": 3.75, "eval_loss": 0.9094276428222656, "eval_rouge1": 0.18, "eval_rouge2": 0.14886363636363636, "eval_rougeL": 0.18, "eval_rougeLsum": 0.18615384615384614, "eval_runtime": 84.5106, "eval_samples_per_second": 0.237, "eval_steps_per_second": 0.237, "step": 1900 },
    { "epoch": 3.77, "learning_rate": 0.00023638470451911936, "loss": 0.1821, "step": 1910 },
    { "epoch": 3.79, "learning_rate": 0.00023493626882966395, "loss": 0.1257, "step": 1920 },
    { "epoch": 3.81, "learning_rate": 0.00023348783314020857, "loss": 0.172, "step": 1930 },
    { "epoch": 3.83, "learning_rate": 0.0002320393974507532, "loss": 0.1833, "step": 1940 },
    { "epoch": 3.85, "learning_rate": 0.0002305909617612978, "loss": 0.1334, "step": 1950 },
    { "epoch": 3.87, "learning_rate": 0.00022914252607184242, "loss": 0.1736, "step": 1960 },
    { "epoch": 3.89, "learning_rate": 0.00022769409038238703, "loss": 0.1163, "step": 1970 },
    { "epoch": 3.91, "learning_rate": 0.00022624565469293165, "loss": 0.1844, "step": 1980 },
    { "epoch": 3.93, "learning_rate": 0.00022479721900347624, "loss": 0.1358, "step": 1990 },
    { "epoch": 3.95, "learning_rate": 0.00022334878331402086, "loss": 0.1447, "step": 2000 },
    { "epoch": 3.95, "eval_loss": 0.89415442943573, "eval_rouge1": 0.19, "eval_rouge2": 0.09886363636363635, "eval_rougeL": 0.18615384615384617, "eval_rougeLsum": 0.19615384615384615, "eval_runtime": 84.0506, "eval_samples_per_second": 0.238, "eval_steps_per_second": 0.238, "step": 2000 },
    { "epoch": 3.97, "learning_rate": 0.00022190034762456547, "loss": 0.1566, "step": 2010 },
    { "epoch": 3.99, "learning_rate": 0.0002204519119351101, "loss": 0.1132, "step": 2020 },
    { "epoch": 4.01, "learning_rate": 0.0002190034762456547, "loss": 0.1013, "step": 2030 },
    { "epoch": 4.03, "learning_rate": 0.00021755504055619932, "loss": 0.1554, "step": 2040 },
    { "epoch": 4.05, "learning_rate": 0.0002161066048667439, "loss": 0.1405, "step": 2050 },
    { "epoch": 4.07, "learning_rate": 0.00021465816917728853, "loss": 0.152, "step": 2060 },
    { "epoch": 4.09, "learning_rate": 0.00021320973348783315, "loss": 0.1591, "step": 2070 },
    { "epoch": 4.11, "learning_rate": 0.00021176129779837773, "loss": 0.152, "step": 2080 },
    { "epoch": 4.13, "learning_rate": 0.00021031286210892235, "loss": 0.1059, "step": 2090 },
    { "epoch": 4.15, "learning_rate": 0.000208864426419467, "loss": 0.099, "step": 2100 },
    { "epoch": 4.15, "eval_loss": 0.9297454953193665, "eval_rouge1": 0.23864468864468863, "eval_rouge2": 0.15, "eval_rougeL": 0.23516483516483513, "eval_rougeLsum": 0.24514652014652014, "eval_runtime": 85.7086, "eval_samples_per_second": 0.233, "eval_steps_per_second": 0.233, "step": 2100 },
    { "epoch": 4.17, "learning_rate": 0.0002074159907300116, "loss": 0.0916, "step": 2110 },
    { "epoch": 4.19, "learning_rate": 0.0002059675550405562, "loss": 0.2006, "step": 2120 },
    { "epoch": 4.21, "learning_rate": 0.00020451911935110082, "loss": 0.144, "step": 2130 },
    { "epoch": 4.23, "learning_rate": 0.00020307068366164544, "loss": 0.1893, "step": 2140 },
    { "epoch": 4.25, "learning_rate": 0.00020162224797219002, "loss": 0.1697, "step": 2150 },
    { "epoch": 4.27, "learning_rate": 0.00020017381228273464, "loss": 0.1101, "step": 2160 },
    { "epoch": 4.29, "learning_rate": 0.00019872537659327929, "loss": 0.1539, "step": 2170 },
    { "epoch": 4.31, "learning_rate": 0.00019727694090382387, "loss": 0.1038, "step": 2180 },
    { "epoch": 4.33, "learning_rate": 0.0001958285052143685, "loss": 0.1466, "step": 2190 },
    { "epoch": 4.35, "learning_rate": 0.0001943800695249131, "loss": 0.1366, "step": 2200 },
    { "epoch": 4.35, "eval_loss": 0.9124263525009155, "eval_rouge1": 0.12, "eval_rouge2": 0.07291666666666667, "eval_rougeL": 0.12, "eval_rougeLsum": 0.12454545454545454, "eval_runtime": 91.5173, "eval_samples_per_second": 0.219, "eval_steps_per_second": 0.219, "step": 2200 },
    { "epoch": 4.37, "learning_rate": 0.0001929316338354577, "loss": 0.1298, "step": 2210 },
    { "epoch": 4.39, "learning_rate": 0.00019148319814600231, "loss": 0.1886, "step": 2220 },
    { "epoch": 4.41, "learning_rate": 0.00019003476245654693, "loss": 0.1579, "step": 2230 },
    { "epoch": 4.43, "learning_rate": 0.00018858632676709152, "loss": 0.1078, "step": 2240 },
    { "epoch": 4.45, "learning_rate": 0.00018713789107763616, "loss": 0.1509, "step": 2250 },
    { "epoch": 4.47, "learning_rate": 0.00018568945538818078, "loss": 0.108, "step": 2260 },
    { "epoch": 4.49, "learning_rate": 0.0001842410196987254, "loss": 0.1305, "step": 2270 },
    { "epoch": 4.5, "learning_rate": 0.00018279258400927, "loss": 0.1257, "step": 2280 },
    { "epoch": 4.52, "learning_rate": 0.0001813441483198146, "loss": 0.117, "step": 2290 },
    { "epoch": 4.54, "learning_rate": 0.00017989571263035922, "loss": 0.1519, "step": 2300 },
    { "epoch": 4.54, "eval_loss": 0.9040172696113586, "eval_rouge1": 0.18727272727272726, "eval_rouge2": 0.09861111111111112, "eval_rougeL": 0.18333333333333332, "eval_rougeLsum": 0.1906060606060606, "eval_runtime": 82.9032, "eval_samples_per_second": 0.241, "eval_steps_per_second": 0.241, "step": 2300
|
},
|
|
{
|
|
"epoch": 4.56,
|
|
"learning_rate": 0.0001784472769409038,
|
|
"loss": 0.1536,
|
|
"step": 2310
|
|
},
|
|
{
|
|
"epoch": 4.58,
|
|
"learning_rate": 0.00017699884125144843,
|
|
"loss": 0.1159,
|
|
"step": 2320
|
|
},
|
|
{
|
|
"epoch": 4.6,
|
|
"learning_rate": 0.00017555040556199307,
|
|
"loss": 0.1257,
|
|
"step": 2330
|
|
},
|
|
{
|
|
"epoch": 4.62,
|
|
"learning_rate": 0.00017410196987253766,
|
|
"loss": 0.1698,
|
|
"step": 2340
|
|
},
|
|
{
|
|
"epoch": 4.64,
|
|
"learning_rate": 0.00017265353418308228,
|
|
"loss": 0.1369,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 4.66,
|
|
"learning_rate": 0.0001712050984936269,
|
|
"loss": 0.0809,
|
|
"step": 2360
|
|
},
|
|
{
|
|
"epoch": 4.68,
|
|
"learning_rate": 0.00016975666280417148,
|
|
"loss": 0.1003,
|
|
"step": 2370
|
|
},
|
|
{
|
|
"epoch": 4.7,
|
|
"learning_rate": 0.0001683082271147161,
|
|
"loss": 0.1066,
|
|
"step": 2380
|
|
},
|
|
{
|
|
"epoch": 4.72,
|
|
"learning_rate": 0.00016685979142526072,
|
|
"loss": 0.1621,
|
|
"step": 2390
|
|
},
|
|
{
|
|
"epoch": 4.74,
|
|
"learning_rate": 0.00016541135573580533,
|
|
"loss": 0.119,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 4.74,
|
|
"eval_loss": 0.9120545387268066,
|
|
"eval_rouge1": 0.12,
|
|
"eval_rouge2": 0.04583333333333333,
|
|
"eval_rougeL": 0.11285714285714285,
|
|
"eval_rougeLsum": 0.12285714285714286,
|
|
"eval_runtime": 84.1673,
|
|
"eval_samples_per_second": 0.238,
|
|
"eval_steps_per_second": 0.238,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 4.76,
|
|
"learning_rate": 0.00016396292004634995,
|
|
"loss": 0.1185,
|
|
"step": 2410
|
|
},
|
|
{
|
|
"epoch": 4.78,
|
|
"learning_rate": 0.00016251448435689457,
|
|
"loss": 0.1657,
|
|
"step": 2420
|
|
},
|
|
{
|
|
"epoch": 4.8,
|
|
"learning_rate": 0.00016106604866743918,
|
|
"loss": 0.1233,
|
|
"step": 2430
|
|
},
|
|
{
|
|
"epoch": 4.82,
|
|
"learning_rate": 0.00015961761297798377,
|
|
"loss": 0.1611,
|
|
"step": 2440
|
|
},
|
|
{
|
|
"epoch": 4.84,
|
|
"learning_rate": 0.0001581691772885284,
|
|
"loss": 0.1613,
|
|
"step": 2450
|
|
},
|
|
{
|
|
"epoch": 4.86,
|
|
"learning_rate": 0.000156720741599073,
|
|
"loss": 0.1107,
|
|
"step": 2460
|
|
},
|
|
{
|
|
"epoch": 4.88,
|
|
"learning_rate": 0.0001552723059096176,
|
|
"loss": 0.1436,
|
|
"step": 2470
|
|
},
|
|
{
|
|
"epoch": 4.9,
|
|
"learning_rate": 0.00015382387022016221,
|
|
"loss": 0.129,
|
|
"step": 2480
|
|
},
|
|
{
|
|
"epoch": 4.92,
|
|
"learning_rate": 0.00015237543453070686,
|
|
"loss": 0.1586,
|
|
"step": 2490
|
|
},
|
|
{
|
|
"epoch": 4.94,
|
|
"learning_rate": 0.00015092699884125145,
|
|
"loss": 0.1364,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 4.94,
|
|
"eval_loss": 0.9120429754257202,
|
|
"eval_rouge1": 0.20904761904761907,
|
|
"eval_rouge2": 0.12583333333333332,
|
|
"eval_rougeL": 0.20666666666666664,
|
|
"eval_rougeLsum": 0.21904761904761902,
|
|
"eval_runtime": 82.9463,
|
|
"eval_samples_per_second": 0.241,
|
|
"eval_steps_per_second": 0.241,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 4.96,
|
|
"learning_rate": 0.00014947856315179606,
|
|
"loss": 0.1688,
|
|
"step": 2510
|
|
},
|
|
{
|
|
"epoch": 4.98,
|
|
"learning_rate": 0.00014803012746234068,
|
|
"loss": 0.1385,
|
|
"step": 2520
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"learning_rate": 0.0001465816917728853,
|
|
"loss": 0.1592,
|
|
"step": 2530
|
|
},
|
|
{
|
|
"epoch": 5.02,
|
|
"learning_rate": 0.0001451332560834299,
|
|
"loss": 0.1014,
|
|
"step": 2540
|
|
},
|
|
{
|
|
"epoch": 5.04,
|
|
"learning_rate": 0.0001436848203939745,
|
|
"loss": 0.0796,
|
|
"step": 2550
|
|
},
|
|
{
|
|
"epoch": 5.06,
|
|
"learning_rate": 0.00014223638470451912,
|
|
"loss": 0.0981,
|
|
"step": 2560
|
|
},
|
|
{
|
|
"epoch": 5.08,
|
|
"learning_rate": 0.00014078794901506374,
|
|
"loss": 0.093,
|
|
"step": 2570
|
|
},
|
|
{
|
|
"epoch": 5.1,
|
|
"learning_rate": 0.00013933951332560835,
|
|
"loss": 0.1599,
|
|
"step": 2580
|
|
},
|
|
{
|
|
"epoch": 5.12,
|
|
"learning_rate": 0.00013789107763615297,
|
|
"loss": 0.1223,
|
|
"step": 2590
|
|
},
|
|
{
|
|
"epoch": 5.14,
|
|
"learning_rate": 0.00013644264194669756,
|
|
"loss": 0.1,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 5.14,
|
|
"eval_loss": 0.9408878087997437,
|
|
"eval_rouge1": 0.12507936507936507,
|
|
"eval_rouge2": 0.08333333333333333,
|
|
"eval_rougeL": 0.12396825396825398,
|
|
"eval_rougeLsum": 0.13111111111111112,
|
|
"eval_runtime": 96.2103,
|
|
"eval_samples_per_second": 0.208,
|
|
"eval_steps_per_second": 0.208,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 5.16,
|
|
"learning_rate": 0.00013499420625724218,
|
|
"loss": 0.1284,
|
|
"step": 2610
|
|
},
|
|
{
|
|
"epoch": 5.18,
|
|
"learning_rate": 0.0001335457705677868,
|
|
"loss": 0.1523,
|
|
"step": 2620
|
|
},
|
|
{
|
|
"epoch": 5.2,
|
|
"learning_rate": 0.00013209733487833138,
|
|
"loss": 0.1051,
|
|
"step": 2630
|
|
},
|
|
{
|
|
"epoch": 5.22,
|
|
"learning_rate": 0.00013064889918887603,
|
|
"loss": 0.1216,
|
|
"step": 2640
|
|
},
|
|
{
|
|
"epoch": 5.24,
|
|
"learning_rate": 0.00012920046349942064,
|
|
"loss": 0.1219,
|
|
"step": 2650
|
|
},
|
|
{
|
|
"epoch": 5.26,
|
|
"learning_rate": 0.00012775202780996523,
|
|
"loss": 0.1482,
|
|
"step": 2660
|
|
},
|
|
{
|
|
"epoch": 5.28,
|
|
"learning_rate": 0.00012630359212050985,
|
|
"loss": 0.1076,
|
|
"step": 2670
|
|
},
|
|
{
|
|
"epoch": 5.3,
|
|
"learning_rate": 0.00012485515643105447,
|
|
"loss": 0.121,
|
|
"step": 2680
|
|
},
|
|
{
|
|
"epoch": 5.32,
|
|
"learning_rate": 0.00012340672074159908,
|
|
"loss": 0.1448,
|
|
"step": 2690
|
|
},
|
|
{
|
|
"epoch": 5.34,
|
|
"learning_rate": 0.0001219582850521437,
|
|
"loss": 0.1683,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 5.34,
|
|
"eval_loss": 0.9422550201416016,
|
|
"eval_rouge1": 0.13818181818181818,
|
|
"eval_rouge2": 0.0951010101010101,
|
|
"eval_rougeL": 0.13713286713286715,
|
|
"eval_rougeLsum": 0.14174825174825176,
|
|
"eval_runtime": 96.365,
|
|
"eval_samples_per_second": 0.208,
|
|
"eval_steps_per_second": 0.208,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 5.36,
|
|
"learning_rate": 0.0001205098493626883,
|
|
"loss": 0.103,
|
|
"step": 2710
|
|
},
|
|
{
|
|
"epoch": 5.37,
|
|
"learning_rate": 0.0001190614136732329,
|
|
"loss": 0.1434,
|
|
"step": 2720
|
|
},
|
|
{
|
|
"epoch": 5.39,
|
|
"learning_rate": 0.00011761297798377752,
|
|
"loss": 0.1419,
|
|
"step": 2730
|
|
},
|
|
{
|
|
"epoch": 5.41,
|
|
"learning_rate": 0.00011616454229432214,
|
|
"loss": 0.1145,
|
|
"step": 2740
|
|
},
|
|
{
|
|
"epoch": 5.43,
|
|
"learning_rate": 0.00011471610660486674,
|
|
"loss": 0.1302,
|
|
"step": 2750
|
|
},
|
|
{
|
|
"epoch": 5.45,
|
|
"learning_rate": 0.00011326767091541136,
|
|
"loss": 0.0718,
|
|
"step": 2760
|
|
},
|
|
{
|
|
"epoch": 5.47,
|
|
"learning_rate": 0.00011181923522595596,
|
|
"loss": 0.1166,
|
|
"step": 2770
|
|
},
|
|
{
|
|
"epoch": 5.49,
|
|
"learning_rate": 0.0001103707995365006,
|
|
"loss": 0.1265,
|
|
"step": 2780
|
|
},
|
|
{
|
|
"epoch": 5.51,
|
|
"learning_rate": 0.0001089223638470452,
|
|
"loss": 0.0972,
|
|
"step": 2790
|
|
},
|
|
{
|
|
"epoch": 5.53,
|
|
"learning_rate": 0.0001074739281575898,
|
|
"loss": 0.1395,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 5.53,
|
|
"eval_loss": 0.9336325526237488,
|
|
"eval_rouge1": 0.16115384615384615,
|
|
"eval_rouge2": 0.12329545454545454,
|
|
"eval_rougeL": 0.15999999999999998,
|
|
"eval_rougeLsum": 0.16307692307692306,
|
|
"eval_runtime": 93.5346,
|
|
"eval_samples_per_second": 0.214,
|
|
"eval_steps_per_second": 0.214,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 5.55,
|
|
"learning_rate": 0.00010602549246813442,
|
|
"loss": 0.0808,
|
|
"step": 2810
|
|
},
|
|
{
|
|
"epoch": 5.57,
|
|
"learning_rate": 0.00010457705677867903,
|
|
"loss": 0.1205,
|
|
"step": 2820
|
|
},
|
|
{
|
|
"epoch": 5.59,
|
|
"learning_rate": 0.00010312862108922364,
|
|
"loss": 0.119,
|
|
"step": 2830
|
|
},
|
|
{
|
|
"epoch": 5.61,
|
|
"learning_rate": 0.00010168018539976825,
|
|
"loss": 0.1357,
|
|
"step": 2840
|
|
},
|
|
{
|
|
"epoch": 5.63,
|
|
"learning_rate": 0.00010023174971031286,
|
|
"loss": 0.1144,
|
|
"step": 2850
|
|
},
|
|
{
|
|
"epoch": 5.65,
|
|
"learning_rate": 9.878331402085749e-05,
|
|
"loss": 0.138,
|
|
"step": 2860
|
|
},
|
|
{
|
|
"epoch": 5.67,
|
|
"learning_rate": 9.733487833140209e-05,
|
|
"loss": 0.0998,
|
|
"step": 2870
|
|
},
|
|
{
|
|
"epoch": 5.69,
|
|
"learning_rate": 9.588644264194669e-05,
|
|
"loss": 0.1437,
|
|
"step": 2880
|
|
},
|
|
{
|
|
"epoch": 5.71,
|
|
"learning_rate": 9.443800695249131e-05,
|
|
"loss": 0.1053,
|
|
"step": 2890
|
|
},
|
|
{
|
|
"epoch": 5.73,
|
|
"learning_rate": 9.298957126303593e-05,
|
|
"loss": 0.1067,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 5.73,
|
|
"eval_loss": 0.9290033578872681,
|
|
"eval_rouge1": 0.2234265734265734,
|
|
"eval_rouge2": 0.13156565656565655,
|
|
"eval_rougeL": 0.21744755244755246,
|
|
"eval_rougeLsum": 0.2169230769230769,
|
|
"eval_runtime": 91.8958,
|
|
"eval_samples_per_second": 0.218,
|
|
"eval_steps_per_second": 0.218,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 5.75,
|
|
"learning_rate": 9.154113557358054e-05,
|
|
"loss": 0.1225,
|
|
"step": 2910
|
|
},
|
|
{
|
|
"epoch": 5.77,
|
|
"learning_rate": 9.009269988412515e-05,
|
|
"loss": 0.0867,
|
|
"step": 2920
|
|
},
|
|
{
|
|
"epoch": 5.79,
|
|
"learning_rate": 8.864426419466975e-05,
|
|
"loss": 0.1325,
|
|
"step": 2930
|
|
},
|
|
{
|
|
"epoch": 5.81,
|
|
"learning_rate": 8.719582850521438e-05,
|
|
"loss": 0.118,
|
|
"step": 2940
|
|
},
|
|
{
|
|
"epoch": 5.83,
|
|
"learning_rate": 8.574739281575898e-05,
|
|
"loss": 0.112,
|
|
"step": 2950
|
|
},
|
|
{
|
|
"epoch": 5.85,
|
|
"learning_rate": 8.429895712630359e-05,
|
|
"loss": 0.1326,
|
|
"step": 2960
|
|
},
|
|
{
|
|
"epoch": 5.87,
|
|
"learning_rate": 8.28505214368482e-05,
|
|
"loss": 0.1506,
|
|
"step": 2970
|
|
},
|
|
{
|
|
"epoch": 5.89,
|
|
"learning_rate": 8.140208574739282e-05,
|
|
"loss": 0.1499,
|
|
"step": 2980
|
|
},
|
|
{
|
|
"epoch": 5.91,
|
|
"learning_rate": 7.995365005793744e-05,
|
|
"loss": 0.1092,
|
|
"step": 2990
|
|
},
|
|
{
|
|
"epoch": 5.93,
|
|
"learning_rate": 7.850521436848204e-05,
|
|
"loss": 0.1104,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 5.93,
|
|
"eval_loss": 0.9244877696037292,
|
|
"eval_rouge1": 0.2,
|
|
"eval_rouge2": 0.1,
|
|
"eval_rougeL": 0.19153846153846155,
|
|
"eval_rougeLsum": 0.19153846153846155,
|
|
"eval_runtime": 93.7022,
|
|
"eval_samples_per_second": 0.213,
|
|
"eval_steps_per_second": 0.213,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 5.95,
|
|
"learning_rate": 7.705677867902664e-05,
|
|
"loss": 0.0824,
|
|
"step": 3010
|
|
},
|
|
{
|
|
"epoch": 5.97,
|
|
"learning_rate": 7.560834298957127e-05,
|
|
"loss": 0.1048,
|
|
"step": 3020
|
|
},
|
|
{
|
|
"epoch": 5.99,
|
|
"learning_rate": 7.415990730011588e-05,
|
|
"loss": 0.1295,
|
|
"step": 3030
|
|
},
|
|
{
|
|
"epoch": 6.01,
|
|
"learning_rate": 7.271147161066048e-05,
|
|
"loss": 0.1296,
|
|
"step": 3040
|
|
},
|
|
{
|
|
"epoch": 6.03,
|
|
"learning_rate": 7.12630359212051e-05,
|
|
"loss": 0.1146,
|
|
"step": 3050
|
|
},
|
|
{
|
|
"epoch": 6.05,
|
|
"learning_rate": 6.981460023174971e-05,
|
|
"loss": 0.1158,
|
|
"step": 3060
|
|
},
|
|
{
|
|
"epoch": 6.07,
|
|
"learning_rate": 6.836616454229433e-05,
|
|
"loss": 0.094,
|
|
"step": 3070
|
|
},
|
|
{
|
|
"epoch": 6.09,
|
|
"learning_rate": 6.691772885283893e-05,
|
|
"loss": 0.1141,
|
|
"step": 3080
|
|
},
|
|
{
|
|
"epoch": 6.11,
|
|
"learning_rate": 6.546929316338354e-05,
|
|
"loss": 0.1025,
|
|
"step": 3090
|
|
},
|
|
{
|
|
"epoch": 6.13,
|
|
"learning_rate": 6.402085747392817e-05,
|
|
"loss": 0.1474,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 6.13,
|
|
"eval_loss": 0.9422538876533508,
|
|
"eval_rouge1": 0.20069541569541566,
|
|
"eval_rouge2": 0.10303030303030303,
|
|
"eval_rougeL": 0.19625097125097124,
|
|
"eval_rougeLsum": 0.19848096348096347,
|
|
"eval_runtime": 94.7246,
|
|
"eval_samples_per_second": 0.211,
|
|
"eval_steps_per_second": 0.211,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 6.15,
|
|
"learning_rate": 6.257242178447277e-05,
|
|
"loss": 0.09,
|
|
"step": 3110
|
|
},
|
|
{
|
|
"epoch": 6.17,
|
|
"learning_rate": 6.112398609501739e-05,
|
|
"loss": 0.1235,
|
|
"step": 3120
|
|
},
|
|
{
|
|
"epoch": 6.19,
|
|
"learning_rate": 5.9675550405561996e-05,
|
|
"loss": 0.0733,
|
|
"step": 3130
|
|
},
|
|
{
|
|
"epoch": 6.21,
|
|
"learning_rate": 5.822711471610661e-05,
|
|
"loss": 0.1035,
|
|
"step": 3140
|
|
},
|
|
{
|
|
"epoch": 6.22,
|
|
"learning_rate": 5.6778679026651216e-05,
|
|
"loss": 0.1027,
|
|
"step": 3150
|
|
},
|
|
{
|
|
"epoch": 6.24,
|
|
"learning_rate": 5.533024333719583e-05,
|
|
"loss": 0.0863,
|
|
"step": 3160
|
|
},
|
|
{
|
|
"epoch": 6.26,
|
|
"learning_rate": 5.388180764774044e-05,
|
|
"loss": 0.095,
|
|
"step": 3170
|
|
},
|
|
{
|
|
"epoch": 6.28,
|
|
"learning_rate": 5.243337195828506e-05,
|
|
"loss": 0.1103,
|
|
"step": 3180
|
|
},
|
|
{
|
|
"epoch": 6.3,
|
|
"learning_rate": 5.098493626882966e-05,
|
|
"loss": 0.1325,
|
|
"step": 3190
|
|
},
|
|
{
|
|
"epoch": 6.32,
|
|
"learning_rate": 4.953650057937428e-05,
|
|
"loss": 0.1052,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 6.32,
|
|
"eval_loss": 0.9328528642654419,
|
|
"eval_rouge1": 0.2023076923076923,
|
|
"eval_rouge2": 0.1102272727272727,
|
|
"eval_rougeL": 0.19999999999999998,
|
|
"eval_rougeLsum": 0.2,
|
|
"eval_runtime": 92.8302,
|
|
"eval_samples_per_second": 0.215,
|
|
"eval_steps_per_second": 0.215,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 6.34,
|
|
"learning_rate": 4.808806488991889e-05,
|
|
"loss": 0.1293,
|
|
"step": 3210
|
|
},
|
|
{
|
|
"epoch": 6.36,
|
|
"learning_rate": 4.6639629200463506e-05,
|
|
"loss": 0.1301,
|
|
"step": 3220
|
|
},
|
|
{
|
|
"epoch": 6.38,
|
|
"learning_rate": 4.519119351100811e-05,
|
|
"loss": 0.0873,
|
|
"step": 3230
|
|
},
|
|
{
|
|
"epoch": 6.4,
|
|
"learning_rate": 4.3742757821552725e-05,
|
|
"loss": 0.1017,
|
|
"step": 3240
|
|
},
|
|
{
|
|
"epoch": 6.42,
|
|
"learning_rate": 4.2294322132097335e-05,
|
|
"loss": 0.0846,
|
|
"step": 3250
|
|
},
|
|
{
|
|
"epoch": 6.44,
|
|
"learning_rate": 4.084588644264195e-05,
|
|
"loss": 0.0898,
|
|
"step": 3260
|
|
},
|
|
{
|
|
"epoch": 6.46,
|
|
"learning_rate": 3.9397450753186555e-05,
|
|
"loss": 0.1494,
|
|
"step": 3270
|
|
},
|
|
{
|
|
"epoch": 6.48,
|
|
"learning_rate": 3.794901506373117e-05,
|
|
"loss": 0.0742,
|
|
"step": 3280
|
|
},
|
|
{
|
|
"epoch": 6.5,
|
|
"learning_rate": 3.650057937427578e-05,
|
|
"loss": 0.0793,
|
|
"step": 3290
|
|
},
|
|
{
|
|
"epoch": 6.52,
|
|
"learning_rate": 3.50521436848204e-05,
|
|
"loss": 0.1203,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 6.52,
|
|
"eval_loss": 0.9380243420600891,
|
|
"eval_rouge1": 0.2023076923076923,
|
|
"eval_rouge2": 0.1102272727272727,
|
|
"eval_rougeL": 0.19999999999999998,
|
|
"eval_rougeLsum": 0.2,
|
|
"eval_runtime": 94.5066,
|
|
"eval_samples_per_second": 0.212,
|
|
"eval_steps_per_second": 0.212,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 6.54,
|
|
"learning_rate": 3.360370799536501e-05,
|
|
"loss": 0.1257,
|
|
"step": 3310
|
|
},
|
|
{
|
|
"epoch": 6.56,
|
|
"learning_rate": 3.215527230590962e-05,
|
|
"loss": 0.1177,
|
|
"step": 3320
|
|
},
|
|
{
|
|
"epoch": 6.58,
|
|
"learning_rate": 3.070683661645423e-05,
|
|
"loss": 0.1359,
|
|
"step": 3330
|
|
},
|
|
{
|
|
"epoch": 6.6,
|
|
"learning_rate": 2.9258400926998842e-05,
|
|
"loss": 0.1303,
|
|
"step": 3340
|
|
},
|
|
{
|
|
"epoch": 6.62,
|
|
"learning_rate": 2.7809965237543452e-05,
|
|
"loss": 0.0968,
|
|
"step": 3350
|
|
},
|
|
{
|
|
"epoch": 6.64,
|
|
"learning_rate": 2.6361529548088065e-05,
|
|
"loss": 0.1061,
|
|
"step": 3360
|
|
},
|
|
{
|
|
"epoch": 6.66,
|
|
"learning_rate": 2.4913093858632675e-05,
|
|
"loss": 0.1307,
|
|
"step": 3370
|
|
},
|
|
{
|
|
"epoch": 6.68,
|
|
"learning_rate": 2.346465816917729e-05,
|
|
"loss": 0.0981,
|
|
"step": 3380
|
|
},
|
|
{
|
|
"epoch": 6.7,
|
|
"learning_rate": 2.20162224797219e-05,
|
|
"loss": 0.0901,
|
|
"step": 3390
|
|
},
|
|
{
|
|
"epoch": 6.72,
|
|
"learning_rate": 2.0567786790266515e-05,
|
|
"loss": 0.1125,
|
|
"step": 3400
|
|
},
|
|
{
|
|
"epoch": 6.72,
|
|
"eval_loss": 0.9421626925468445,
|
|
"eval_rouge1": 0.18958041958041955,
|
|
"eval_rouge2": 0.0977272727272727,
|
|
"eval_rougeL": 0.18615384615384614,
|
|
"eval_rougeLsum": 0.19,
|
|
"eval_runtime": 96.4239,
|
|
"eval_samples_per_second": 0.207,
|
|
"eval_steps_per_second": 0.207,
|
|
"step": 3400
|
|
},
|
|
{
|
|
"epoch": 6.74,
|
|
"learning_rate": 1.9119351100811125e-05,
|
|
"loss": 0.0823,
|
|
"step": 3410
|
|
},
|
|
{
|
|
"epoch": 6.76,
|
|
"learning_rate": 1.767091541135574e-05,
|
|
"loss": 0.1016,
|
|
"step": 3420
|
|
},
|
|
{
|
|
"epoch": 6.78,
|
|
"learning_rate": 1.6222479721900348e-05,
|
|
"loss": 0.1172,
|
|
"step": 3430
|
|
},
|
|
{
|
|
"epoch": 6.8,
|
|
"learning_rate": 1.477404403244496e-05,
|
|
"loss": 0.0959,
|
|
"step": 3440
|
|
},
|
|
{
|
|
"epoch": 6.82,
|
|
"learning_rate": 1.3325608342989572e-05,
|
|
"loss": 0.1534,
|
|
"step": 3450
|
|
},
|
|
{
|
|
"epoch": 6.84,
|
|
"learning_rate": 1.1877172653534183e-05,
|
|
"loss": 0.125,
|
|
"step": 3460
|
|
},
|
|
{
|
|
"epoch": 6.86,
|
|
"learning_rate": 1.0428736964078795e-05,
|
|
"loss": 0.1221,
|
|
"step": 3470
|
|
},
|
|
{
|
|
"epoch": 6.88,
|
|
"learning_rate": 8.980301274623406e-06,
|
|
"loss": 0.1391,
|
|
"step": 3480
|
|
},
|
|
{
|
|
"epoch": 6.9,
|
|
"learning_rate": 7.531865585168019e-06,
|
|
"loss": 0.0986,
|
|
"step": 3490
|
|
},
|
|
{
|
|
"epoch": 6.92,
|
|
"learning_rate": 6.083429895712631e-06,
|
|
"loss": 0.1323,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 6.92,
|
|
"eval_loss": 0.9433181881904602,
|
|
"eval_rouge1": 0.19,
|
|
"eval_rouge2": 0.0977272727272727,
|
|
"eval_rougeL": 0.18615384615384617,
|
|
"eval_rougeLsum": 0.19,
|
|
"eval_runtime": 94.6833,
|
|
"eval_samples_per_second": 0.211,
|
|
"eval_steps_per_second": 0.211,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 6.94,
|
|
"learning_rate": 0.00015593561368209256,
|
|
"loss": 0.1147,
|
|
"step": 3510
|
|
},
|
|
{
|
|
"epoch": 6.95,
|
|
"learning_rate": 0.00015492957746478874,
|
|
"loss": 0.0879,
|
|
"step": 3520
|
|
},
|
|
{
|
|
"epoch": 6.97,
|
|
"learning_rate": 0.0001539235412474849,
|
|
"loss": 0.095,
|
|
"step": 3530
|
|
},
|
|
{
|
|
"epoch": 6.99,
|
|
"learning_rate": 0.00015291750503018109,
|
|
"loss": 0.1277,
|
|
"step": 3540
|
|
},
|
|
{
|
|
"epoch": 7.02,
|
|
"learning_rate": 0.00015191146881287726,
|
|
"loss": 0.1332,
|
|
"step": 3550
|
|
},
|
|
{
|
|
"epoch": 7.04,
|
|
"learning_rate": 0.00015090543259557344,
|
|
"loss": 0.1055,
|
|
"step": 3560
|
|
},
|
|
{
|
|
"epoch": 7.06,
|
|
"learning_rate": 0.00014989939637826964,
|
|
"loss": 0.1114,
|
|
"step": 3570
|
|
},
|
|
{
|
|
"epoch": 7.07,
|
|
"learning_rate": 0.00014889336016096582,
|
|
"loss": 0.0983,
|
|
"step": 3580
|
|
},
|
|
{
|
|
"epoch": 7.09,
|
|
"learning_rate": 0.00014788732394366196,
|
|
"loss": 0.0823,
|
|
"step": 3590
|
|
},
|
|
{
|
|
"epoch": 7.11,
|
|
"learning_rate": 0.00014688128772635814,
|
|
"loss": 0.0949,
|
|
"step": 3600
|
|
},
|
|
{
|
|
"epoch": 7.11,
|
|
"eval_loss": 0.9529324769973755,
|
|
"eval_rouge1": 0.1603205128205128,
|
|
"eval_rouge2": 0.09454545454545453,
|
|
"eval_rougeL": 0.16115384615384615,
|
|
"eval_rougeLsum": 0.15993589743589742,
|
|
"eval_runtime": 96.2453,
|
|
"eval_samples_per_second": 0.208,
|
|
"eval_steps_per_second": 0.208,
|
|
"step": 3600
|
|
},
|
|
{
|
|
"epoch": 7.13,
|
|
"learning_rate": 0.00014587525150905434,
|
|
"loss": 0.0698,
|
|
"step": 3610
|
|
},
|
|
{
|
|
"epoch": 7.15,
|
|
"learning_rate": 0.00014486921529175052,
|
|
"loss": 0.0694,
|
|
"step": 3620
|
|
},
|
|
{
|
|
"epoch": 7.17,
|
|
"learning_rate": 0.0001438631790744467,
|
|
"loss": 0.1078,
|
|
"step": 3630
|
|
},
|
|
{
|
|
"epoch": 7.19,
|
|
"learning_rate": 0.00014285714285714284,
|
|
"loss": 0.1292,
|
|
"step": 3640
|
|
},
|
|
{
|
|
"epoch": 7.21,
|
|
"learning_rate": 0.00014185110663983904,
|
|
"loss": 0.1175,
|
|
"step": 3650
|
|
},
|
|
{
|
|
"epoch": 7.23,
|
|
"learning_rate": 0.00014084507042253522,
|
|
"loss": 0.1168,
|
|
"step": 3660
|
|
},
|
|
{
|
|
"epoch": 7.25,
|
|
"learning_rate": 0.0001398390342052314,
|
|
"loss": 0.0948,
|
|
"step": 3670
|
|
},
|
|
{
|
|
"epoch": 7.27,
|
|
"learning_rate": 0.00013883299798792757,
|
|
"loss": 0.1314,
|
|
"step": 3680
|
|
},
|
|
{
|
|
"epoch": 7.29,
|
|
"learning_rate": 0.00013782696177062375,
|
|
"loss": 0.1068,
|
|
"step": 3690
|
|
},
|
|
{
|
|
"epoch": 7.31,
|
|
"learning_rate": 0.00013682092555331992,
|
|
"loss": 0.1059,
|
|
"step": 3700
|
|
},
|
|
{
|
|
"epoch": 7.31,
|
|
"eval_loss": 0.9520353078842163,
|
|
"eval_rouge1": 0.13832167832167833,
|
|
"eval_rouge2": 0.0977272727272727,
|
|
"eval_rougeL": 0.14185314685314687,
|
|
"eval_rougeLsum": 0.13999999999999999,
|
|
"eval_runtime": 91.3536,
|
|
"eval_samples_per_second": 0.219,
|
|
"eval_steps_per_second": 0.219,
|
|
"step": 3700
|
|
},
|
|
{
|
|
"epoch": 7.33,
|
|
"learning_rate": 0.0001358148893360161,
|
|
"loss": 0.0945,
|
|
"step": 3710
|
|
},
|
|
{
|
|
"epoch": 7.35,
|
|
"learning_rate": 0.00013480885311871227,
|
|
"loss": 0.1298,
|
|
"step": 3720
|
|
},
|
|
{
|
|
"epoch": 7.37,
|
|
"learning_rate": 0.00013380281690140845,
|
|
"loss": 0.0972,
|
|
"step": 3730
|
|
},
|
|
{
|
|
"epoch": 7.39,
|
|
"learning_rate": 0.00013279678068410465,
|
|
"loss": 0.1007,
|
|
"step": 3740
|
|
},
|
|
{
|
|
"epoch": 7.41,
|
|
"learning_rate": 0.0001317907444668008,
|
|
"loss": 0.1194,
|
|
"step": 3750
|
|
},
|
|
{
|
|
"epoch": 7.43,
|
|
"learning_rate": 0.00013078470824949697,
|
|
"loss": 0.1416,
|
|
"step": 3760
|
|
},
|
|
{
|
|
"epoch": 7.45,
|
|
"learning_rate": 0.00012977867203219315,
|
|
"loss": 0.1112,
|
|
"step": 3770
|
|
},
|
|
{
|
|
"epoch": 7.47,
|
|
"learning_rate": 0.00012877263581488935,
|
|
"loss": 0.1232,
|
|
"step": 3780
|
|
},
|
|
{
|
|
"epoch": 7.49,
|
|
"learning_rate": 0.00012776659959758553,
|
|
"loss": 0.1053,
|
|
"step": 3790
|
|
},
|
|
{
|
|
"epoch": 7.51,
|
|
"learning_rate": 0.0001267605633802817,
|
|
"loss": 0.1482,
|
|
"step": 3800
|
|
},
|
|
{
|
|
"epoch": 7.51,
|
|
"eval_loss": 0.9513714909553528,
|
|
"eval_rouge1": 0.21115384615384616,
|
|
"eval_rouge2": 0.12045454545454545,
|
|
"eval_rougeL": 0.20999999999999996,
|
|
"eval_rougeLsum": 0.20730769230769228,
|
|
"eval_runtime": 90.7686,
|
|
"eval_samples_per_second": 0.22,
|
|
"eval_steps_per_second": 0.22,
|
|
"step": 3800
|
|
},
|
|
{
|
|
"epoch": 7.53,
|
|
"learning_rate": 0.00012575452716297785,
|
|
"loss": 0.1281,
|
|
"step": 3810
|
|
},
|
|
{
|
|
"epoch": 7.55,
|
|
"learning_rate": 0.00012474849094567405,
|
|
"loss": 0.1547,
|
|
"step": 3820
|
|
},
|
|
{
|
|
"epoch": 7.57,
|
|
"learning_rate": 0.00012374245472837023,
|
|
"loss": 0.1283,
|
|
"step": 3830
|
|
},
|
|
{
|
|
"epoch": 7.59,
|
|
"learning_rate": 0.0001227364185110664,
|
|
"loss": 0.174,
|
|
"step": 3840
|
|
},
|
|
{
|
|
"epoch": 7.61,
|
|
"learning_rate": 0.00012173038229376258,
|
|
"loss": 0.0827,
|
|
"step": 3850
|
|
},
|
|
{
|
|
"epoch": 7.63,
|
|
"learning_rate": 0.00012072434607645876,
|
|
"loss": 0.1174,
|
|
"step": 3860
|
|
},
|
|
{
|
|
"epoch": 7.65,
|
|
"learning_rate": 0.00011971830985915493,
|
|
"loss": 0.0914,
|
|
"step": 3870
|
|
},
|
|
{
|
|
"epoch": 7.67,
|
|
"learning_rate": 0.0001187122736418511,
|
|
"loss": 0.1205,
|
|
"step": 3880
|
|
},
|
|
{
|
|
"epoch": 7.69,
|
|
"learning_rate": 0.00011770623742454728,
|
|
"loss": 0.0821,
|
|
"step": 3890
|
|
},
|
|
{
|
|
"epoch": 7.71,
|
|
"learning_rate": 0.00011670020120724347,
|
|
"loss": 0.1268,
|
|
"step": 3900
|
|
},
|
|
{
|
|
"epoch": 7.71,
|
|
"eval_loss": 0.938602089881897,
|
|
"eval_rouge1": 0.20384615384615384,
|
|
"eval_rouge2": 0.10909090909090909,
|
|
"eval_rougeL": 0.20153846153846153,
|
|
"eval_rougeLsum": 0.20076923076923076,
|
|
"eval_runtime": 89.8217,
|
|
"eval_samples_per_second": 0.223,
|
|
"eval_steps_per_second": 0.223,
|
|
"step": 3900
|
|
},
|
|
{
|
|
"epoch": 7.73,
|
|
"learning_rate": 0.00011569416498993963,
|
|
"loss": 0.0964,
|
|
"step": 3910
|
|
},
|
|
{
|
|
"epoch": 7.75,
|
|
"learning_rate": 0.00011468812877263582,
|
|
"loss": 0.0878,
|
|
"step": 3920
|
|
},
|
|
{
|
|
"epoch": 7.77,
|
|
"learning_rate": 0.00011368209255533198,
|
|
"loss": 0.1205,
|
|
"step": 3930
|
|
},
|
|
{
|
|
"epoch": 7.79,
|
|
"learning_rate": 0.00011267605633802817,
|
|
"loss": 0.0916,
|
|
"step": 3940
|
|
},
|
|
{
|
|
"epoch": 7.8,
|
|
"learning_rate": 0.00011167002012072435,
|
|
"loss": 0.1021,
|
|
"step": 3950
|
|
},
|
|
{
|
|
"epoch": 7.82,
|
|
"learning_rate": 0.00011066398390342052,
|
|
"loss": 0.0843,
|
|
"step": 3960
|
|
},
|
|
{
|
|
"epoch": 7.84,
|
|
"learning_rate": 0.0001096579476861167,
|
|
"loss": 0.0947,
|
|
"step": 3970
|
|
},
|
|
{
|
|
"epoch": 7.86,
|
|
"learning_rate": 0.00010865191146881289,
|
|
"loss": 0.0884,
|
|
"step": 3980
|
|
},
|
|
{
|
|
"epoch": 7.88,
|
|
"learning_rate": 0.00010764587525150905,
|
|
"loss": 0.0943,
|
|
"step": 3990
|
|
},
|
|
{
|
|
"epoch": 7.9,
|
|
"learning_rate": 0.00010663983903420524,
|
|
"loss": 0.089,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 7.9,
|
|
"eval_loss": 0.9426229596138,
|
|
"eval_rouge1": 0.15076923076923077,
|
|
"eval_rouge2": 0.1181818181818182,
|
|
"eval_rougeL": 0.15615384615384614,
|
|
"eval_rougeLsum": 0.15384615384615383,
|
|
"eval_runtime": 90.9011,
|
|
"eval_samples_per_second": 0.22,
|
|
"eval_steps_per_second": 0.22,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 7.92,
|
|
"learning_rate": 0.00010563380281690141,
|
|
"loss": 0.1264,
|
|
"step": 4010
|
|
},
|
|
{
|
|
"epoch": 7.94,
|
|
"learning_rate": 0.00010462776659959759,
|
|
"loss": 0.1026,
|
|
"step": 4020
|
|
},
|
|
{
|
|
"epoch": 7.96,
|
|
"learning_rate": 0.00010362173038229377,
|
|
"loss": 0.1348,
|
|
"step": 4030
|
|
},
|
|
{
|
|
"epoch": 7.98,
|
|
"learning_rate": 0.00010261569416498995,
|
|
"loss": 0.1893,
|
|
"step": 4040
|
|
},
|
|
{
|
|
"epoch": 8.0,
|
|
"learning_rate": 0.00010160965794768612,
|
|
"loss": 0.1048,
|
|
"step": 4050
|
|
},
|
|
{
|
|
"epoch": 8.02,
|
|
"learning_rate": 0.0001006036217303823,
|
|
"loss": 0.1576,
|
|
"step": 4060
|
|
},
|
|
{
|
|
"epoch": 8.04,
|
|
"learning_rate": 9.959758551307847e-05,
|
|
"loss": 0.1084,
|
|
"step": 4070
|
|
},
|
|
{
|
|
"epoch": 8.06,
|
|
"learning_rate": 9.859154929577464e-05,
|
|
"loss": 0.089,
|
|
"step": 4080
|
|
},
|
|
{
|
|
"epoch": 8.08,
|
|
"learning_rate": 9.758551307847083e-05,
|
|
"loss": 0.0989,
|
|
"step": 4090
|
|
},
|
|
{
|
|
"epoch": 8.1,
|
|
"learning_rate": 9.6579476861167e-05,
|
|
"loss": 0.108,
|
|
"step": 4100
|
|
},
|
|
{
|
|
"epoch": 8.1,
|
|
"eval_loss": 0.9726575016975403,
|
|
"eval_rouge1": 0.1383333333333333,
|
|
"eval_rouge2": 0.10340909090909092,
|
|
"eval_rougeL": 0.14448717948717948,
|
|
"eval_rougeLsum": 0.13666666666666666,
|
|
"eval_runtime": 89.426,
|
|
"eval_samples_per_second": 0.224,
|
|
"eval_steps_per_second": 0.224,
|
|
"step": 4100
|
|
},
|
|
{
|
|
"epoch": 8.12,
|
|
"learning_rate": 9.557344064386318e-05,
|
|
"loss": 0.0576,
|
|
"step": 4110
|
|
},
|
|
{
|
|
"epoch": 8.14,
|
|
"learning_rate": 9.456740442655936e-05,
|
|
"loss": 0.0937,
|
|
"step": 4120
|
|
},
|
|
{
|
|
"epoch": 8.16,
|
|
"learning_rate": 9.356136820925553e-05,
|
|
"loss": 0.0814,
|
|
"step": 4130
|
|
},
|
|
{
|
|
"epoch": 8.18,
|
|
"learning_rate": 9.255533199195171e-05,
|
|
"loss": 0.0832,
|
|
"step": 4140
|
|
},
|
|
{
|
|
"epoch": 8.2,
|
|
"learning_rate": 9.15492957746479e-05,
|
|
"loss": 0.0881,
|
|
"step": 4150
|
|
},
|
|
{
|
|
"epoch": 8.22,
|
|
"learning_rate": 9.054325955734406e-05,
|
|
"loss": 0.0785,
|
|
"step": 4160
|
|
},
|
|
{
|
|
"epoch": 8.24,
|
|
"learning_rate": 8.953722334004025e-05,
|
|
"loss": 0.1046,
|
|
"step": 4170
|
|
},
|
|
{
|
|
"epoch": 8.26,
|
|
"learning_rate": 8.853118712273642e-05,
|
|
"loss": 0.1137,
|
|
"step": 4180
|
|
},
|
|
{
|
|
"epoch": 8.28,
|
|
"learning_rate": 8.75251509054326e-05,
|
|
"loss": 0.0966,
|
|
"step": 4190
|
|
},
|
|
{
|
|
"epoch": 8.3,
|
|
"learning_rate": 8.651911468812877e-05,
|
|
"loss": 0.1292,
|
|
"step": 4200
|
|
},
|
|
{
|
|
"epoch": 8.3,
|
|
"eval_loss": 0.9639500379562378,
|
|
"eval_rouge1": 0.21000000000000002,
|
|
"eval_rouge2": 0.12563131313131312,
|
|
"eval_rougeL": 0.2097902097902098,
|
|
"eval_rougeLsum": 0.20979020979020976,
|
|
"eval_runtime": 86.8779,
|
|
"eval_samples_per_second": 0.23,
|
|
"eval_steps_per_second": 0.23,
|
|
"step": 4200
|
|
},
|
|
{
|
|
"epoch": 8.32,
|
|
"learning_rate": 8.551307847082495e-05,
|
|
"loss": 0.099,
|
|
"step": 4210
|
|
},
|
|
{
|
|
"epoch": 8.34,
|
|
"learning_rate": 8.450704225352113e-05,
|
|
"loss": 0.082,
|
|
"step": 4220
|
|
},
|
|
{
|
|
"epoch": 8.36,
|
|
"learning_rate": 8.350100603621731e-05,
|
|
"loss": 0.1007,
|
|
"step": 4230
|
|
},
|
|
{
|
|
"epoch": 8.38,
|
|
"learning_rate": 8.249496981891348e-05,
|
|
"loss": 0.0826,
|
|
"step": 4240
|
|
},
|
|
{
|
|
"epoch": 8.4,
|
|
"learning_rate": 8.148893360160967e-05,
|
|
"loss": 0.0823,
|
|
"step": 4250
|
|
},
|
|
{
|
|
"epoch": 8.42,
|
|
"learning_rate": 8.048289738430584e-05,
|
|
"loss": 0.0863,
|
|
"step": 4260
|
|
},
|
|
{
|
|
"epoch": 8.44,
|
|
"learning_rate": 7.9476861167002e-05,
|
|
"loss": 0.1037,
|
|
"step": 4270
|
|
},
|
|
{
|
|
"epoch": 8.46,
|
|
"learning_rate": 7.847082494969819e-05,
|
|
"loss": 0.097,
|
|
"step": 4280
|
|
},
|
|
{
|
|
"epoch": 8.48,
|
|
"learning_rate": 7.746478873239437e-05,
|
|
"loss": 0.0589,
|
|
"step": 4290
|
|
},
|
|
{
|
|
"epoch": 8.5,
|
|
"learning_rate": 7.645875251509054e-05,
|
|
"loss": 0.0868,
|
|
"step": 4300
|
|
},
|
|
{
|
|
"epoch": 8.5,
|
|
"eval_loss": 0.9618169069290161,
|
|
"eval_rouge1": 0.15,
|
|
"eval_rouge2": 0.09431818181818181,
|
|
"eval_rougeL": 0.15076923076923077,
|
|
"eval_rougeLsum": 0.1465384615384615,
|
|
"eval_runtime": 86.2134,
|
|
"eval_samples_per_second": 0.232,
|
|
"eval_steps_per_second": 0.232,
|
|
"step": 4300
|
|
},
|
|
{
|
|
"epoch": 8.52,
|
|
"learning_rate": 7.545271629778672e-05,
|
|
"loss": 0.0964,
|
|
"step": 4310
|
|
},
|
|
{
|
|
"epoch": 8.54,
|
|
"learning_rate": 7.444668008048291e-05,
|
|
"loss": 0.1144,
|
|
"step": 4320
|
|
},
|
|
{
|
|
"epoch": 8.56,
|
|
"learning_rate": 7.344064386317907e-05,
|
|
"loss": 0.1029,
|
|
"step": 4330
|
|
},
|
|
{
|
|
"epoch": 8.58,
|
|
"learning_rate": 7.243460764587526e-05,
|
|
"loss": 0.0978,
|
|
"step": 4340
|
|
},
|
|
{
|
|
"epoch": 8.6,
|
|
"learning_rate": 7.142857142857142e-05,
|
|
"loss": 0.142,
|
|
"step": 4350
|
|
},
|
|
{
|
|
"epoch": 8.62,
|
|
"learning_rate": 7.042253521126761e-05,
|
|
"loss": 0.0957,
|
|
"step": 4360
|
|
},
|
|
{
|
|
"epoch": 8.64,
|
|
"learning_rate": 6.941649899396378e-05,
|
|
"loss": 0.0896,
|
|
"step": 4370
|
|
},
|
|
{
|
|
"epoch": 8.65,
|
|
"learning_rate": 6.841046277665996e-05,
|
|
"loss": 0.0998,
|
|
"step": 4380
|
|
},
|
|
{
|
|
"epoch": 8.67,
|
|
"learning_rate": 6.740442655935614e-05,
|
|
"loss": 0.0828,
|
|
"step": 4390
|
|
},
|
|
{
|
|
"epoch": 8.69,
|
|
"learning_rate": 6.639839034205232e-05,
|
|
"loss": 0.1023,
|
|
"step": 4400
|
|
},
|
|
{
|
|
"epoch": 8.69,
|
|
"eval_loss": 0.9609012603759766,
|
|
"eval_rouge1": 0.18,
|
|
"eval_rouge2": 0.075,
|
|
"eval_rougeL": 0.18,
|
|
"eval_rougeLsum": 0.18,
|
|
"eval_runtime": 85.733,
|
|
"eval_samples_per_second": 0.233,
|
|
"eval_steps_per_second": 0.233,
|
|
"step": 4400
|
|
},
|
|
{
|
|
"epoch": 8.71,
|
|
"learning_rate": 6.539235412474849e-05,
|
|
"loss": 0.1324,
|
|
"step": 4410
|
|
},
|
|
{
|
|
"epoch": 8.73,
|
|
"learning_rate": 6.438631790744468e-05,
|
|
"loss": 0.1107,
|
|
"step": 4420
|
|
},
|
|
{
|
|
"epoch": 8.75,
|
|
"learning_rate": 6.338028169014085e-05,
|
|
"loss": 0.0756,
|
|
"step": 4430
|
|
},
|
|
{
|
|
"epoch": 8.77,
|
|
"learning_rate": 6.237424547283703e-05,
|
|
"loss": 0.1019,
|
|
"step": 4440
|
|
},
|
|
{
|
|
"epoch": 8.79,
|
|
"learning_rate": 6.13682092555332e-05,
|
|
"loss": 0.1232,
|
|
"step": 4450
|
|
},
|
|
{
|
|
"epoch": 8.81,
|
|
"learning_rate": 6.036217303822938e-05,
|
|
"loss": 0.1186,
|
|
"step": 4460
|
|
},
|
|
{
|
|
"epoch": 8.83,
|
|
"learning_rate": 5.935613682092555e-05,
|
|
"loss": 0.1093,
|
|
"step": 4470
|
|
},
|
|
{
|
|
"epoch": 8.85,
|
|
"learning_rate": 5.8350100603621735e-05,
|
|
"loss": 0.1009,
|
|
"step": 4480
|
|
},
|
|
{
|
|
"epoch": 8.87,
|
|
"learning_rate": 5.734406438631791e-05,
|
|
"loss": 0.0878,
|
|
"step": 4490
|
|
},
|
|
{
|
|
"epoch": 8.89,
|
|
"learning_rate": 5.6338028169014086e-05,
|
|
"loss": 0.1102,
|
|
"step": 4500
|
|
},
|
|
{
|
|
"epoch": 8.89,
|
|
"eval_loss": 0.9644363522529602,
|
|
"eval_rouge1": 0.14615384615384613,
|
|
"eval_rouge2": 0.1,
|
|
"eval_rougeL": 0.15115384615384614,
|
|
"eval_rougeLsum": 0.145,
|
|
"eval_runtime": 83.9759,
|
|
"eval_samples_per_second": 0.238,
|
|
"eval_steps_per_second": 0.238,
|
|
"step": 4500
|
|
},
|
|
{
|
|
"epoch": 8.91,
|
|
"learning_rate": 5.533199195171026e-05,
|
|
"loss": 0.1363,
|
|
"step": 4510
|
|
},
|
|
{
|
|
"epoch": 8.93,
|
|
"learning_rate": 5.4325955734406444e-05,
|
|
"loss": 0.0738,
|
|
"step": 4520
|
|
},
|
|
{
|
|
"epoch": 8.95,
|
|
"learning_rate": 5.331991951710262e-05,
|
|
"loss": 0.0649,
|
|
"step": 4530
|
|
},
|
|
{
|
|
"epoch": 8.97,
|
|
"learning_rate": 5.2313883299798795e-05,
|
|
"loss": 0.0971,
|
|
"step": 4540
|
|
},
|
|
{
|
|
"epoch": 8.99,
|
|
"learning_rate": 5.130784708249498e-05,
|
|
"loss": 0.0688,
|
|
"step": 4550
|
|
},
|
|
{
|
|
"epoch": 9.01,
|
|
"learning_rate": 5.030181086519115e-05,
|
|
"loss": 0.0837,
|
|
"step": 4560
|
|
},
|
|
{
|
|
"epoch": 9.03,
|
|
"learning_rate": 4.929577464788732e-05,
|
|
"loss": 0.0891,
|
|
"step": 4570
|
|
},
|
|
{
|
|
"epoch": 9.05,
|
|
"learning_rate": 4.82897384305835e-05,
|
|
"loss": 0.0744,
|
|
"step": 4580
|
|
},
|
|
{
|
|
"epoch": 9.07,
|
|
"learning_rate": 4.728370221327968e-05,
|
|
"loss": 0.0906,
|
|
"step": 4590
|
|
},
|
|
{
|
|
"epoch": 9.09,
|
|
"learning_rate": 4.6277665995975854e-05,
|
|
"loss": 0.1102,
|
|
"step": 4600
|
|
},
|
|
{
|
|
"epoch": 9.09,
|
|
"eval_loss": 0.9806769490242004,
|
|
"eval_rouge1": 0.12615384615384617,
|
|
"eval_rouge2": 0.08636363636363635,
|
|
"eval_rougeL": 0.13615384615384613,
|
|
"eval_rougeLsum": 0.12615384615384617,
|
|
"eval_runtime": 88.592,
|
|
"eval_samples_per_second": 0.226,
|
|
"eval_steps_per_second": 0.226,
|
|
"step": 4600
|
|
},
|
|
{
|
|
"epoch": 9.11,
|
|
"learning_rate": 4.527162977867203e-05,
|
|
"loss": 0.072,
|
|
"step": 4610
|
|
},
|
|
{
|
|
"epoch": 9.13,
|
|
"learning_rate": 4.426559356136821e-05,
|
|
"loss": 0.0729,
|
|
"step": 4620
|
|
},
|
|
{
|
|
"epoch": 9.15,
|
|
"learning_rate": 4.325955734406439e-05,
|
|
"loss": 0.0884,
|
|
"step": 4630
|
|
},
|
|
{
|
|
"epoch": 9.17,
|
|
"learning_rate": 4.225352112676056e-05,
|
|
"loss": 0.0782,
|
|
"step": 4640
|
|
},
|
|
{
|
|
"epoch": 9.19,
|
|
"learning_rate": 4.124748490945674e-05,
|
|
"loss": 0.0879,
|
|
"step": 4650
|
|
},
|
|
{
|
|
"epoch": 9.21,
|
|
"learning_rate": 4.024144869215292e-05,
|
|
"loss": 0.1,
|
|
"step": 4660
|
|
},
|
|
{
|
|
"epoch": 9.23,
|
|
"learning_rate": 3.9235412474849096e-05,
|
|
"loss": 0.0867,
|
|
"step": 4670
|
|
},
|
|
{
|
|
"epoch": 9.25,
|
|
"learning_rate": 3.822937625754527e-05,
|
|
"loss": 0.0881,
|
|
"step": 4680
|
|
},
|
|
{
|
|
"epoch": 9.27,
|
|
"learning_rate": 3.7223340040241454e-05,
|
|
"loss": 0.0903,
|
|
"step": 4690
|
|
},
|
|
{
|
|
"epoch": 9.29,
|
|
"learning_rate": 3.621730382293763e-05,
|
|
"loss": 0.0942,
|
|
"step": 4700
|
|
},
|
|
{
|
|
"epoch": 9.29,
|
|
"eval_loss": 0.9865831136703491,
|
|
"eval_rouge1": 0.13999999999999999,
|
|
"eval_rouge2": 0.09772727272727272,
|
|
"eval_rougeL": 0.14615384615384613,
|
|
"eval_rougeLsum": 0.13999999999999999,
|
|
"eval_runtime": 86.651,
|
|
"eval_samples_per_second": 0.231,
|
|
"eval_steps_per_second": 0.231,
|
|
"step": 4700
|
|
},
|
|
{
|
|
"epoch": 9.31,
|
|
"learning_rate": 3.5211267605633805e-05,
|
|
"loss": 0.1079,
|
|
"step": 4710
|
|
},
|
|
{
|
|
"epoch": 9.33,
|
|
"learning_rate": 3.420523138832998e-05,
|
|
"loss": 0.0807,
|
|
"step": 4720
|
|
},
|
|
{
|
|
"epoch": 9.35,
|
|
"learning_rate": 3.319919517102616e-05,
|
|
"loss": 0.105,
|
|
"step": 4730
|
|
},
|
|
{
|
|
"epoch": 9.37,
|
|
"learning_rate": 3.219315895372234e-05,
|
|
"loss": 0.095,
|
|
"step": 4740
|
|
},
|
|
{
|
|
"epoch": 9.39,
|
|
"learning_rate": 3.118712273641851e-05,
|
|
"loss": 0.0965,
|
|
"step": 4750
|
|
},
|
|
{
|
|
"epoch": 9.41,
|
|
"learning_rate": 3.018108651911469e-05,
|
|
"loss": 0.1001,
|
|
"step": 4760
|
|
},
|
|
{
|
|
"epoch": 9.43,
|
|
"learning_rate": 2.9175050301810868e-05,
|
|
"loss": 0.0782,
|
|
"step": 4770
|
|
},
|
|
{
|
|
"epoch": 9.45,
|
|
"learning_rate": 2.8169014084507043e-05,
|
|
"loss": 0.115,
|
|
"step": 4780
|
|
},
|
|
{
|
|
"epoch": 9.47,
|
|
"learning_rate": 2.7162977867203222e-05,
|
|
"loss": 0.071,
|
|
"step": 4790
|
|
},
|
|
{
|
|
"epoch": 9.49,
|
|
"learning_rate": 2.6156941649899397e-05,
|
|
"loss": 0.129,
|
|
"step": 4800
|
|
},
|
|
{
|
|
"epoch": 9.49,
|
|
"eval_loss": 0.9853466153144836,
|
|
"eval_rouge1": 0.12837606837606838,
|
|
"eval_rouge2": 0.08636363636363635,
|
|
"eval_rougeL": 0.13615384615384613,
|
|
"eval_rougeLsum": 0.1294871794871795,
|
|
"eval_runtime": 86.6444,
|
|
"eval_samples_per_second": 0.231,
|
|
"eval_steps_per_second": 0.231,
|
|
"step": 4800
|
|
},
|
|
{
|
|
"epoch": 9.5,
|
|
"learning_rate": 2.5150905432595576e-05,
|
|
"loss": 0.1285,
|
|
"step": 4810
|
|
},
|
|
{
|
|
"epoch": 9.52,
|
|
"learning_rate": 2.414486921529175e-05,
|
|
"loss": 0.0747,
|
|
"step": 4820
|
|
},
|
|
{
|
|
"epoch": 9.54,
|
|
"learning_rate": 2.3138832997987927e-05,
|
|
"loss": 0.0702,
|
|
"step": 4830
|
|
},
|
|
{
|
|
"epoch": 9.56,
|
|
"learning_rate": 2.2132796780684106e-05,
|
|
"loss": 0.1029,
|
|
"step": 4840
|
|
},
|
|
{
|
|
"epoch": 9.58,
|
|
"learning_rate": 2.112676056338028e-05,
|
|
"loss": 0.102,
|
|
"step": 4850
|
|
},
|
|
{
|
|
"epoch": 9.6,
|
|
"learning_rate": 2.012072434607646e-05,
|
|
"loss": 0.0909,
|
|
"step": 4860
|
|
},
|
|
{
|
|
"epoch": 9.62,
|
|
"learning_rate": 1.9114688128772636e-05,
|
|
"loss": 0.0849,
|
|
"step": 4870
|
|
},
|
|
{
|
|
"epoch": 9.64,
|
|
"learning_rate": 1.8108651911468815e-05,
|
|
"loss": 0.1216,
|
|
"step": 4880
|
|
},
|
|
{
|
|
"epoch": 9.66,
|
|
"learning_rate": 1.710261569416499e-05,
|
|
"loss": 0.1016,
|
|
"step": 4890
|
|
},
|
|
{
|
|
"epoch": 9.68,
|
|
"learning_rate": 1.609657947686117e-05,
|
|
"loss": 0.0949,
|
|
"step": 4900
|
|
},
|
|
{
|
|
"epoch": 9.68,
|
|
"eval_loss": 0.9819391965866089,
|
|
"eval_rouge1": 0.1911111111111111,
|
|
"eval_rouge2": 0.09772727272727272,
|
|
"eval_rougeL": 0.19615384615384612,
|
|
"eval_rougeLsum": 0.1923076923076923,
|
|
"eval_runtime": 87.241,
|
|
"eval_samples_per_second": 0.229,
|
|
"eval_steps_per_second": 0.229,
|
|
"step": 4900
|
|
},
|
|
{
|
|
"epoch": 9.7,
|
|
"learning_rate": 1.5090543259557344e-05,
|
|
"loss": 0.0692,
|
|
"step": 4910
|
|
},
|
|
{
|
|
"epoch": 9.72,
|
|
"learning_rate": 1.4084507042253522e-05,
|
|
"loss": 0.0653,
|
|
"step": 4920
|
|
},
|
|
{
|
|
"epoch": 9.74,
|
|
"learning_rate": 1.3078470824949699e-05,
|
|
"loss": 0.0797,
|
|
"step": 4930
|
|
},
|
|
{
|
|
"epoch": 9.76,
|
|
"learning_rate": 1.2072434607645874e-05,
|
|
"loss": 0.0905,
|
|
"step": 4940
|
|
},
|
|
{
|
|
"epoch": 9.78,
|
|
"learning_rate": 1.1066398390342053e-05,
|
|
"loss": 0.0868,
|
|
"step": 4950
|
|
},
|
|
{
|
|
"epoch": 9.8,
|
|
"learning_rate": 1.006036217303823e-05,
|
|
"loss": 0.0964,
|
|
"step": 4960
|
|
},
|
|
{
|
|
"epoch": 9.82,
|
|
"learning_rate": 9.054325955734407e-06,
|
|
"loss": 0.0913,
|
|
"step": 4970
|
|
},
|
|
{
|
|
"epoch": 9.84,
|
|
"learning_rate": 8.048289738430584e-06,
|
|
"loss": 0.0708,
|
|
"step": 4980
|
|
},
|
|
{
|
|
"epoch": 9.86,
|
|
"learning_rate": 7.042253521126761e-06,
|
|
"loss": 0.1102,
|
|
"step": 4990
|
|
},
|
|
{
|
|
"epoch": 9.88,
|
|
"learning_rate": 6.036217303822937e-06,
|
|
"loss": 0.0852,
|
|
"step": 5000
|
|
},
|
|
{
|
|
"epoch": 9.88,
|
|
"eval_loss": 0.9852367639541626,
|
|
"eval_rouge1": 0.12615384615384617,
|
|
"eval_rouge2": 0.08636363636363635,
|
|
"eval_rougeL": 0.13615384615384613,
|
|
"eval_rougeLsum": 0.12615384615384617,
|
|
"eval_runtime": 87.473,
|
|
"eval_samples_per_second": 0.229,
|
|
"eval_steps_per_second": 0.229,
|
|
"step": 5000
|
|
}
|
|
],
|
|
"max_steps": 5060,
|
|
"num_train_epochs": 10,
|
|
"total_flos": 1.2175308798022656e+17,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|
|
|