{
"best_metric": 1.000998854637146,
"best_model_checkpoint": "./lora-alpaca3/checkpoint-1000",
"epoch": 2.572347266881029,
"eval_steps": 200,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02572347266881029,
"grad_norm": 0.5646638870239258,
"learning_rate": 2.9999999999999997e-05,
"loss": 1.9009,
"step": 10
},
{
"epoch": 0.05144694533762058,
"grad_norm": 0.5377612709999084,
"learning_rate": 5.9999999999999995e-05,
"loss": 1.8355,
"step": 20
},
{
"epoch": 0.07717041800643087,
"grad_norm": 0.5958595871925354,
"learning_rate": 8.999999999999999e-05,
"loss": 1.6639,
"step": 30
},
{
"epoch": 0.10289389067524116,
"grad_norm": 0.5721263885498047,
"learning_rate": 0.00011999999999999999,
"loss": 1.3994,
"step": 40
},
{
"epoch": 0.12861736334405144,
"grad_norm": 0.2665337026119232,
"learning_rate": 0.00015,
"loss": 1.1767,
"step": 50
},
{
"epoch": 0.15434083601286175,
"grad_norm": 0.12332847714424133,
"learning_rate": 0.00017999999999999998,
"loss": 1.1413,
"step": 60
},
{
"epoch": 0.18006430868167203,
"grad_norm": 0.12157242000102997,
"learning_rate": 0.00020999999999999998,
"loss": 1.1103,
"step": 70
},
{
"epoch": 0.2057877813504823,
"grad_norm": 0.12799611687660217,
"learning_rate": 0.00023999999999999998,
"loss": 1.1003,
"step": 80
},
{
"epoch": 0.2315112540192926,
"grad_norm": 0.12139850109815598,
"learning_rate": 0.00027,
"loss": 1.084,
"step": 90
},
{
"epoch": 0.2572347266881029,
"grad_norm": 0.144248366355896,
"learning_rate": 0.0003,
"loss": 1.0722,
"step": 100
},
{
"epoch": 0.2829581993569132,
"grad_norm": 0.15326175093650818,
"learning_rate": 0.00029718045112781953,
"loss": 1.0731,
"step": 110
},
{
"epoch": 0.3086816720257235,
"grad_norm": 0.12561754882335663,
"learning_rate": 0.00029436090225563904,
"loss": 1.0537,
"step": 120
},
{
"epoch": 0.33440514469453375,
"grad_norm": 0.15570670366287231,
"learning_rate": 0.0002915413533834586,
"loss": 1.049,
"step": 130
},
{
"epoch": 0.36012861736334406,
"grad_norm": 0.14343878626823425,
"learning_rate": 0.00028872180451127816,
"loss": 1.0273,
"step": 140
},
{
"epoch": 0.3858520900321543,
"grad_norm": 0.1315772831439972,
"learning_rate": 0.0002859022556390977,
"loss": 1.0529,
"step": 150
},
{
"epoch": 0.4115755627009646,
"grad_norm": 0.13874055445194244,
"learning_rate": 0.0002830827067669173,
"loss": 1.044,
"step": 160
},
{
"epoch": 0.43729903536977494,
"grad_norm": 0.1271531730890274,
"learning_rate": 0.00028026315789473683,
"loss": 1.0388,
"step": 170
},
{
"epoch": 0.4630225080385852,
"grad_norm": 0.12684497237205505,
"learning_rate": 0.0002774436090225564,
"loss": 1.0385,
"step": 180
},
{
"epoch": 0.4887459807073955,
"grad_norm": 0.11675913631916046,
"learning_rate": 0.0002746240601503759,
"loss": 1.0349,
"step": 190
},
{
"epoch": 0.5144694533762058,
"grad_norm": 0.15151233971118927,
"learning_rate": 0.00027180451127819546,
"loss": 1.0282,
"step": 200
},
{
"epoch": 0.5144694533762058,
"eval_loss": 1.0400722026824951,
"eval_runtime": 55.5366,
"eval_samples_per_second": 36.012,
"eval_steps_per_second": 4.502,
"step": 200
},
{
"epoch": 0.5401929260450161,
"grad_norm": 0.1420961171388626,
"learning_rate": 0.000268984962406015,
"loss": 1.0297,
"step": 210
},
{
"epoch": 0.5659163987138264,
"grad_norm": 0.13161741197109222,
"learning_rate": 0.0002661654135338346,
"loss": 1.0276,
"step": 220
},
{
"epoch": 0.5916398713826366,
"grad_norm": 0.13450871407985687,
"learning_rate": 0.00026334586466165413,
"loss": 1.0285,
"step": 230
},
{
"epoch": 0.617363344051447,
"grad_norm": 0.13193918764591217,
"learning_rate": 0.0002605263157894737,
"loss": 1.0309,
"step": 240
},
{
"epoch": 0.6430868167202572,
"grad_norm": 0.1393500566482544,
"learning_rate": 0.0002577067669172932,
"loss": 1.0315,
"step": 250
},
{
"epoch": 0.6688102893890675,
"grad_norm": 0.11902929842472076,
"learning_rate": 0.00025488721804511276,
"loss": 1.0347,
"step": 260
},
{
"epoch": 0.6945337620578779,
"grad_norm": 0.1258976310491562,
"learning_rate": 0.0002520676691729323,
"loss": 1.0273,
"step": 270
},
{
"epoch": 0.7202572347266881,
"grad_norm": 0.12598267197608948,
"learning_rate": 0.0002492481203007519,
"loss": 1.0128,
"step": 280
},
{
"epoch": 0.7459807073954984,
"grad_norm": 0.122463159263134,
"learning_rate": 0.0002464285714285714,
"loss": 1.0251,
"step": 290
},
{
"epoch": 0.7717041800643086,
"grad_norm": 0.1293506771326065,
"learning_rate": 0.00024360902255639094,
"loss": 1.0317,
"step": 300
},
{
"epoch": 0.797427652733119,
"grad_norm": 0.13788191974163055,
"learning_rate": 0.00024078947368421052,
"loss": 1.029,
"step": 310
},
{
"epoch": 0.8231511254019293,
"grad_norm": 0.1445332169532776,
"learning_rate": 0.00023796992481203005,
"loss": 1.0173,
"step": 320
},
{
"epoch": 0.8488745980707395,
"grad_norm": 0.11408714205026627,
"learning_rate": 0.00023515037593984961,
"loss": 1.0109,
"step": 330
},
{
"epoch": 0.8745980707395499,
"grad_norm": 0.11767855286598206,
"learning_rate": 0.00023233082706766915,
"loss": 1.0116,
"step": 340
},
{
"epoch": 0.9003215434083601,
"grad_norm": 0.12555481493473053,
"learning_rate": 0.0002295112781954887,
"loss": 1.0144,
"step": 350
},
{
"epoch": 0.9260450160771704,
"grad_norm": 0.12403016537427902,
"learning_rate": 0.00022669172932330824,
"loss": 1.0299,
"step": 360
},
{
"epoch": 0.9517684887459807,
"grad_norm": 0.12968535721302032,
"learning_rate": 0.0002238721804511278,
"loss": 1.0081,
"step": 370
},
{
"epoch": 0.977491961414791,
"grad_norm": 0.12369370460510254,
"learning_rate": 0.00022105263157894733,
"loss": 1.0065,
"step": 380
},
{
"epoch": 1.0032154340836013,
"grad_norm": 0.12212081998586655,
"learning_rate": 0.0002182330827067669,
"loss": 1.0448,
"step": 390
},
{
"epoch": 1.0289389067524115,
"grad_norm": 0.1301109939813614,
"learning_rate": 0.00021541353383458647,
"loss": 1.0052,
"step": 400
},
{
"epoch": 1.0289389067524115,
"eval_loss": 1.0189071893692017,
"eval_runtime": 55.4593,
"eval_samples_per_second": 36.062,
"eval_steps_per_second": 4.508,
"step": 400
},
{
"epoch": 1.0546623794212218,
"grad_norm": 0.13505908846855164,
"learning_rate": 0.000212593984962406,
"loss": 1.0009,
"step": 410
},
{
"epoch": 1.0803858520900322,
"grad_norm": 0.12307268381118774,
"learning_rate": 0.00020977443609022556,
"loss": 1.0039,
"step": 420
},
{
"epoch": 1.1061093247588425,
"grad_norm": 0.11501840502023697,
"learning_rate": 0.0002069548872180451,
"loss": 0.9988,
"step": 430
},
{
"epoch": 1.1318327974276527,
"grad_norm": 0.12922251224517822,
"learning_rate": 0.00020413533834586463,
"loss": 0.9938,
"step": 440
},
{
"epoch": 1.157556270096463,
"grad_norm": 0.12304320186376572,
"learning_rate": 0.0002013157894736842,
"loss": 1.0223,
"step": 450
},
{
"epoch": 1.1832797427652733,
"grad_norm": 0.11323860287666321,
"learning_rate": 0.00019849624060150372,
"loss": 0.9957,
"step": 460
},
{
"epoch": 1.2090032154340835,
"grad_norm": 0.1335834562778473,
"learning_rate": 0.00019567669172932328,
"loss": 1.0042,
"step": 470
},
{
"epoch": 1.234726688102894,
"grad_norm": 0.12371603399515152,
"learning_rate": 0.00019285714285714286,
"loss": 1.0056,
"step": 480
},
{
"epoch": 1.2604501607717042,
"grad_norm": 0.12318002432584763,
"learning_rate": 0.0001900375939849624,
"loss": 1.0122,
"step": 490
},
{
"epoch": 1.2861736334405145,
"grad_norm": 0.13187570869922638,
"learning_rate": 0.00018721804511278195,
"loss": 0.9971,
"step": 500
},
{
"epoch": 1.3118971061093248,
"grad_norm": 0.11861589550971985,
"learning_rate": 0.00018439849624060149,
"loss": 0.9926,
"step": 510
},
{
"epoch": 1.337620578778135,
"grad_norm": 0.11287492513656616,
"learning_rate": 0.00018157894736842105,
"loss": 1.016,
"step": 520
},
{
"epoch": 1.3633440514469453,
"grad_norm": 0.1384376585483551,
"learning_rate": 0.00017875939849624058,
"loss": 1.0059,
"step": 530
},
{
"epoch": 1.3890675241157555,
"grad_norm": 0.12147587537765503,
"learning_rate": 0.00017593984962406014,
"loss": 1.0213,
"step": 540
},
{
"epoch": 1.414790996784566,
"grad_norm": 0.13031207025051117,
"learning_rate": 0.00017312030075187967,
"loss": 1.005,
"step": 550
},
{
"epoch": 1.4405144694533762,
"grad_norm": 0.13779744505882263,
"learning_rate": 0.00017030075187969925,
"loss": 1.0077,
"step": 560
},
{
"epoch": 1.4662379421221865,
"grad_norm": 0.13196057081222534,
"learning_rate": 0.00016748120300751879,
"loss": 0.9944,
"step": 570
},
{
"epoch": 1.4919614147909968,
"grad_norm": 0.117030069231987,
"learning_rate": 0.00016466165413533835,
"loss": 1.0082,
"step": 580
},
{
"epoch": 1.517684887459807,
"grad_norm": 0.1274162381887436,
"learning_rate": 0.00016184210526315788,
"loss": 1.0102,
"step": 590
},
{
"epoch": 1.5434083601286175,
"grad_norm": 0.13315673172473907,
"learning_rate": 0.00015902255639097744,
"loss": 0.985,
"step": 600
},
{
"epoch": 1.5434083601286175,
"eval_loss": 1.0096462965011597,
"eval_runtime": 55.507,
"eval_samples_per_second": 36.031,
"eval_steps_per_second": 4.504,
"step": 600
},
{
"epoch": 1.5691318327974275,
"grad_norm": 0.12220187485218048,
"learning_rate": 0.00015620300751879697,
"loss": 0.9912,
"step": 610
},
{
"epoch": 1.594855305466238,
"grad_norm": 0.12432006746530533,
"learning_rate": 0.00015338345864661653,
"loss": 0.9963,
"step": 620
},
{
"epoch": 1.6205787781350482,
"grad_norm": 0.12138667702674866,
"learning_rate": 0.00015056390977443606,
"loss": 1.0014,
"step": 630
},
{
"epoch": 1.6463022508038585,
"grad_norm": 0.13801203668117523,
"learning_rate": 0.00014774436090225562,
"loss": 0.9922,
"step": 640
},
{
"epoch": 1.6720257234726688,
"grad_norm": 0.13330678641796112,
"learning_rate": 0.00014492481203007518,
"loss": 0.9897,
"step": 650
},
{
"epoch": 1.697749196141479,
"grad_norm": 0.12127404659986496,
"learning_rate": 0.0001421052631578947,
"loss": 0.9898,
"step": 660
},
{
"epoch": 1.7234726688102895,
"grad_norm": 0.13159097731113434,
"learning_rate": 0.00013928571428571427,
"loss": 0.9943,
"step": 670
},
{
"epoch": 1.7491961414790995,
"grad_norm": 0.13246209919452667,
"learning_rate": 0.00013646616541353383,
"loss": 1.0048,
"step": 680
},
{
"epoch": 1.77491961414791,
"grad_norm": 0.11693169921636581,
"learning_rate": 0.00013364661654135336,
"loss": 0.9996,
"step": 690
},
{
"epoch": 1.8006430868167203,
"grad_norm": 0.13176512718200684,
"learning_rate": 0.00013082706766917292,
"loss": 0.9923,
"step": 700
},
{
"epoch": 1.8263665594855305,
"grad_norm": 0.1224486380815506,
"learning_rate": 0.00012800751879699248,
"loss": 0.9799,
"step": 710
},
{
"epoch": 1.852090032154341,
"grad_norm": 0.12146233022212982,
"learning_rate": 0.000125187969924812,
"loss": 0.9874,
"step": 720
},
{
"epoch": 1.877813504823151,
"grad_norm": 0.12821047008037567,
"learning_rate": 0.00012236842105263157,
"loss": 0.9982,
"step": 730
},
{
"epoch": 1.9035369774919615,
"grad_norm": 0.12273906171321869,
"learning_rate": 0.00011954887218045111,
"loss": 0.9946,
"step": 740
},
{
"epoch": 1.9292604501607717,
"grad_norm": 0.1282244175672531,
"learning_rate": 0.00011672932330827067,
"loss": 0.9976,
"step": 750
},
{
"epoch": 1.954983922829582,
"grad_norm": 0.13315148651599884,
"learning_rate": 0.00011390977443609022,
"loss": 1.0012,
"step": 760
},
{
"epoch": 1.9807073954983923,
"grad_norm": 0.12510079145431519,
"learning_rate": 0.00011109022556390976,
"loss": 0.9865,
"step": 770
},
{
"epoch": 2.0064308681672025,
"grad_norm": 0.11581259220838547,
"learning_rate": 0.00010827067669172931,
"loss": 1.0306,
"step": 780
},
{
"epoch": 2.032154340836013,
"grad_norm": 0.1278076171875,
"learning_rate": 0.00010545112781954887,
"loss": 0.9984,
"step": 790
},
{
"epoch": 2.057877813504823,
"grad_norm": 0.12879391014575958,
"learning_rate": 0.00010263157894736841,
"loss": 0.9729,
"step": 800
},
{
"epoch": 2.057877813504823,
"eval_loss": 1.0042115449905396,
"eval_runtime": 55.4463,
"eval_samples_per_second": 36.071,
"eval_steps_per_second": 4.509,
"step": 800
},
{
"epoch": 2.0836012861736335,
"grad_norm": 0.1267639547586441,
"learning_rate": 9.981203007518796e-05,
"loss": 0.9899,
"step": 810
},
{
"epoch": 2.1093247588424435,
"grad_norm": 0.13013407588005066,
"learning_rate": 9.69924812030075e-05,
"loss": 0.9913,
"step": 820
},
{
"epoch": 2.135048231511254,
"grad_norm": 0.12404422461986542,
"learning_rate": 9.417293233082706e-05,
"loss": 0.9923,
"step": 830
},
{
"epoch": 2.1607717041800645,
"grad_norm": 0.12091611325740814,
"learning_rate": 9.135338345864661e-05,
"loss": 0.9905,
"step": 840
},
{
"epoch": 2.1864951768488745,
"grad_norm": 0.13067036867141724,
"learning_rate": 8.853383458646615e-05,
"loss": 0.9944,
"step": 850
},
{
"epoch": 2.212218649517685,
"grad_norm": 0.13169625401496887,
"learning_rate": 8.57142857142857e-05,
"loss": 0.9848,
"step": 860
},
{
"epoch": 2.237942122186495,
"grad_norm": 0.13107703626155853,
"learning_rate": 8.289473684210526e-05,
"loss": 0.9712,
"step": 870
},
{
"epoch": 2.2636655948553055,
"grad_norm": 0.1307050734758377,
"learning_rate": 8.00751879699248e-05,
"loss": 0.9855,
"step": 880
},
{
"epoch": 2.289389067524116,
"grad_norm": 0.13297782838344574,
"learning_rate": 7.725563909774435e-05,
"loss": 0.9918,
"step": 890
},
{
"epoch": 2.315112540192926,
"grad_norm": 0.1355738788843155,
"learning_rate": 7.44360902255639e-05,
"loss": 0.9845,
"step": 900
},
{
"epoch": 2.3408360128617365,
"grad_norm": 0.12372130155563354,
"learning_rate": 7.161654135338345e-05,
"loss": 0.9887,
"step": 910
},
{
"epoch": 2.3665594855305465,
"grad_norm": 0.1307709813117981,
"learning_rate": 6.8796992481203e-05,
"loss": 0.9886,
"step": 920
},
{
"epoch": 2.392282958199357,
"grad_norm": 0.12701715528964996,
"learning_rate": 6.597744360902256e-05,
"loss": 0.9885,
"step": 930
},
{
"epoch": 2.418006430868167,
"grad_norm": 0.13114945590496063,
"learning_rate": 6.315789473684209e-05,
"loss": 0.9788,
"step": 940
},
{
"epoch": 2.4437299035369775,
"grad_norm": 0.12744566798210144,
"learning_rate": 6.033834586466165e-05,
"loss": 0.986,
"step": 950
},
{
"epoch": 2.469453376205788,
"grad_norm": 0.13428843021392822,
"learning_rate": 5.7518796992481194e-05,
"loss": 0.974,
"step": 960
},
{
"epoch": 2.495176848874598,
"grad_norm": 0.13112470507621765,
"learning_rate": 5.4699248120300746e-05,
"loss": 0.9818,
"step": 970
},
{
"epoch": 2.5209003215434085,
"grad_norm": 0.1363256722688675,
"learning_rate": 5.187969924812029e-05,
"loss": 0.9909,
"step": 980
},
{
"epoch": 2.5466237942122185,
"grad_norm": 0.13036535680294037,
"learning_rate": 4.9060150375939844e-05,
"loss": 0.9921,
"step": 990
},
{
"epoch": 2.572347266881029,
"grad_norm": 0.13345099985599518,
"learning_rate": 4.624060150375939e-05,
"loss": 0.9722,
"step": 1000
},
{
"epoch": 2.572347266881029,
"eval_loss": 1.000998854637146,
"eval_runtime": 55.4742,
"eval_samples_per_second": 36.053,
"eval_steps_per_second": 4.507,
"step": 1000
}
],
"logging_steps": 10,
"max_steps": 1164,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.9627225935210086e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}