{
"best_metric": 0.5818656086921692,
"best_model_checkpoint": "miner_id_24/checkpoint-150",
"epoch": 0.487408610885459,
"eval_steps": 50,
"global_step": 150,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.003249390739236393,
"grad_norm": 0.6813653707504272,
"learning_rate": 1.0100000000000002e-05,
"loss": 0.5143,
"step": 1
},
{
"epoch": 0.003249390739236393,
"eval_loss": 0.7816610336303711,
"eval_runtime": 34.0242,
"eval_samples_per_second": 121.825,
"eval_steps_per_second": 3.821,
"step": 1
},
{
"epoch": 0.006498781478472786,
"grad_norm": 0.9088014364242554,
"learning_rate": 2.0200000000000003e-05,
"loss": 0.6753,
"step": 2
},
{
"epoch": 0.00974817221770918,
"grad_norm": 0.9956667423248291,
"learning_rate": 3.0299999999999998e-05,
"loss": 0.7492,
"step": 3
},
{
"epoch": 0.012997562956945572,
"grad_norm": 0.8958187699317932,
"learning_rate": 4.0400000000000006e-05,
"loss": 0.7845,
"step": 4
},
{
"epoch": 0.016246953696181964,
"grad_norm": 0.7788242697715759,
"learning_rate": 5.05e-05,
"loss": 0.7895,
"step": 5
},
{
"epoch": 0.01949634443541836,
"grad_norm": 0.7416828274726868,
"learning_rate": 6.0599999999999996e-05,
"loss": 0.8016,
"step": 6
},
{
"epoch": 0.022745735174654752,
"grad_norm": 0.22832198441028595,
"learning_rate": 7.07e-05,
"loss": 0.4376,
"step": 7
},
{
"epoch": 0.025995125913891144,
"grad_norm": 0.48874160647392273,
"learning_rate": 8.080000000000001e-05,
"loss": 0.5812,
"step": 8
},
{
"epoch": 0.02924451665312754,
"grad_norm": 0.6231333017349243,
"learning_rate": 9.09e-05,
"loss": 0.6738,
"step": 9
},
{
"epoch": 0.03249390739236393,
"grad_norm": 0.568499743938446,
"learning_rate": 0.000101,
"loss": 0.711,
"step": 10
},
{
"epoch": 0.03574329813160033,
"grad_norm": 0.4888806641101837,
"learning_rate": 0.00010046842105263158,
"loss": 0.734,
"step": 11
},
{
"epoch": 0.03899268887083672,
"grad_norm": 0.5277363657951355,
"learning_rate": 9.993684210526315e-05,
"loss": 0.7732,
"step": 12
},
{
"epoch": 0.04224207961007311,
"grad_norm": 0.1946847289800644,
"learning_rate": 9.940526315789473e-05,
"loss": 0.4379,
"step": 13
},
{
"epoch": 0.045491470349309504,
"grad_norm": 0.18279439210891724,
"learning_rate": 9.887368421052632e-05,
"loss": 0.5273,
"step": 14
},
{
"epoch": 0.048740861088545896,
"grad_norm": 0.19591547548770905,
"learning_rate": 9.83421052631579e-05,
"loss": 0.6058,
"step": 15
},
{
"epoch": 0.05199025182778229,
"grad_norm": 0.2230876386165619,
"learning_rate": 9.781052631578948e-05,
"loss": 0.7016,
"step": 16
},
{
"epoch": 0.05523964256701868,
"grad_norm": 0.2236296534538269,
"learning_rate": 9.727894736842106e-05,
"loss": 0.6581,
"step": 17
},
{
"epoch": 0.05848903330625508,
"grad_norm": 0.2454998642206192,
"learning_rate": 9.674736842105263e-05,
"loss": 0.699,
"step": 18
},
{
"epoch": 0.06173842404549147,
"grad_norm": 0.17027240991592407,
"learning_rate": 9.621578947368421e-05,
"loss": 0.4311,
"step": 19
},
{
"epoch": 0.06498781478472786,
"grad_norm": 0.1304379254579544,
"learning_rate": 9.568421052631578e-05,
"loss": 0.4936,
"step": 20
},
{
"epoch": 0.06823720552396426,
"grad_norm": 0.20378950238227844,
"learning_rate": 9.515263157894737e-05,
"loss": 0.5676,
"step": 21
},
{
"epoch": 0.07148659626320066,
"grad_norm": 0.24588386714458466,
"learning_rate": 9.462105263157895e-05,
"loss": 0.608,
"step": 22
},
{
"epoch": 0.07473598700243704,
"grad_norm": 0.24846017360687256,
"learning_rate": 9.408947368421054e-05,
"loss": 0.6507,
"step": 23
},
{
"epoch": 0.07798537774167344,
"grad_norm": 0.24984927475452423,
"learning_rate": 9.355789473684211e-05,
"loss": 0.6756,
"step": 24
},
{
"epoch": 0.08123476848090982,
"grad_norm": 0.6067420840263367,
"learning_rate": 9.302631578947369e-05,
"loss": 0.7574,
"step": 25
},
{
"epoch": 0.08448415922014622,
"grad_norm": 0.1186656579375267,
"learning_rate": 9.249473684210526e-05,
"loss": 0.439,
"step": 26
},
{
"epoch": 0.08773354995938261,
"grad_norm": 0.15524183213710785,
"learning_rate": 9.196315789473685e-05,
"loss": 0.5471,
"step": 27
},
{
"epoch": 0.09098294069861901,
"grad_norm": 0.173161119222641,
"learning_rate": 9.143157894736843e-05,
"loss": 0.6089,
"step": 28
},
{
"epoch": 0.09423233143785541,
"grad_norm": 0.21757453680038452,
"learning_rate": 9.09e-05,
"loss": 0.6695,
"step": 29
},
{
"epoch": 0.09748172217709179,
"grad_norm": 0.26508957147598267,
"learning_rate": 9.036842105263158e-05,
"loss": 0.6901,
"step": 30
},
{
"epoch": 0.10073111291632819,
"grad_norm": 0.33836978673934937,
"learning_rate": 8.983684210526316e-05,
"loss": 0.6923,
"step": 31
},
{
"epoch": 0.10398050365556458,
"grad_norm": 0.11899581551551819,
"learning_rate": 8.930526315789474e-05,
"loss": 0.3952,
"step": 32
},
{
"epoch": 0.10722989439480098,
"grad_norm": 0.1592710018157959,
"learning_rate": 8.877368421052632e-05,
"loss": 0.5076,
"step": 33
},
{
"epoch": 0.11047928513403736,
"grad_norm": 0.18337644636631012,
"learning_rate": 8.82421052631579e-05,
"loss": 0.5932,
"step": 34
},
{
"epoch": 0.11372867587327376,
"grad_norm": 0.18385730683803558,
"learning_rate": 8.771052631578948e-05,
"loss": 0.6471,
"step": 35
},
{
"epoch": 0.11697806661251016,
"grad_norm": 0.22672587633132935,
"learning_rate": 8.717894736842105e-05,
"loss": 0.6601,
"step": 36
},
{
"epoch": 0.12022745735174654,
"grad_norm": 0.3419555425643921,
"learning_rate": 8.664736842105263e-05,
"loss": 0.714,
"step": 37
},
{
"epoch": 0.12347684809098294,
"grad_norm": 0.11192555725574493,
"learning_rate": 8.61157894736842e-05,
"loss": 0.4217,
"step": 38
},
{
"epoch": 0.12672623883021933,
"grad_norm": 0.11750304698944092,
"learning_rate": 8.55842105263158e-05,
"loss": 0.4873,
"step": 39
},
{
"epoch": 0.12997562956945571,
"grad_norm": 0.13687068223953247,
"learning_rate": 8.505263157894737e-05,
"loss": 0.5625,
"step": 40
},
{
"epoch": 0.13322502030869213,
"grad_norm": 0.18302011489868164,
"learning_rate": 8.452105263157896e-05,
"loss": 0.6415,
"step": 41
},
{
"epoch": 0.1364744110479285,
"grad_norm": 0.2126861810684204,
"learning_rate": 8.398947368421053e-05,
"loss": 0.6605,
"step": 42
},
{
"epoch": 0.1397238017871649,
"grad_norm": 0.2387421578168869,
"learning_rate": 8.345789473684211e-05,
"loss": 0.6936,
"step": 43
},
{
"epoch": 0.1429731925264013,
"grad_norm": 0.1434362530708313,
"learning_rate": 8.292631578947368e-05,
"loss": 0.4438,
"step": 44
},
{
"epoch": 0.1462225832656377,
"grad_norm": 0.0956588014960289,
"learning_rate": 8.239473684210526e-05,
"loss": 0.476,
"step": 45
},
{
"epoch": 0.14947197400487408,
"grad_norm": 0.12229965627193451,
"learning_rate": 8.186315789473683e-05,
"loss": 0.5585,
"step": 46
},
{
"epoch": 0.15272136474411047,
"grad_norm": 0.14256402850151062,
"learning_rate": 8.133157894736842e-05,
"loss": 0.6346,
"step": 47
},
{
"epoch": 0.15597075548334688,
"grad_norm": 0.16563288867473602,
"learning_rate": 8.080000000000001e-05,
"loss": 0.6593,
"step": 48
},
{
"epoch": 0.15922014622258326,
"grad_norm": 0.22236935794353485,
"learning_rate": 8.026842105263159e-05,
"loss": 0.6548,
"step": 49
},
{
"epoch": 0.16246953696181965,
"grad_norm": 0.46490493416786194,
"learning_rate": 7.973684210526316e-05,
"loss": 0.7019,
"step": 50
},
{
"epoch": 0.16246953696181965,
"eval_loss": 0.5981970429420471,
"eval_runtime": 34.2238,
"eval_samples_per_second": 121.114,
"eval_steps_per_second": 3.799,
"step": 50
},
{
"epoch": 0.16571892770105606,
"grad_norm": 0.08089729398488998,
"learning_rate": 7.920526315789474e-05,
"loss": 0.4069,
"step": 51
},
{
"epoch": 0.16896831844029245,
"grad_norm": 0.1035551056265831,
"learning_rate": 7.867368421052631e-05,
"loss": 0.541,
"step": 52
},
{
"epoch": 0.17221770917952883,
"grad_norm": 0.12616945803165436,
"learning_rate": 7.814210526315789e-05,
"loss": 0.6023,
"step": 53
},
{
"epoch": 0.17546709991876522,
"grad_norm": 0.16298453509807587,
"learning_rate": 7.761052631578946e-05,
"loss": 0.6371,
"step": 54
},
{
"epoch": 0.17871649065800163,
"grad_norm": 0.22030965983867645,
"learning_rate": 7.707894736842105e-05,
"loss": 0.661,
"step": 55
},
{
"epoch": 0.18196588139723802,
"grad_norm": 0.31400930881500244,
"learning_rate": 7.654736842105264e-05,
"loss": 0.7236,
"step": 56
},
{
"epoch": 0.1852152721364744,
"grad_norm": 0.08643659204244614,
"learning_rate": 7.601578947368422e-05,
"loss": 0.4073,
"step": 57
},
{
"epoch": 0.18846466287571081,
"grad_norm": 0.09804336726665497,
"learning_rate": 7.548421052631579e-05,
"loss": 0.5029,
"step": 58
},
{
"epoch": 0.1917140536149472,
"grad_norm": 0.11811637133359909,
"learning_rate": 7.495263157894737e-05,
"loss": 0.5798,
"step": 59
},
{
"epoch": 0.19496344435418358,
"grad_norm": 0.14417557418346405,
"learning_rate": 7.442105263157894e-05,
"loss": 0.6386,
"step": 60
},
{
"epoch": 0.19821283509341997,
"grad_norm": 0.18085996806621552,
"learning_rate": 7.388947368421053e-05,
"loss": 0.6438,
"step": 61
},
{
"epoch": 0.20146222583265638,
"grad_norm": 0.26183441281318665,
"learning_rate": 7.335789473684211e-05,
"loss": 0.6888,
"step": 62
},
{
"epoch": 0.20471161657189277,
"grad_norm": 0.09585064649581909,
"learning_rate": 7.282631578947368e-05,
"loss": 0.3934,
"step": 63
},
{
"epoch": 0.20796100731112915,
"grad_norm": 0.09319886565208435,
"learning_rate": 7.229473684210527e-05,
"loss": 0.5066,
"step": 64
},
{
"epoch": 0.21121039805036557,
"grad_norm": 0.1095961257815361,
"learning_rate": 7.176315789473685e-05,
"loss": 0.5636,
"step": 65
},
{
"epoch": 0.21445978878960195,
"grad_norm": 0.1352343112230301,
"learning_rate": 7.123157894736842e-05,
"loss": 0.6154,
"step": 66
},
{
"epoch": 0.21770917952883834,
"grad_norm": 0.14879107475280762,
"learning_rate": 7.07e-05,
"loss": 0.6146,
"step": 67
},
{
"epoch": 0.22095857026807472,
"grad_norm": 0.21761707961559296,
"learning_rate": 7.016842105263159e-05,
"loss": 0.6835,
"step": 68
},
{
"epoch": 0.22420796100731114,
"grad_norm": 0.12341579049825668,
"learning_rate": 6.963684210526316e-05,
"loss": 0.3693,
"step": 69
},
{
"epoch": 0.22745735174654752,
"grad_norm": 0.09415262937545776,
"learning_rate": 6.910526315789474e-05,
"loss": 0.4705,
"step": 70
},
{
"epoch": 0.2307067424857839,
"grad_norm": 0.11740969866514206,
"learning_rate": 6.857368421052631e-05,
"loss": 0.5741,
"step": 71
},
{
"epoch": 0.23395613322502032,
"grad_norm": 0.1267632246017456,
"learning_rate": 6.80421052631579e-05,
"loss": 0.5971,
"step": 72
},
{
"epoch": 0.2372055239642567,
"grad_norm": 0.15046332776546478,
"learning_rate": 6.751052631578948e-05,
"loss": 0.6468,
"step": 73
},
{
"epoch": 0.2404549147034931,
"grad_norm": 0.1901494562625885,
"learning_rate": 6.697894736842105e-05,
"loss": 0.6531,
"step": 74
},
{
"epoch": 0.2437043054427295,
"grad_norm": 0.384968101978302,
"learning_rate": 6.644736842105264e-05,
"loss": 0.6608,
"step": 75
},
{
"epoch": 0.2469536961819659,
"grad_norm": 0.08166619390249252,
"learning_rate": 6.591578947368422e-05,
"loss": 0.4241,
"step": 76
},
{
"epoch": 0.2502030869212023,
"grad_norm": 0.10392117500305176,
"learning_rate": 6.538421052631579e-05,
"loss": 0.5328,
"step": 77
},
{
"epoch": 0.25345247766043866,
"grad_norm": 0.12360985577106476,
"learning_rate": 6.485263157894737e-05,
"loss": 0.5951,
"step": 78
},
{
"epoch": 0.25670186839967507,
"grad_norm": 0.14392927289009094,
"learning_rate": 6.432105263157894e-05,
"loss": 0.6235,
"step": 79
},
{
"epoch": 0.25995125913891143,
"grad_norm": 0.1928626447916031,
"learning_rate": 6.378947368421053e-05,
"loss": 0.6682,
"step": 80
},
{
"epoch": 0.26320064987814784,
"grad_norm": 0.2776186764240265,
"learning_rate": 6.32578947368421e-05,
"loss": 0.6922,
"step": 81
},
{
"epoch": 0.26645004061738425,
"grad_norm": 0.08794127404689789,
"learning_rate": 6.27263157894737e-05,
"loss": 0.4141,
"step": 82
},
{
"epoch": 0.2696994313566206,
"grad_norm": 0.09980195760726929,
"learning_rate": 6.219473684210527e-05,
"loss": 0.506,
"step": 83
},
{
"epoch": 0.272948822095857,
"grad_norm": 0.11950897425413132,
"learning_rate": 6.166315789473685e-05,
"loss": 0.584,
"step": 84
},
{
"epoch": 0.27619821283509344,
"grad_norm": 0.13808846473693848,
"learning_rate": 6.113157894736842e-05,
"loss": 0.633,
"step": 85
},
{
"epoch": 0.2794476035743298,
"grad_norm": 0.1677071899175644,
"learning_rate": 6.0599999999999996e-05,
"loss": 0.651,
"step": 86
},
{
"epoch": 0.2826969943135662,
"grad_norm": 0.24286992847919464,
"learning_rate": 6.006842105263158e-05,
"loss": 0.6916,
"step": 87
},
{
"epoch": 0.2859463850528026,
"grad_norm": 0.11424943804740906,
"learning_rate": 5.953684210526315e-05,
"loss": 0.4227,
"step": 88
},
{
"epoch": 0.289195775792039,
"grad_norm": 0.09863637387752533,
"learning_rate": 5.900526315789474e-05,
"loss": 0.4951,
"step": 89
},
{
"epoch": 0.2924451665312754,
"grad_norm": 0.11521070450544357,
"learning_rate": 5.847368421052632e-05,
"loss": 0.5696,
"step": 90
},
{
"epoch": 0.2956945572705118,
"grad_norm": 0.1330547034740448,
"learning_rate": 5.79421052631579e-05,
"loss": 0.6131,
"step": 91
},
{
"epoch": 0.29894394800974816,
"grad_norm": 0.145342618227005,
"learning_rate": 5.7410526315789475e-05,
"loss": 0.6349,
"step": 92
},
{
"epoch": 0.3021933387489846,
"grad_norm": 0.2079046219587326,
"learning_rate": 5.687894736842105e-05,
"loss": 0.6669,
"step": 93
},
{
"epoch": 0.30544272948822093,
"grad_norm": 0.12806619703769684,
"learning_rate": 5.6347368421052625e-05,
"loss": 0.4033,
"step": 94
},
{
"epoch": 0.30869212022745735,
"grad_norm": 0.09000737220048904,
"learning_rate": 5.5815789473684214e-05,
"loss": 0.4668,
"step": 95
},
{
"epoch": 0.31194151096669376,
"grad_norm": 0.10959979891777039,
"learning_rate": 5.5284210526315796e-05,
"loss": 0.5692,
"step": 96
},
{
"epoch": 0.3151909017059301,
"grad_norm": 0.1303456574678421,
"learning_rate": 5.475263157894737e-05,
"loss": 0.6188,
"step": 97
},
{
"epoch": 0.31844029244516653,
"grad_norm": 0.1473805159330368,
"learning_rate": 5.422105263157895e-05,
"loss": 0.6352,
"step": 98
},
{
"epoch": 0.32168968318440294,
"grad_norm": 0.1913599818944931,
"learning_rate": 5.368947368421053e-05,
"loss": 0.6548,
"step": 99
},
{
"epoch": 0.3249390739236393,
"grad_norm": 0.4161045551300049,
"learning_rate": 5.3157894736842104e-05,
"loss": 0.6829,
"step": 100
},
{
"epoch": 0.3249390739236393,
"eval_loss": 0.5865235328674316,
"eval_runtime": 34.0488,
"eval_samples_per_second": 121.737,
"eval_steps_per_second": 3.818,
"step": 100
},
{
"epoch": 0.3281884646628757,
"grad_norm": 0.0793076679110527,
"learning_rate": 5.262631578947368e-05,
"loss": 0.4153,
"step": 101
},
{
"epoch": 0.3314378554021121,
"grad_norm": 0.10799999535083771,
"learning_rate": 5.209473684210527e-05,
"loss": 0.5407,
"step": 102
},
{
"epoch": 0.3346872461413485,
"grad_norm": 0.12578538060188293,
"learning_rate": 5.1563157894736844e-05,
"loss": 0.5839,
"step": 103
},
{
"epoch": 0.3379366368805849,
"grad_norm": 0.1581355482339859,
"learning_rate": 5.1031578947368426e-05,
"loss": 0.6407,
"step": 104
},
{
"epoch": 0.3411860276198213,
"grad_norm": 0.1918567270040512,
"learning_rate": 5.05e-05,
"loss": 0.651,
"step": 105
},
{
"epoch": 0.34443541835905767,
"grad_norm": 0.28396371006965637,
"learning_rate": 4.9968421052631576e-05,
"loss": 0.6952,
"step": 106
},
{
"epoch": 0.3476848090982941,
"grad_norm": 0.0913185104727745,
"learning_rate": 4.943684210526316e-05,
"loss": 0.4104,
"step": 107
},
{
"epoch": 0.35093419983753044,
"grad_norm": 0.09755459427833557,
"learning_rate": 4.890526315789474e-05,
"loss": 0.5118,
"step": 108
},
{
"epoch": 0.35418359057676685,
"grad_norm": 0.11383837461471558,
"learning_rate": 4.8373684210526316e-05,
"loss": 0.5756,
"step": 109
},
{
"epoch": 0.35743298131600326,
"grad_norm": 0.13957881927490234,
"learning_rate": 4.784210526315789e-05,
"loss": 0.6268,
"step": 110
},
{
"epoch": 0.3606823720552396,
"grad_norm": 0.16021229326725006,
"learning_rate": 4.731052631578947e-05,
"loss": 0.6517,
"step": 111
},
{
"epoch": 0.36393176279447603,
"grad_norm": 0.22842200100421906,
"learning_rate": 4.6778947368421055e-05,
"loss": 0.6824,
"step": 112
},
{
"epoch": 0.36718115353371245,
"grad_norm": 0.09870794415473938,
"learning_rate": 4.624736842105263e-05,
"loss": 0.3942,
"step": 113
},
{
"epoch": 0.3704305442729488,
"grad_norm": 0.09231571108102798,
"learning_rate": 4.571578947368421e-05,
"loss": 0.4683,
"step": 114
},
{
"epoch": 0.3736799350121852,
"grad_norm": 0.10994287580251694,
"learning_rate": 4.518421052631579e-05,
"loss": 0.555,
"step": 115
},
{
"epoch": 0.37692932575142163,
"grad_norm": 0.13546720147132874,
"learning_rate": 4.465263157894737e-05,
"loss": 0.632,
"step": 116
},
{
"epoch": 0.380178716490658,
"grad_norm": 0.15742133557796478,
"learning_rate": 4.412105263157895e-05,
"loss": 0.6484,
"step": 117
},
{
"epoch": 0.3834281072298944,
"grad_norm": 0.21697165071964264,
"learning_rate": 4.358947368421053e-05,
"loss": 0.66,
"step": 118
},
{
"epoch": 0.3866774979691308,
"grad_norm": 0.13058695197105408,
"learning_rate": 4.30578947368421e-05,
"loss": 0.4018,
"step": 119
},
{
"epoch": 0.38992688870836717,
"grad_norm": 0.09321828931570053,
"learning_rate": 4.2526315789473685e-05,
"loss": 0.4504,
"step": 120
},
{
"epoch": 0.3931762794476036,
"grad_norm": 0.11127794533967972,
"learning_rate": 4.199473684210527e-05,
"loss": 0.5384,
"step": 121
},
{
"epoch": 0.39642567018683994,
"grad_norm": 0.13234351575374603,
"learning_rate": 4.146315789473684e-05,
"loss": 0.5954,
"step": 122
},
{
"epoch": 0.39967506092607635,
"grad_norm": 0.15013480186462402,
"learning_rate": 4.093157894736842e-05,
"loss": 0.6251,
"step": 123
},
{
"epoch": 0.40292445166531277,
"grad_norm": 0.18615896999835968,
"learning_rate": 4.0400000000000006e-05,
"loss": 0.6394,
"step": 124
},
{
"epoch": 0.4061738424045491,
"grad_norm": 0.3680824935436249,
"learning_rate": 3.986842105263158e-05,
"loss": 0.6591,
"step": 125
},
{
"epoch": 0.40942323314378554,
"grad_norm": 0.08504093438386917,
"learning_rate": 3.933684210526316e-05,
"loss": 0.4255,
"step": 126
},
{
"epoch": 0.41267262388302195,
"grad_norm": 0.10568774491548538,
"learning_rate": 3.880526315789473e-05,
"loss": 0.5556,
"step": 127
},
{
"epoch": 0.4159220146222583,
"grad_norm": 0.12182536721229553,
"learning_rate": 3.827368421052632e-05,
"loss": 0.5852,
"step": 128
},
{
"epoch": 0.4191714053614947,
"grad_norm": 0.1525968313217163,
"learning_rate": 3.7742105263157896e-05,
"loss": 0.6436,
"step": 129
},
{
"epoch": 0.42242079610073113,
"grad_norm": 0.1934550702571869,
"learning_rate": 3.721052631578947e-05,
"loss": 0.6896,
"step": 130
},
{
"epoch": 0.4256701868399675,
"grad_norm": 0.28480178117752075,
"learning_rate": 3.6678947368421054e-05,
"loss": 0.6857,
"step": 131
},
{
"epoch": 0.4289195775792039,
"grad_norm": 0.09007880091667175,
"learning_rate": 3.6147368421052636e-05,
"loss": 0.3843,
"step": 132
},
{
"epoch": 0.4321689683184403,
"grad_norm": 0.09930918365716934,
"learning_rate": 3.561578947368421e-05,
"loss": 0.5109,
"step": 133
},
{
"epoch": 0.4354183590576767,
"grad_norm": 0.118385449051857,
"learning_rate": 3.508421052631579e-05,
"loss": 0.5836,
"step": 134
},
{
"epoch": 0.4386677497969131,
"grad_norm": 0.139013409614563,
"learning_rate": 3.455263157894737e-05,
"loss": 0.6216,
"step": 135
},
{
"epoch": 0.44191714053614944,
"grad_norm": 0.16059033572673798,
"learning_rate": 3.402105263157895e-05,
"loss": 0.6354,
"step": 136
},
{
"epoch": 0.44516653127538586,
"grad_norm": 0.24489456415176392,
"learning_rate": 3.3489473684210526e-05,
"loss": 0.6692,
"step": 137
},
{
"epoch": 0.44841592201462227,
"grad_norm": 0.10024583339691162,
"learning_rate": 3.295789473684211e-05,
"loss": 0.3742,
"step": 138
},
{
"epoch": 0.4516653127538586,
"grad_norm": 0.09301915764808655,
"learning_rate": 3.242631578947368e-05,
"loss": 0.4654,
"step": 139
},
{
"epoch": 0.45491470349309504,
"grad_norm": 0.11495403945446014,
"learning_rate": 3.1894736842105265e-05,
"loss": 0.5628,
"step": 140
},
{
"epoch": 0.45816409423233145,
"grad_norm": 0.13312485814094543,
"learning_rate": 3.136315789473685e-05,
"loss": 0.5899,
"step": 141
},
{
"epoch": 0.4614134849715678,
"grad_norm": 0.1566401571035385,
"learning_rate": 3.083157894736842e-05,
"loss": 0.6362,
"step": 142
},
{
"epoch": 0.4646628757108042,
"grad_norm": 0.20983877778053284,
"learning_rate": 3.0299999999999998e-05,
"loss": 0.6727,
"step": 143
},
{
"epoch": 0.46791226645004064,
"grad_norm": 0.13690727949142456,
"learning_rate": 2.9768421052631577e-05,
"loss": 0.4128,
"step": 144
},
{
"epoch": 0.471161657189277,
"grad_norm": 0.0922754630446434,
"learning_rate": 2.923684210526316e-05,
"loss": 0.4803,
"step": 145
},
{
"epoch": 0.4744110479285134,
"grad_norm": 0.10565061867237091,
"learning_rate": 2.8705263157894737e-05,
"loss": 0.547,
"step": 146
},
{
"epoch": 0.4776604386677498,
"grad_norm": 0.13177412748336792,
"learning_rate": 2.8173684210526313e-05,
"loss": 0.6094,
"step": 147
},
{
"epoch": 0.4809098294069862,
"grad_norm": 0.14827612042427063,
"learning_rate": 2.7642105263157898e-05,
"loss": 0.6221,
"step": 148
},
{
"epoch": 0.4841592201462226,
"grad_norm": 0.19302710890769958,
"learning_rate": 2.7110526315789473e-05,
"loss": 0.6625,
"step": 149
},
{
"epoch": 0.487408610885459,
"grad_norm": 0.37328875064849854,
"learning_rate": 2.6578947368421052e-05,
"loss": 0.6865,
"step": 150
},
{
"epoch": 0.487408610885459,
"eval_loss": 0.5818656086921692,
"eval_runtime": 34.0476,
"eval_samples_per_second": 121.741,
"eval_steps_per_second": 3.818,
"step": 150
}
],
"logging_steps": 1,
"max_steps": 200,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.867916955323728e+18,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}