{
"best_metric": 0.9264618754386902,
"best_model_checkpoint": "miner_id_24/checkpoint-100",
"epoch": 3.0,
"eval_steps": 50,
"global_step": 141,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02127659574468085,
"grad_norm": 14.213656425476074,
"learning_rate": 1.1200000000000001e-05,
"loss": 4.4616,
"step": 1
},
{
"epoch": 0.02127659574468085,
"eval_loss": 3.8836147785186768,
"eval_runtime": 3.7874,
"eval_samples_per_second": 167.135,
"eval_steps_per_second": 5.281,
"step": 1
},
{
"epoch": 0.0425531914893617,
"grad_norm": 7.082957744598389,
"learning_rate": 2.2400000000000002e-05,
"loss": 3.8433,
"step": 2
},
{
"epoch": 0.06382978723404255,
"grad_norm": 5.922336578369141,
"learning_rate": 3.36e-05,
"loss": 3.5573,
"step": 3
},
{
"epoch": 0.0851063829787234,
"grad_norm": 5.772343158721924,
"learning_rate": 4.4800000000000005e-05,
"loss": 3.5562,
"step": 4
},
{
"epoch": 0.10638297872340426,
"grad_norm": 6.365505695343018,
"learning_rate": 5.6e-05,
"loss": 3.1005,
"step": 5
},
{
"epoch": 0.1276595744680851,
"grad_norm": 7.662172794342041,
"learning_rate": 6.72e-05,
"loss": 2.6307,
"step": 6
},
{
"epoch": 0.14893617021276595,
"grad_norm": 7.855373382568359,
"learning_rate": 7.84e-05,
"loss": 2.1008,
"step": 7
},
{
"epoch": 0.1702127659574468,
"grad_norm": 5.307789325714111,
"learning_rate": 8.960000000000001e-05,
"loss": 1.8141,
"step": 8
},
{
"epoch": 0.19148936170212766,
"grad_norm": 2.865940570831299,
"learning_rate": 0.0001008,
"loss": 1.8787,
"step": 9
},
{
"epoch": 0.2127659574468085,
"grad_norm": 1.4380123615264893,
"learning_rate": 0.000112,
"loss": 1.4918,
"step": 10
},
{
"epoch": 0.23404255319148937,
"grad_norm": 1.5105291604995728,
"learning_rate": 0.00011198389746057678,
"loss": 1.4308,
"step": 11
},
{
"epoch": 0.2553191489361702,
"grad_norm": 1.0955153703689575,
"learning_rate": 0.0001119355991027277,
"loss": 1.1133,
"step": 12
},
{
"epoch": 0.2765957446808511,
"grad_norm": 8.190502166748047,
"learning_rate": 0.00011185513270238886,
"loss": 0.7666,
"step": 13
},
{
"epoch": 0.2978723404255319,
"grad_norm": 12.864703178405762,
"learning_rate": 0.00011174254453503828,
"loss": 0.6613,
"step": 14
},
{
"epoch": 0.3191489361702128,
"grad_norm": 2.111825466156006,
"learning_rate": 0.00011159789934908318,
"loss": 1.6219,
"step": 15
},
{
"epoch": 0.3404255319148936,
"grad_norm": 1.2697428464889526,
"learning_rate": 0.00011142128032862395,
"loss": 1.4151,
"step": 16
},
{
"epoch": 0.3617021276595745,
"grad_norm": 0.5486758351325989,
"learning_rate": 0.00011121278904561553,
"loss": 1.2228,
"step": 17
},
{
"epoch": 0.3829787234042553,
"grad_norm": 0.6482474207878113,
"learning_rate": 0.0001109725454014545,
"loss": 1.1537,
"step": 18
},
{
"epoch": 0.40425531914893614,
"grad_norm": 1.133864402770996,
"learning_rate": 0.00011070068755802486,
"loss": 0.7857,
"step": 19
},
{
"epoch": 0.425531914893617,
"grad_norm": 0.4991963803768158,
"learning_rate": 0.00011039737185824234,
"loss": 0.1432,
"step": 20
},
{
"epoch": 0.44680851063829785,
"grad_norm": 0.7302205562591553,
"learning_rate": 0.00011006277273614316,
"loss": 1.3804,
"step": 21
},
{
"epoch": 0.46808510638297873,
"grad_norm": 0.6448777914047241,
"learning_rate": 0.00010969708261656854,
"loss": 1.4363,
"step": 22
},
{
"epoch": 0.48936170212765956,
"grad_norm": 0.6198523044586182,
"learning_rate": 0.00010930051180450303,
"loss": 1.2917,
"step": 23
},
{
"epoch": 0.5106382978723404,
"grad_norm": 0.40335163474082947,
"learning_rate": 0.00010887328836413005,
"loss": 1.2051,
"step": 24
},
{
"epoch": 0.5319148936170213,
"grad_norm": 0.5545335412025452,
"learning_rate": 0.00010841565798767423,
"loss": 0.9898,
"step": 25
},
{
"epoch": 0.5531914893617021,
"grad_norm": 1.7060941457748413,
"learning_rate": 0.00010792788385410628,
"loss": 0.0614,
"step": 26
},
{
"epoch": 0.574468085106383,
"grad_norm": 0.7902534008026123,
"learning_rate": 0.00010741024647779101,
"loss": 1.128,
"step": 27
},
{
"epoch": 0.5957446808510638,
"grad_norm": 0.44602009654045105,
"learning_rate": 0.00010686304354716622,
"loss": 1.4464,
"step": 28
},
{
"epoch": 0.6170212765957447,
"grad_norm": 0.3824542164802551,
"learning_rate": 0.00010628658975354491,
"loss": 1.2856,
"step": 29
},
{
"epoch": 0.6382978723404256,
"grad_norm": 0.42575424909591675,
"learning_rate": 0.00010568121661013911,
"loss": 1.134,
"step": 30
},
{
"epoch": 0.6595744680851063,
"grad_norm": 0.506963312625885,
"learning_rate": 0.00010504727226140981,
"loss": 0.9792,
"step": 31
},
{
"epoch": 0.6808510638297872,
"grad_norm": 0.46332627534866333,
"learning_rate": 0.00010438512128285228,
"loss": 0.3159,
"step": 32
},
{
"epoch": 0.7021276595744681,
"grad_norm": 0.4355514943599701,
"learning_rate": 0.00010369514447133208,
"loss": 0.8945,
"step": 33
},
{
"epoch": 0.723404255319149,
"grad_norm": 0.4772055447101593,
"learning_rate": 0.0001029777386260924,
"loss": 1.4357,
"step": 34
},
{
"epoch": 0.7446808510638298,
"grad_norm": 0.3987302780151367,
"learning_rate": 0.00010223331632055843,
"loss": 1.3066,
"step": 35
},
{
"epoch": 0.7659574468085106,
"grad_norm": 0.37436139583587646,
"learning_rate": 0.00010146230566507025,
"loss": 1.1248,
"step": 36
},
{
"epoch": 0.7872340425531915,
"grad_norm": 0.4493124783039093,
"learning_rate": 0.00010066515006068056,
"loss": 1.0106,
"step": 37
},
{
"epoch": 0.8085106382978723,
"grad_norm": 0.8829596042633057,
"learning_rate": 9.984230794415887e-05,
"loss": 0.4913,
"step": 38
},
{
"epoch": 0.8297872340425532,
"grad_norm": 0.5549487471580505,
"learning_rate": 9.899425252434878e-05,
"loss": 0.6699,
"step": 39
},
{
"epoch": 0.851063829787234,
"grad_norm": 0.4452535808086395,
"learning_rate": 9.812147151002993e-05,
"loss": 1.4244,
"step": 40
},
{
"epoch": 0.8723404255319149,
"grad_norm": 0.35962700843811035,
"learning_rate": 9.722446682944128e-05,
"loss": 1.2756,
"step": 41
},
{
"epoch": 0.8936170212765957,
"grad_norm": 0.3556835949420929,
"learning_rate": 9.630375434162683e-05,
"loss": 1.1866,
"step": 42
},
{
"epoch": 0.9148936170212766,
"grad_norm": 0.34359875321388245,
"learning_rate": 9.53598635397699e-05,
"loss": 1.0799,
"step": 43
},
{
"epoch": 0.9361702127659575,
"grad_norm": 0.5090391039848328,
"learning_rate": 9.43933372466865e-05,
"loss": 0.7165,
"step": 44
},
{
"epoch": 0.9574468085106383,
"grad_norm": 0.42997509241104126,
"learning_rate": 9.340473130265294e-05,
"loss": 1.0119,
"step": 45
},
{
"epoch": 0.9787234042553191,
"grad_norm": 0.3640575408935547,
"learning_rate": 9.239461424574742e-05,
"loss": 1.2632,
"step": 46
},
{
"epoch": 1.0,
"grad_norm": 0.3492790460586548,
"learning_rate": 9.136356698488885e-05,
"loss": 0.9953,
"step": 47
},
{
"epoch": 1.0212765957446808,
"grad_norm": 0.3192073702812195,
"learning_rate": 9.031218246576172e-05,
"loss": 0.21,
"step": 48
},
{
"epoch": 1.0425531914893618,
"grad_norm": 0.434736043214798,
"learning_rate": 8.924106532981847e-05,
"loss": 1.1353,
"step": 49
},
{
"epoch": 1.0638297872340425,
"grad_norm": 0.36542627215385437,
"learning_rate": 8.815083156655581e-05,
"loss": 1.3234,
"step": 50
},
{
"epoch": 1.0638297872340425,
"eval_loss": 0.9605554938316345,
"eval_runtime": 3.8841,
"eval_samples_per_second": 162.971,
"eval_steps_per_second": 5.149,
"step": 50
},
{
"epoch": 1.0851063829787233,
"grad_norm": 0.3345516622066498,
"learning_rate": 8.704210815926495e-05,
"loss": 1.1487,
"step": 51
},
{
"epoch": 1.1063829787234043,
"grad_norm": 0.39309221506118774,
"learning_rate": 8.59155327244593e-05,
"loss": 1.0582,
"step": 52
},
{
"epoch": 1.127659574468085,
"grad_norm": 0.5331593751907349,
"learning_rate": 8.477175314518714e-05,
"loss": 0.8832,
"step": 53
},
{
"epoch": 1.148936170212766,
"grad_norm": 0.8910200595855713,
"learning_rate": 8.361142719844015e-05,
"loss": 0.2747,
"step": 54
},
{
"epoch": 1.1702127659574468,
"grad_norm": 0.35534268617630005,
"learning_rate": 8.243522217687193e-05,
"loss": 0.7217,
"step": 55
},
{
"epoch": 1.1914893617021276,
"grad_norm": 0.44460806250572205,
"learning_rate": 8.124381450504426e-05,
"loss": 1.3437,
"step": 56
},
{
"epoch": 1.2127659574468086,
"grad_norm": 0.37915581464767456,
"learning_rate": 8.00378893504216e-05,
"loss": 1.2378,
"step": 57
},
{
"epoch": 1.2340425531914894,
"grad_norm": 0.3455619513988495,
"learning_rate": 7.881814022933765e-05,
"loss": 1.1035,
"step": 58
},
{
"epoch": 1.2553191489361701,
"grad_norm": 0.39131176471710205,
"learning_rate": 7.758526860816059e-05,
"loss": 0.9421,
"step": 59
},
{
"epoch": 1.2765957446808511,
"grad_norm": 0.3872637450695038,
"learning_rate": 7.633998349988623e-05,
"loss": 0.4359,
"step": 60
},
{
"epoch": 1.297872340425532,
"grad_norm": 0.3637562692165375,
"learning_rate": 7.508300105639138e-05,
"loss": 0.498,
"step": 61
},
{
"epoch": 1.3191489361702127,
"grad_norm": 0.4627838730812073,
"learning_rate": 7.381504415658137e-05,
"loss": 1.3362,
"step": 62
},
{
"epoch": 1.3404255319148937,
"grad_norm": 0.40288859605789185,
"learning_rate": 7.253684199066931e-05,
"loss": 1.2646,
"step": 63
},
{
"epoch": 1.3617021276595744,
"grad_norm": 0.3752979636192322,
"learning_rate": 7.124912964082547e-05,
"loss": 1.0772,
"step": 64
},
{
"epoch": 1.3829787234042552,
"grad_norm": 0.4062426686286926,
"learning_rate": 6.995264765843836e-05,
"loss": 1.0008,
"step": 65
},
{
"epoch": 1.4042553191489362,
"grad_norm": 0.4707888662815094,
"learning_rate": 6.86481416382306e-05,
"loss": 0.6325,
"step": 66
},
{
"epoch": 1.425531914893617,
"grad_norm": 0.28356093168258667,
"learning_rate": 6.733636178947425e-05,
"loss": 0.2474,
"step": 67
},
{
"epoch": 1.4468085106382977,
"grad_norm": 0.44394785165786743,
"learning_rate": 6.601806250455254e-05,
"loss": 1.2399,
"step": 68
},
{
"epoch": 1.4680851063829787,
"grad_norm": 0.3906940519809723,
"learning_rate": 6.469400192511568e-05,
"loss": 1.2571,
"step": 69
},
{
"epoch": 1.4893617021276595,
"grad_norm": 0.40481507778167725,
"learning_rate": 6.33649415060808e-05,
"loss": 1.1472,
"step": 70
},
{
"epoch": 1.5106382978723403,
"grad_norm": 0.3902008533477783,
"learning_rate": 6.203164557772622e-05,
"loss": 1.0084,
"step": 71
},
{
"epoch": 1.5319148936170213,
"grad_norm": 0.47971320152282715,
"learning_rate": 6.069488090613228e-05,
"loss": 0.8643,
"step": 72
},
{
"epoch": 1.5531914893617023,
"grad_norm": 0.35613465309143066,
"learning_rate": 5.935541625222126e-05,
"loss": 0.072,
"step": 73
},
{
"epoch": 1.574468085106383,
"grad_norm": 0.4089168310165405,
"learning_rate": 5.801402192965016e-05,
"loss": 1.0347,
"step": 74
},
{
"epoch": 1.5957446808510638,
"grad_norm": 0.4405708909034729,
"learning_rate": 5.667146936181042e-05,
"loss": 1.2854,
"step": 75
},
{
"epoch": 1.6170212765957448,
"grad_norm": 0.38711073994636536,
"learning_rate": 5.53285306381896e-05,
"loss": 1.2008,
"step": 76
},
{
"epoch": 1.6382978723404256,
"grad_norm": 0.3995616137981415,
"learning_rate": 5.398597807034986e-05,
"loss": 1.0411,
"step": 77
},
{
"epoch": 1.6595744680851063,
"grad_norm": 0.4823427200317383,
"learning_rate": 5.2644583747778746e-05,
"loss": 0.8773,
"step": 78
},
{
"epoch": 1.6808510638297873,
"grad_norm": 0.27052074670791626,
"learning_rate": 5.130511909386772e-05,
"loss": 0.2128,
"step": 79
},
{
"epoch": 1.702127659574468,
"grad_norm": 0.34951841831207275,
"learning_rate": 4.996835442227378e-05,
"loss": 0.6826,
"step": 80
},
{
"epoch": 1.7234042553191489,
"grad_norm": 0.44672834873199463,
"learning_rate": 4.863505849391921e-05,
"loss": 1.3443,
"step": 81
},
{
"epoch": 1.7446808510638299,
"grad_norm": 0.37677329778671265,
"learning_rate": 4.7305998074884325e-05,
"loss": 1.1532,
"step": 82
},
{
"epoch": 1.7659574468085106,
"grad_norm": 0.3660067319869995,
"learning_rate": 4.598193749544746e-05,
"loss": 1.0569,
"step": 83
},
{
"epoch": 1.7872340425531914,
"grad_norm": 0.4302046597003937,
"learning_rate": 4.466363821052573e-05,
"loss": 0.9091,
"step": 84
},
{
"epoch": 1.8085106382978724,
"grad_norm": 0.3698153793811798,
"learning_rate": 4.335185836176942e-05,
"loss": 0.4112,
"step": 85
},
{
"epoch": 1.8297872340425532,
"grad_norm": 0.27242642641067505,
"learning_rate": 4.2047352341561654e-05,
"loss": 0.4239,
"step": 86
},
{
"epoch": 1.851063829787234,
"grad_norm": 0.463748574256897,
"learning_rate": 4.0750870359174544e-05,
"loss": 1.3113,
"step": 87
},
{
"epoch": 1.872340425531915,
"grad_norm": 0.3822973966598511,
"learning_rate": 3.946315800933069e-05,
"loss": 1.2033,
"step": 88
},
{
"epoch": 1.8936170212765957,
"grad_norm": 0.3759160041809082,
"learning_rate": 3.8184955843418635e-05,
"loss": 1.1545,
"step": 89
},
{
"epoch": 1.9148936170212765,
"grad_norm": 0.3939042091369629,
"learning_rate": 3.691699894360862e-05,
"loss": 0.9759,
"step": 90
},
{
"epoch": 1.9361702127659575,
"grad_norm": 0.463613361120224,
"learning_rate": 3.5660016500113756e-05,
"loss": 0.6793,
"step": 91
},
{
"epoch": 1.9574468085106385,
"grad_norm": 0.39532244205474854,
"learning_rate": 3.441473139183941e-05,
"loss": 1.0509,
"step": 92
},
{
"epoch": 1.978723404255319,
"grad_norm": 0.37965285778045654,
"learning_rate": 3.3181859770662366e-05,
"loss": 1.1558,
"step": 93
},
{
"epoch": 2.0,
"grad_norm": 0.4093703627586365,
"learning_rate": 3.196211064957841e-05,
"loss": 0.9204,
"step": 94
},
{
"epoch": 2.021276595744681,
"grad_norm": 0.11672822386026382,
"learning_rate": 3.075618549495574e-05,
"loss": 0.0652,
"step": 95
},
{
"epoch": 2.0425531914893615,
"grad_norm": 0.4452710747718811,
"learning_rate": 2.9564777823128087e-05,
"loss": 1.0477,
"step": 96
},
{
"epoch": 2.0638297872340425,
"grad_norm": 0.4406222999095917,
"learning_rate": 2.8388572801559853e-05,
"loss": 1.2642,
"step": 97
},
{
"epoch": 2.0851063829787235,
"grad_norm": 0.3580534756183624,
"learning_rate": 2.7228246854812867e-05,
"loss": 1.0798,
"step": 98
},
{
"epoch": 2.106382978723404,
"grad_norm": 0.3822851777076721,
"learning_rate": 2.60844672755407e-05,
"loss": 1.0079,
"step": 99
},
{
"epoch": 2.127659574468085,
"grad_norm": 0.4569602608680725,
"learning_rate": 2.4957891840735056e-05,
"loss": 0.8454,
"step": 100
},
{
"epoch": 2.127659574468085,
"eval_loss": 0.9264618754386902,
"eval_runtime": 3.7503,
"eval_samples_per_second": 168.787,
"eval_steps_per_second": 5.333,
"step": 100
},
{
"epoch": 2.148936170212766,
"grad_norm": 0.26321274042129517,
"learning_rate": 2.384916843344419e-05,
"loss": 0.2123,
"step": 101
},
{
"epoch": 2.1702127659574466,
"grad_norm": 0.36982113122940063,
"learning_rate": 2.275893467018154e-05,
"loss": 0.7511,
"step": 102
},
{
"epoch": 2.1914893617021276,
"grad_norm": 0.4143376648426056,
"learning_rate": 2.1687817534238292e-05,
"loss": 1.2891,
"step": 103
},
{
"epoch": 2.2127659574468086,
"grad_norm": 0.3768226206302643,
"learning_rate": 2.0636433015111154e-05,
"loss": 1.127,
"step": 104
},
{
"epoch": 2.2340425531914896,
"grad_norm": 0.3875614106655121,
"learning_rate": 1.9605385754252593e-05,
"loss": 0.9696,
"step": 105
},
{
"epoch": 2.25531914893617,
"grad_norm": 0.4567450284957886,
"learning_rate": 1.8595268697347047e-05,
"loss": 0.8626,
"step": 106
},
{
"epoch": 2.276595744680851,
"grad_norm": 0.43263593316078186,
"learning_rate": 1.76066627533135e-05,
"loss": 0.417,
"step": 107
},
{
"epoch": 2.297872340425532,
"grad_norm": 0.2629643380641937,
"learning_rate": 1.664013646023009e-05,
"loss": 0.3379,
"step": 108
},
{
"epoch": 2.3191489361702127,
"grad_norm": 0.47793322801589966,
"learning_rate": 1.5696245658373157e-05,
"loss": 1.1889,
"step": 109
},
{
"epoch": 2.3404255319148937,
"grad_norm": 0.4138708710670471,
"learning_rate": 1.4775533170558723e-05,
"loss": 1.1401,
"step": 110
},
{
"epoch": 2.3617021276595747,
"grad_norm": 0.4172796905040741,
"learning_rate": 1.3878528489970085e-05,
"loss": 1.0396,
"step": 111
},
{
"epoch": 2.382978723404255,
"grad_norm": 0.4409525692462921,
"learning_rate": 1.3005747475651238e-05,
"loss": 0.9267,
"step": 112
},
{
"epoch": 2.404255319148936,
"grad_norm": 0.49562060832977295,
"learning_rate": 1.2157692055841128e-05,
"loss": 0.6103,
"step": 113
},
{
"epoch": 2.425531914893617,
"grad_norm": 0.16902895271778107,
"learning_rate": 1.1334849939319436e-05,
"loss": 0.1601,
"step": 114
},
{
"epoch": 2.4468085106382977,
"grad_norm": 0.5262559652328491,
"learning_rate": 1.0537694334929756e-05,
"loss": 1.2164,
"step": 115
},
{
"epoch": 2.4680851063829787,
"grad_norm": 0.45780274271965027,
"learning_rate": 9.766683679441566e-06,
"loss": 1.2017,
"step": 116
},
{
"epoch": 2.4893617021276597,
"grad_norm": 0.4124818444252014,
"learning_rate": 9.022261373907599e-06,
"loss": 1.0688,
"step": 117
},
{
"epoch": 2.5106382978723403,
"grad_norm": 0.41770249605178833,
"learning_rate": 8.304855528667915e-06,
"loss": 0.893,
"step": 118
},
{
"epoch": 2.5319148936170213,
"grad_norm": 0.5359745025634766,
"learning_rate": 7.614878717147731e-06,
"loss": 0.734,
"step": 119
},
{
"epoch": 2.5531914893617023,
"grad_norm": 0.12150562554597855,
"learning_rate": 6.952727738590198e-06,
"loss": 0.0908,
"step": 120
},
{
"epoch": 2.574468085106383,
"grad_norm": 0.45060330629348755,
"learning_rate": 6.318783389860888e-06,
"loss": 0.9969,
"step": 121
},
{
"epoch": 2.595744680851064,
"grad_norm": 0.4552464485168457,
"learning_rate": 5.7134102464550925e-06,
"loss": 1.2133,
"step": 122
},
{
"epoch": 2.617021276595745,
"grad_norm": 0.41424861550331116,
"learning_rate": 5.136956452833776e-06,
"loss": 1.0531,
"step": 123
},
{
"epoch": 2.6382978723404253,
"grad_norm": 0.4196318984031677,
"learning_rate": 4.589753522209003e-06,
"loss": 0.9811,
"step": 124
},
{
"epoch": 2.6595744680851063,
"grad_norm": 0.4966506063938141,
"learning_rate": 4.072116145893723e-06,
"loss": 0.8532,
"step": 125
},
{
"epoch": 2.6808510638297873,
"grad_norm": 0.2934734523296356,
"learning_rate": 3.584342012325771e-06,
"loss": 0.1873,
"step": 126
},
{
"epoch": 2.702127659574468,
"grad_norm": 0.3809848725795746,
"learning_rate": 3.126711635869966e-06,
"loss": 0.6348,
"step": 127
},
{
"epoch": 2.723404255319149,
"grad_norm": 0.463334858417511,
"learning_rate": 2.699488195496971e-06,
"loss": 1.2586,
"step": 128
},
{
"epoch": 2.74468085106383,
"grad_norm": 0.43426749110221863,
"learning_rate": 2.3029173834314634e-06,
"loss": 1.1442,
"step": 129
},
{
"epoch": 2.7659574468085104,
"grad_norm": 0.4165303111076355,
"learning_rate": 1.9372272638568494e-06,
"loss": 1.0423,
"step": 130
},
{
"epoch": 2.7872340425531914,
"grad_norm": 0.48008161783218384,
"learning_rate": 1.6026281417576689e-06,
"loss": 0.8429,
"step": 131
},
{
"epoch": 2.8085106382978724,
"grad_norm": 0.45738592743873596,
"learning_rate": 1.299312441975153e-06,
"loss": 0.3767,
"step": 132
},
{
"epoch": 2.829787234042553,
"grad_norm": 0.350779265165329,
"learning_rate": 1.0274545985455078e-06,
"loss": 0.5226,
"step": 133
},
{
"epoch": 2.851063829787234,
"grad_norm": 0.49393415451049805,
"learning_rate": 7.872109543844799e-07,
"loss": 1.2395,
"step": 134
},
{
"epoch": 2.872340425531915,
"grad_norm": 0.4229400157928467,
"learning_rate": 5.787196713760618e-07,
"loss": 1.1422,
"step": 135
},
{
"epoch": 2.8936170212765955,
"grad_norm": 0.42691662907600403,
"learning_rate": 4.021006509168048e-07,
"loss": 1.0496,
"step": 136
},
{
"epoch": 2.9148936170212765,
"grad_norm": 0.4528738260269165,
"learning_rate": 2.574554649617209e-07,
"loss": 0.905,
"step": 137
},
{
"epoch": 2.9361702127659575,
"grad_norm": 0.4881468713283539,
"learning_rate": 1.4486729761113447e-07,
"loss": 0.5696,
"step": 138
},
{
"epoch": 2.9574468085106385,
"grad_norm": 0.412494957447052,
"learning_rate": 6.440089727230269e-08,
"loss": 0.7729,
"step": 139
},
{
"epoch": 2.978723404255319,
"grad_norm": 0.4168694317340851,
"learning_rate": 1.6102539423217266e-08,
"loss": 1.1272,
"step": 140
},
{
"epoch": 3.0,
"grad_norm": 0.48817628622055054,
"learning_rate": 0.0,
"loss": 0.8255,
"step": 141
}
],
"logging_steps": 1,
"max_steps": 141,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 50,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.466930952990884e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}