Upload folder using huggingface_hub
- model.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state.pth +0 -0
- scheduler.pt +0 -0
- trainer_state.json +633 -3
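The commit message indicates the checkpoint folder was pushed with huggingface_hub. A minimal sketch of such a push via HfApi.upload_folder, with hypothetical repo_id and folder_path (neither appears in this commit):

```python
from huggingface_hub import HfApi

api = HfApi()
# Push a local Trainer checkpoint directory to the Hub in one commit.
# repo_id and folder_path are placeholders, not taken from this commit.
api.upload_folder(
    repo_id="user/model",
    folder_path="./checkpoint-1900000",
    commit_message="Upload folder using huggingface_hub",
)
```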
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:3e16bc3c19db986ac4ca912dd200a709fa26a7b0cce2c7ad804756f07f9764ae
 size 1426462208
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:39de36962e52b3f73461bf26e39adafd9fc4118dc96dfa826b517795df14d8da
 size 2853107898
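Both weight files are stored through Git LFS, so the diff shows only the pointer file (version, oid, size) rather than the binary payload. A downloaded copy can be checked against the pointer with the standard library alone; a minimal sketch (the local filename is an assumption):

```python
import hashlib
import os

def lfs_fields(path: str) -> tuple[str, int]:
    """Return (sha256 hex digest, byte size) for a local file,
    matching the 'oid sha256:' and 'size' lines of an LFS pointer."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest(), os.path.getsize(path)

digest, size = lfs_fields("model.safetensors")  # hypothetical local copy
assert digest == "3e16bc3c19db986ac4ca912dd200a709fa26a7b0cce2c7ad804756f07f9764ae"
assert size == 1426462208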
rng_state.pth CHANGED
Binary files a/rng_state.pth and b/rng_state.pth differ
scheduler.pt CHANGED
Binary files a/scheduler.pt and b/scheduler.pt differ
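optimizer.pt, scheduler.pt, and rng_state.pth are the auxiliary files a transformers Trainer checkpoint carries so that training can resume with the same optimizer moments, LR schedule position, and RNG state; when they sit in a checkpoint-* folder, trainer.train(resume_from_checkpoint=...) restores them. They are plain torch.save artifacts and can be inspected directly; a minimal sketch, assuming local copies of the files in this commit:

```python
import torch

# Load the checkpoint's auxiliary state onto CPU for inspection.
# weights_only=False is needed on newer torch because rng_state.pth
# holds non-tensor objects (e.g. the numpy RNG state); only do this
# for files you trust.
optimizer_state = torch.load("optimizer.pt", map_location="cpu", weights_only=False)
scheduler_state = torch.load("scheduler.pt", map_location="cpu", weights_only=False)
rng_state = torch.load("rng_state.pth", map_location="cpu", weights_only=False)
print(sorted(optimizer_state.keys()))  # typically 'state' and 'param_groups'
```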
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.8859229638692638,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 1900000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -707,6 +707,636 @@
       "learning_rate": 2.6694910512455145e-05,
       "loss": 2.4431,
       "step": 1000000
+    },
+    {
+      "epoch": 0.470937996583135,
+      "grad_norm": 0.8998690843582153,
+      "learning_rate": 2.6461866145433116e-05,
+      "loss": 2.4419,
+      "step": 1010000
+    },
+    {
+      "epoch": 0.4756007490245522,
+      "grad_norm": 0.9611771702766418,
+      "learning_rate": 2.6228821778411084e-05,
+      "loss": 2.4425,
+      "step": 1020000
+    },
+    {
+      "epoch": 0.48026350146596936,
+      "grad_norm": 1.0314167737960815,
+      "learning_rate": 2.5995754097626846e-05,
+      "loss": 2.4393,
+      "step": 1030000
+    },
+    {
+      "epoch": 0.48492625390738653,
+      "grad_norm": 1.0637314319610596,
+      "learning_rate": 2.5762709730604817e-05,
+      "loss": 2.4373,
+      "step": 1040000
+    },
+    {
+      "epoch": 0.4895890063488037,
+      "grad_norm": 1.1488375663757324,
+      "learning_rate": 2.552964204982058e-05,
+      "loss": 2.4348,
+      "step": 1050000
+    },
+    {
+      "epoch": 0.49425175879022093,
+      "grad_norm": 1.1408883333206177,
+      "learning_rate": 2.529657436903634e-05,
+      "loss": 2.4352,
+      "step": 1060000
+    },
+    {
+      "epoch": 0.4989145112316381,
+      "grad_norm": 0.9368888735771179,
+      "learning_rate": 2.5063530002014312e-05,
+      "loss": 2.4321,
+      "step": 1070000
+    },
+    {
+      "epoch": 0.5035772636730552,
+      "grad_norm": 0.9766519069671631,
+      "learning_rate": 2.483046232123007e-05,
+      "loss": 2.4319,
+      "step": 1080000
+    },
+    {
+      "epoch": 0.5082400161144724,
+      "grad_norm": 0.9681417942047119,
+      "learning_rate": 2.4597417954208042e-05,
+      "loss": 2.4329,
+      "step": 1090000
+    },
+    {
+      "epoch": 0.5129027685558896,
+      "grad_norm": 1.0913608074188232,
+      "learning_rate": 2.4364373587186014e-05,
+      "loss": 2.428,
+      "step": 1100000
+    },
+    {
+      "epoch": 0.5175655209973068,
+      "grad_norm": 0.9350466132164001,
+      "learning_rate": 2.413132922016398e-05,
+      "loss": 2.4241,
+      "step": 1110000
+    },
+    {
+      "epoch": 0.522228273438724,
+      "grad_norm": 0.8676067590713501,
+      "learning_rate": 2.3898261539379743e-05,
+      "loss": 2.4227,
+      "step": 1120000
+    },
+    {
+      "epoch": 0.5268910258801411,
+      "grad_norm": 0.8786779046058655,
+      "learning_rate": 2.366521717235771e-05,
+      "loss": 2.4208,
+      "step": 1130000
+    },
+    {
+      "epoch": 0.5315537783215584,
+      "grad_norm": 0.9234575629234314,
+      "learning_rate": 2.3432126177811267e-05,
+      "loss": 2.4249,
+      "step": 1140000
+    },
+    {
+      "epoch": 0.5362165307629755,
+      "grad_norm": 1.3854731321334839,
+      "learning_rate": 2.3199105124551445e-05,
+      "loss": 2.4245,
+      "step": 1150000
+    },
+    {
+      "epoch": 0.5408792832043927,
+      "grad_norm": 1.0635942220687866,
+      "learning_rate": 2.2966060757529416e-05,
+      "loss": 2.4202,
+      "step": 1160000
+    },
+    {
+      "epoch": 0.5455420356458098,
+      "grad_norm": 0.8858787417411804,
+      "learning_rate": 2.2732993076745178e-05,
+      "loss": 2.418,
+      "step": 1170000
+    },
+    {
+      "epoch": 0.5502047880872271,
+      "grad_norm": 1.309348702430725,
+      "learning_rate": 2.2499948709723146e-05,
+      "loss": 2.4148,
+      "step": 1180000
+    },
+    {
+      "epoch": 0.5548675405286442,
+      "grad_norm": 0.9109322428703308,
+      "learning_rate": 2.2266904342701114e-05,
+      "loss": 2.4151,
+      "step": 1190000
+    },
+    {
+      "epoch": 0.5595302929700614,
+      "grad_norm": 1.0102558135986328,
+      "learning_rate": 2.2033836661916876e-05,
+      "loss": 2.4134,
+      "step": 1200000
+    },
+    {
+      "epoch": 0.5641930454114785,
+      "grad_norm": 1.0807286500930786,
+      "learning_rate": 2.1800815608657053e-05,
+      "loss": 2.4135,
+      "step": 1210000
+    },
+    {
+      "epoch": 0.5688557978528958,
+      "grad_norm": 0.9256259799003601,
+      "learning_rate": 2.156774792787282e-05,
+      "loss": 2.4106,
+      "step": 1220000
+    },
+    {
+      "epoch": 0.573518550294313,
+      "grad_norm": 0.9988642930984497,
+      "learning_rate": 2.1334703560850786e-05,
+      "loss": 2.4127,
+      "step": 1230000
+    },
+    {
+      "epoch": 0.5781813027357301,
+      "grad_norm": 1.1274610757827759,
+      "learning_rate": 2.1101659193828754e-05,
+      "loss": 2.4096,
+      "step": 1240000
+    },
+    {
+      "epoch": 0.5828440551771473,
+      "grad_norm": 1.0102494955062866,
+      "learning_rate": 2.0868591513044516e-05,
+      "loss": 2.4066,
+      "step": 1250000
+    },
+    {
+      "epoch": 0.5875068076185644,
+      "grad_norm": 1.011261224746704,
+      "learning_rate": 2.0635547146022484e-05,
+      "loss": 2.4046,
+      "step": 1260000
+    },
+    {
+      "epoch": 0.5921695600599817,
+      "grad_norm": 1.1067317724227905,
+      "learning_rate": 2.0402502779000456e-05,
+      "loss": 2.4054,
+      "step": 1270000
+    },
+    {
+      "epoch": 0.5968323125013988,
+      "grad_norm": 1.117375135421753,
+      "learning_rate": 2.0169435098216217e-05,
+      "loss": 2.4057,
+      "step": 1280000
+    },
+    {
+      "epoch": 0.601495064942816,
+      "grad_norm": 1.067470669746399,
+      "learning_rate": 1.993636741743198e-05,
+      "loss": 2.4066,
+      "step": 1290000
+    },
+    {
+      "epoch": 0.6061578173842331,
+      "grad_norm": 1.0158133506774902,
+      "learning_rate": 1.9703346364172157e-05,
+      "loss": 2.4029,
+      "step": 1300000
+    },
+    {
+      "epoch": 0.6108205698256504,
+      "grad_norm": 1.1179207563400269,
+      "learning_rate": 1.947027868338792e-05,
+      "loss": 2.4006,
+      "step": 1310000
+    },
+    {
+      "epoch": 0.6154833222670676,
+      "grad_norm": 0.8885159492492676,
+      "learning_rate": 1.923721100260368e-05,
+      "loss": 2.4008,
+      "step": 1320000
+    },
+    {
+      "epoch": 0.6201460747084847,
+      "grad_norm": 0.9562169313430786,
+      "learning_rate": 1.9004143321819446e-05,
+      "loss": 2.4014,
+      "step": 1330000
+    },
+    {
+      "epoch": 0.6248088271499019,
+      "grad_norm": 1.0893275737762451,
+      "learning_rate": 1.8771098954797414e-05,
+      "loss": 2.3992,
+      "step": 1340000
+    },
+    {
+      "epoch": 0.629471579591319,
+      "grad_norm": 1.1396783590316772,
+      "learning_rate": 1.853807790153759e-05,
+      "loss": 2.3961,
+      "step": 1350000
+    },
+    {
+      "epoch": 0.6341343320327363,
+      "grad_norm": 0.894639790058136,
+      "learning_rate": 1.830503353451556e-05,
+      "loss": 2.3949,
+      "step": 1360000
+    },
+    {
+      "epoch": 0.6387970844741534,
+      "grad_norm": 1.0523122549057007,
+      "learning_rate": 1.807196585373132e-05,
+      "loss": 2.3924,
+      "step": 1370000
+    },
+    {
+      "epoch": 0.6434598369155706,
+      "grad_norm": 1.4329748153686523,
+      "learning_rate": 1.7838898172947086e-05,
+      "loss": 2.3965,
+      "step": 1380000
+    },
+    {
+      "epoch": 0.6481225893569877,
+      "grad_norm": 0.9407207369804382,
+      "learning_rate": 1.7605853805925054e-05,
+      "loss": 2.3944,
+      "step": 1390000
+    },
+    {
+      "epoch": 0.652785341798405,
+      "grad_norm": 1.1153851747512817,
+      "learning_rate": 1.7372809438903022e-05,
+      "loss": 2.3951,
+      "step": 1400000
+    },
+    {
+      "epoch": 0.6574480942398221,
+      "grad_norm": 1.4270461797714233,
+      "learning_rate": 1.7139741758118784e-05,
+      "loss": 2.3903,
+      "step": 1410000
+    },
+    {
+      "epoch": 0.6621108466812393,
+      "grad_norm": 0.9156707525253296,
+      "learning_rate": 1.6906697391096756e-05,
+      "loss": 2.387,
+      "step": 1420000
+    },
+    {
+      "epoch": 0.6667735991226565,
+      "grad_norm": 1.0517213344573975,
+      "learning_rate": 1.6673653024074724e-05,
+      "loss": 2.3908,
+      "step": 1430000
+    },
+    {
+      "epoch": 0.6714363515640737,
+      "grad_norm": 1.1789027452468872,
+      "learning_rate": 1.64406319708149e-05,
+      "loss": 2.3857,
+      "step": 1440000
+    },
+    {
+      "epoch": 0.6760991040054909,
+      "grad_norm": 0.9410611391067505,
+      "learning_rate": 1.6207564290030663e-05,
+      "loss": 2.3851,
+      "step": 1450000
+    },
+    {
+      "epoch": 0.680761856446908,
+      "grad_norm": 1.2597123384475708,
+      "learning_rate": 1.597451992300863e-05,
+      "loss": 2.3853,
+      "step": 1460000
+    },
+    {
+      "epoch": 0.6854246088883252,
+      "grad_norm": 1.111659288406372,
+      "learning_rate": 1.5741452242224393e-05,
+      "loss": 2.3849,
+      "step": 1470000
+    },
+    {
+      "epoch": 0.6900873613297424,
+      "grad_norm": 1.114686131477356,
+      "learning_rate": 1.5508384561440158e-05,
+      "loss": 2.3844,
+      "step": 1480000
+    },
+    {
+      "epoch": 0.6947501137711596,
+      "grad_norm": 1.3087519407272339,
+      "learning_rate": 1.527531688065592e-05,
+      "loss": 2.3811,
+      "step": 1490000
+    },
+    {
+      "epoch": 0.6994128662125767,
+      "grad_norm": 1.2704778909683228,
+      "learning_rate": 1.5042319141158304e-05,
+      "loss": 2.3793,
+      "step": 1500000
+    },
+    {
+      "epoch": 0.7040756186539939,
+      "grad_norm": 1.0817821025848389,
+      "learning_rate": 1.4809251460374065e-05,
+      "loss": 2.3793,
+      "step": 1510000
+    },
+    {
+      "epoch": 0.7087383710954112,
+      "grad_norm": 1.1640921831130981,
+      "learning_rate": 1.4576230407114241e-05,
+      "loss": 2.3826,
+      "step": 1520000
+    },
+    {
+      "epoch": 0.7134011235368283,
+      "grad_norm": 1.5091464519500732,
+      "learning_rate": 1.4343162726330003e-05,
+      "loss": 2.379,
+      "step": 1530000
+    },
+    {
+      "epoch": 0.7180638759782455,
+      "grad_norm": 1.3562886714935303,
+      "learning_rate": 1.4110118359307974e-05,
+      "loss": 2.3748,
+      "step": 1540000
+    },
+    {
+      "epoch": 0.7227266284196626,
+      "grad_norm": 0.9998787641525269,
+      "learning_rate": 1.3877073992285944e-05,
+      "loss": 2.375,
+      "step": 1550000
+    },
+    {
+      "epoch": 0.7273893808610798,
+      "grad_norm": 1.163294792175293,
+      "learning_rate": 1.3644006311501706e-05,
+      "loss": 2.3776,
+      "step": 1560000
+    },
+    {
+      "epoch": 0.732052133302497,
+      "grad_norm": 1.0799118280410767,
+      "learning_rate": 1.3410985258241882e-05,
+      "loss": 2.3732,
+      "step": 1570000
+    },
+    {
+      "epoch": 0.7367148857439142,
+      "grad_norm": 0.9467183351516724,
+      "learning_rate": 1.317794089121985e-05,
+      "loss": 2.3705,
+      "step": 1580000
+    },
+    {
+      "epoch": 0.7413776381853313,
+      "grad_norm": 1.2810046672821045,
+      "learning_rate": 1.2944873210435612e-05,
+      "loss": 2.3721,
+      "step": 1590000
+    },
+    {
+      "epoch": 0.7460403906267485,
+      "grad_norm": 1.2798866033554077,
+      "learning_rate": 1.2711828843413585e-05,
+      "loss": 2.3738,
+      "step": 1600000
+    },
+    {
+      "epoch": 0.7507031430681657,
+      "grad_norm": 1.221845030784607,
+      "learning_rate": 1.2478784476391553e-05,
+      "loss": 2.3683,
+      "step": 1610000
+    },
+    {
+      "epoch": 0.7553658955095829,
+      "grad_norm": 1.2743821144104004,
+      "learning_rate": 1.2245740109369522e-05,
+      "loss": 2.3724,
+      "step": 1620000
+    },
+    {
+      "epoch": 0.7600286479510001,
+      "grad_norm": 1.1069179773330688,
+      "learning_rate": 1.201269574234749e-05,
+      "loss": 2.3662,
+      "step": 1630000
+    },
+    {
+      "epoch": 0.7646914003924172,
+      "grad_norm": 1.4689267873764038,
+      "learning_rate": 1.177965137532546e-05,
+      "loss": 2.3713,
+      "step": 1640000
+    },
+    {
+      "epoch": 0.7693541528338345,
+      "grad_norm": 1.0129334926605225,
+      "learning_rate": 1.154660700830343e-05,
+      "loss": 2.3689,
+      "step": 1650000
+    },
+    {
+      "epoch": 0.7740169052752516,
+      "grad_norm": 0.9776953458786011,
+      "learning_rate": 1.1313539327519193e-05,
+      "loss": 2.363,
+      "step": 1660000
+    },
+    {
+      "epoch": 0.7786796577166688,
+      "grad_norm": 1.1849191188812256,
+      "learning_rate": 1.1080494960497161e-05,
+      "loss": 2.3671,
+      "step": 1670000
+    },
+    {
+      "epoch": 0.7833424101580859,
+      "grad_norm": 1.0659184455871582,
+      "learning_rate": 1.0847427279712923e-05,
+      "loss": 2.363,
+      "step": 1680000
+    },
+    {
+      "epoch": 0.7880051625995032,
+      "grad_norm": 1.0228557586669922,
+      "learning_rate": 1.0614382912690895e-05,
+      "loss": 2.362,
+      "step": 1690000
+    },
+    {
+      "epoch": 0.7926679150409203,
+      "grad_norm": 0.9540805816650391,
+      "learning_rate": 1.0381315231906656e-05,
+      "loss": 2.366,
+      "step": 1700000
+    },
+    {
+      "epoch": 0.7973306674823375,
+      "grad_norm": 1.1381940841674805,
+      "learning_rate": 1.0148270864884624e-05,
+      "loss": 2.3592,
+      "step": 1710000
+    },
+    {
+      "epoch": 0.8019934199237547,
+      "grad_norm": 1.1460505723953247,
+      "learning_rate": 9.915226497862596e-06,
+      "loss": 2.3591,
+      "step": 1720000
+    },
+    {
+      "epoch": 0.8066561723651718,
+      "grad_norm": 1.0586894750595093,
+      "learning_rate": 9.682182130840564e-06,
+      "loss": 2.3592,
+      "step": 1730000
+    },
+    {
+      "epoch": 0.8113189248065891,
+      "grad_norm": 1.3877402544021606,
+      "learning_rate": 9.449114450056326e-06,
+      "loss": 2.3635,
+      "step": 1740000
+    },
+    {
+      "epoch": 0.8159816772480062,
+      "grad_norm": 1.2622848749160767,
+      "learning_rate": 9.216046769272089e-06,
+      "loss": 2.3577,
+      "step": 1750000
+    },
+    {
+      "epoch": 0.8206444296894234,
+      "grad_norm": 1.1290611028671265,
+      "learning_rate": 8.983002402250059e-06,
+      "loss": 2.3587,
+      "step": 1760000
+    },
+    {
+      "epoch": 0.8253071821308405,
+      "grad_norm": 1.0407214164733887,
+      "learning_rate": 8.74993472146582e-06,
+      "loss": 2.3562,
+      "step": 1770000
+    },
+    {
+      "epoch": 0.8299699345722578,
+      "grad_norm": 1.1062073707580566,
+      "learning_rate": 8.51689035444379e-06,
+      "loss": 2.358,
+      "step": 1780000
+    },
+    {
+      "epoch": 0.8346326870136749,
+      "grad_norm": 1.04072904586792,
+      "learning_rate": 8.28384598742176e-06,
+      "loss": 2.3518,
+      "step": 1790000
+    },
+    {
+      "epoch": 0.8392954394550921,
+      "grad_norm": 1.0454237461090088,
+      "learning_rate": 8.050801620399728e-06,
+      "loss": 2.3587,
+      "step": 1800000
+    },
+    {
+      "epoch": 0.8439581918965092,
+      "grad_norm": 1.2492414712905884,
+      "learning_rate": 7.817780567139906e-06,
+      "loss": 2.3552,
+      "step": 1810000
+    },
+    {
+      "epoch": 0.8486209443379265,
+      "grad_norm": 1.2101612091064453,
+      "learning_rate": 7.584712886355667e-06,
+      "loss": 2.358,
+      "step": 1820000
+    },
+    {
+      "epoch": 0.8532836967793437,
+      "grad_norm": 1.0315169095993042,
+      "learning_rate": 7.351668519333638e-06,
+      "loss": 2.3515,
+      "step": 1830000
+    },
+    {
+      "epoch": 0.8579464492207608,
+      "grad_norm": 1.130194902420044,
+      "learning_rate": 7.118624152311607e-06,
+      "loss": 2.3535,
+      "step": 1840000
+    },
+    {
+      "epoch": 0.862609201662178,
+      "grad_norm": 1.1591068506240845,
+      "learning_rate": 6.885579785289576e-06,
+      "loss": 2.3484,
+      "step": 1850000
+    },
+    {
+      "epoch": 0.8672719541035951,
+      "grad_norm": 1.1694544553756714,
+      "learning_rate": 6.652535418267545e-06,
+      "loss": 2.3473,
+      "step": 1860000
+    },
+    {
+      "epoch": 0.8719347065450124,
+      "grad_norm": 1.2773854732513428,
+      "learning_rate": 6.419491051245514e-06,
+      "loss": 2.3464,
+      "step": 1870000
+    },
+    {
+      "epoch": 0.8765974589864295,
+      "grad_norm": 1.0938977003097534,
+      "learning_rate": 6.186423370461277e-06,
+      "loss": 2.3468,
+      "step": 1880000
+    },
+    {
+      "epoch": 0.8812602114278467,
+      "grad_norm": 1.178916573524475,
+      "learning_rate": 5.9534023172014535e-06,
+      "loss": 2.3455,
+      "step": 1890000
+    },
+    {
+      "epoch": 0.8859229638692638,
+      "grad_norm": 1.2058972120285034,
+      "learning_rate": 5.720334636417215e-06,
+      "loss": 2.3433,
+      "step": 1900000
     }
   ],
   "logging_steps": 10000,
@@ -726,7 +1356,7 @@
       "attributes": {}
     }
   },
-  "total_flos":
+  "total_flos": 4.684660451731086e+19,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
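After the update, trainer_state.json records a loss that eases from 2.4431 at step 1,000,000 to 2.3433 at step 1,900,000, with the learning rate decaying linearly (a constant drop of about 2.33e-7 per 10,000 logged steps). A minimal sketch for pulling those numbers back out of the file:

```python
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only training-log entries; other entries (e.g. eval or a final
# summary) may lack a "loss" field.
logs = [e for e in state["log_history"] if "loss" in e and "step" in e]
print(f"global_step={state['global_step']}, epoch={state['epoch']:.4f}")
print(f"loss {logs[0]['loss']:.4f} -> {logs[-1]['loss']:.4f} "
      f"across {len(logs)} logged points")
```

Incidentally, epoch 0.8859 at global step 1,900,000 implies roughly 2.14 million optimizer steps per epoch; with "train_batch_size": 4 that is on the order of 8.6 million samples per epoch, assuming a single device and no gradient accumulation (neither is stated in this file).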