samtuckervegan commited on Feb 22

Commit

72e4137

verified ·

1 Parent(s): 154cd4c

Upload folder using huggingface_hub

Browse files

Files changed (21) hide show

README.md +28 -0
checkpoint-4916/config.json +32 -0
checkpoint-4916/model.safetensors +3 -0
checkpoint-4916/optimizer.pt +3 -0
checkpoint-4916/rng_state_0.pth +3 -0
checkpoint-4916/rng_state_1.pth +3 -0
checkpoint-4916/rng_state_2.pth +3 -0
checkpoint-4916/rng_state_3.pth +3 -0
checkpoint-4916/scheduler.pt +3 -0
checkpoint-4916/trainer_state.json +1440 -0
checkpoint-4916/training_args.bin +3 -0
config.json +32 -0
model.safetensors +3 -0
runs/Feb22_02-13-45_r-samtuckervegan-autotrain-4xl4-41bfog4i-7c075-ypwih/events.out.tfevents.1740190427.r-samtuckervegan-autotrain-4xl4-41bfog4i-7c075-ypwih.208.0 +2 -2
runs/Feb22_02-13-45_r-samtuckervegan-autotrain-4xl4-41bfog4i-7c075-ypwih/events.out.tfevents.1740191139.r-samtuckervegan-autotrain-4xl4-41bfog4i-7c075-ypwih.208.1 +3 -0
special_tokens_map.json +7 -0
tokenizer.json +0 -0
tokenizer_config.json +56 -0
training_args.bin +3 -0
training_params.json +30 -0
vocab.txt +0 -0

README.md ADDED Viewed

	@@ -0,0 +1,28 @@

+---
+tags:
+- autotrain
+- text-regression
+base_model: distilbert/distilbert-base-uncased
+widget:
+- text: "I love AutoTrain"
+datasets:
+- samtuckervegan/text_performance
+---
+# Model Trained Using AutoTrain
+- Problem type: Text Regression
+## Validation Metrics
+loss: 0.03380444645881653
+mse: 0.033803146332502365
+mae: 0.14292894303798676
+r2: 0.2760580778121948
+rmse: 0.18385631980571776
+explained_variance: 0.2763633728027344

checkpoint-4916/config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "_name_or_path": "distilbert/distilbert-base-uncased",
+  "_num_labels": 1,
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "target"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "target": 0
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "regression",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.48.0",
+  "vocab_size": 30522
+}

checkpoint-4916/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:52992a6b1bb7b233e4054bc086b088013d7e7613bcffead944d24fd70d64c55e
+size 267829484

checkpoint-4916/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:64c1456eb332d082a347d7cad4735bc4250845738e02e04a63fa9f17e819942a
+size 535721146

checkpoint-4916/rng_state_0.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c8fac6987c200600e610946b53cf319ed29c1564951f6e89145ce16858af5139
+size 15024

checkpoint-4916/rng_state_1.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7b0f8befb7da5855d4149667159792c7f547534617d1616470ae82b4e1c4af49
+size 15024

checkpoint-4916/rng_state_2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aa0ca0b967f2e548cdf3eee611a80ad48edb340060335df03d47e61c93cbc6aa
+size 15024

checkpoint-4916/rng_state_3.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:754aa36cc24cc6e3f8bda45f9c465b2a33ed0e611d1fa8443bdfc2f99b182652
+size 15024

checkpoint-4916/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b39d933709e10bf97e1b41f5aee68f3dcc996b1173e11cc1c55ae6d677f967fa
+size 1064

checkpoint-4916/trainer_state.json ADDED Viewed

	@@ -0,0 +1,1440 @@

+{
+  "best_metric": 0.03380444645881653,
+  "best_model_checkpoint": "textprediction/checkpoint-4916",
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 4916,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.01017087062652563,
+      "grad_norm": 4.562432289123535,
+      "learning_rate": 1.6260162601626018e-06,
+      "loss": 0.1968,
+      "step": 25
+    },
+    {
+      "epoch": 0.02034174125305126,
+      "grad_norm": 1.4055429697036743,
+      "learning_rate": 3.3197831978319785e-06,
+      "loss": 0.0977,
+      "step": 50
+    },
+    {
+      "epoch": 0.030512611879576892,
+      "grad_norm": 0.8549578189849854,
+      "learning_rate": 5.013550135501355e-06,
+      "loss": 0.0498,
+      "step": 75
+    },
+    {
+      "epoch": 0.04068348250610252,
+      "grad_norm": 0.41762399673461914,
+      "learning_rate": 6.707317073170733e-06,
+      "loss": 0.0469,
+      "step": 100
+    },
+    {
+      "epoch": 0.050854353132628156,
+      "grad_norm": 0.4172782897949219,
+      "learning_rate": 8.401084010840109e-06,
+      "loss": 0.0487,
+      "step": 125
+    },
+    {
+      "epoch": 0.061025223759153785,
+      "grad_norm": 0.45810267329216003,
+      "learning_rate": 1.0094850948509485e-05,
+      "loss": 0.0449,
+      "step": 150
+    },
+    {
+      "epoch": 0.07119609438567942,
+      "grad_norm": 0.41910672187805176,
+      "learning_rate": 1.1788617886178862e-05,
+      "loss": 0.0417,
+      "step": 175
+    },
+    {
+      "epoch": 0.08136696501220504,
+      "grad_norm": 0.36320051550865173,
+      "learning_rate": 1.348238482384824e-05,
+      "loss": 0.0421,
+      "step": 200
+    },
+    {
+      "epoch": 0.09153783563873068,
+      "grad_norm": 0.434171199798584,
+      "learning_rate": 1.5176151761517615e-05,
+      "loss": 0.0465,
+      "step": 225
+    },
+    {
+      "epoch": 0.10170870626525631,
+      "grad_norm": 0.3281106948852539,
+      "learning_rate": 1.6869918699186994e-05,
+      "loss": 0.0433,
+      "step": 250
+    },
+    {
+      "epoch": 0.11187957689178193,
+      "grad_norm": 0.36068961024284363,
+      "learning_rate": 1.856368563685637e-05,
+      "loss": 0.0439,
+      "step": 275
+    },
+    {
+      "epoch": 0.12205044751830757,
+      "grad_norm": 0.5031670928001404,
+      "learning_rate": 2.0257452574525744e-05,
+      "loss": 0.0423,
+      "step": 300
+    },
+    {
+      "epoch": 0.1322213181448332,
+      "grad_norm": 1.1825827360153198,
+      "learning_rate": 2.1951219512195124e-05,
+      "loss": 0.0427,
+      "step": 325
+    },
+    {
+      "epoch": 0.14239218877135884,
+      "grad_norm": 0.3233006000518799,
+      "learning_rate": 2.36449864498645e-05,
+      "loss": 0.0434,
+      "step": 350
+    },
+    {
+      "epoch": 0.15256305939788445,
+      "grad_norm": 0.4184306561946869,
+      "learning_rate": 2.5338753387533877e-05,
+      "loss": 0.0423,
+      "step": 375
+    },
+    {
+      "epoch": 0.16273393002441008,
+      "grad_norm": 0.38505345582962036,
+      "learning_rate": 2.7032520325203254e-05,
+      "loss": 0.0415,
+      "step": 400
+    },
+    {
+      "epoch": 0.17290480065093572,
+      "grad_norm": 0.3058652877807617,
+      "learning_rate": 2.872628726287263e-05,
+      "loss": 0.0431,
+      "step": 425
+    },
+    {
+      "epoch": 0.18307567127746135,
+      "grad_norm": 0.43836578726768494,
+      "learning_rate": 3.0420054200542007e-05,
+      "loss": 0.0417,
+      "step": 450
+    },
+    {
+      "epoch": 0.193246541903987,
+      "grad_norm": 0.5372226238250732,
+      "learning_rate": 3.2113821138211384e-05,
+      "loss": 0.0387,
+      "step": 475
+    },
+    {
+      "epoch": 0.20341741253051263,
+      "grad_norm": 0.4316234886646271,
+      "learning_rate": 3.380758807588076e-05,
+      "loss": 0.0425,
+      "step": 500
+    },
+    {
+      "epoch": 0.21358828315703823,
+      "grad_norm": 0.4142661988735199,
+      "learning_rate": 3.550135501355014e-05,
+      "loss": 0.0416,
+      "step": 525
+    },
+    {
+      "epoch": 0.22375915378356387,
+      "grad_norm": 0.2900916635990143,
+      "learning_rate": 3.7195121951219514e-05,
+      "loss": 0.044,
+      "step": 550
+    },
+    {
+      "epoch": 0.2339300244100895,
+      "grad_norm": 0.6529517769813538,
+      "learning_rate": 3.888888888888889e-05,
+      "loss": 0.0395,
+      "step": 575
+    },
+    {
+      "epoch": 0.24410089503661514,
+      "grad_norm": 0.4228176176548004,
+      "learning_rate": 4.058265582655827e-05,
+      "loss": 0.0379,
+      "step": 600
+    },
+    {
+      "epoch": 0.25427176566314075,
+      "grad_norm": 0.4153424799442291,
+      "learning_rate": 4.2276422764227644e-05,
+      "loss": 0.0419,
+      "step": 625
+    },
+    {
+      "epoch": 0.2644426362896664,
+      "grad_norm": 0.5038943290710449,
+      "learning_rate": 4.397018970189702e-05,
+      "loss": 0.0426,
+      "step": 650
+    },
+    {
+      "epoch": 0.274613506916192,
+      "grad_norm": 0.518610417842865,
+      "learning_rate": 4.56639566395664e-05,
+      "loss": 0.0415,
+      "step": 675
+    },
+    {
+      "epoch": 0.2847843775427177,
+      "grad_norm": 0.22950297594070435,
+      "learning_rate": 4.7357723577235774e-05,
+      "loss": 0.0401,
+      "step": 700
+    },
+    {
+      "epoch": 0.2949552481692433,
+      "grad_norm": 0.42790016531944275,
+      "learning_rate": 4.905149051490515e-05,
+      "loss": 0.0431,
+      "step": 725
+    },
+    {
+      "epoch": 0.3051261187957689,
+      "grad_norm": 0.24033579230308533,
+      "learning_rate": 4.991711874623267e-05,
+      "loss": 0.0436,
+      "step": 750
+    },
+    {
+      "epoch": 0.31529698942229456,
+      "grad_norm": 0.26290884613990784,
+      "learning_rate": 4.972875226039783e-05,
+      "loss": 0.0439,
+      "step": 775
+    },
+    {
+      "epoch": 0.32546786004882017,
+      "grad_norm": 0.20243217051029205,
+      "learning_rate": 4.9540385774562993e-05,
+      "loss": 0.04,
+      "step": 800
+    },
+    {
+      "epoch": 0.33563873067534583,
+      "grad_norm": 0.17254669964313507,
+      "learning_rate": 4.935201928872815e-05,
+      "loss": 0.0405,
+      "step": 825
+    },
+    {
+      "epoch": 0.34580960130187144,
+      "grad_norm": 0.2651669681072235,
+      "learning_rate": 4.916365280289331e-05,
+      "loss": 0.0376,
+      "step": 850
+    },
+    {
+      "epoch": 0.35598047192839705,
+      "grad_norm": 0.4401779770851135,
+      "learning_rate": 4.897528631705847e-05,
+      "loss": 0.038,
+      "step": 875
+    },
+    {
+      "epoch": 0.3661513425549227,
+      "grad_norm": 0.23679669201374054,
+      "learning_rate": 4.878691983122363e-05,
+      "loss": 0.0417,
+      "step": 900
+    },
+    {
+      "epoch": 0.3763222131814483,
+      "grad_norm": 0.2824667990207672,
+      "learning_rate": 4.8598553345388795e-05,
+      "loss": 0.0394,
+      "step": 925
+    },
+    {
+      "epoch": 0.386493083807974,
+      "grad_norm": 0.24676434695720673,
+      "learning_rate": 4.841018685955395e-05,
+      "loss": 0.0375,
+      "step": 950
+    },
+    {
+      "epoch": 0.3966639544344996,
+      "grad_norm": 0.3242221176624298,
+      "learning_rate": 4.822182037371911e-05,
+      "loss": 0.0384,
+      "step": 975
+    },
+    {
+      "epoch": 0.40683482506102525,
+      "grad_norm": 0.4461381733417511,
+      "learning_rate": 4.8033453887884274e-05,
+      "loss": 0.0376,
+      "step": 1000
+    },
+    {
+      "epoch": 0.41700569568755086,
+      "grad_norm": 0.3799073398113251,
+      "learning_rate": 4.784508740204943e-05,
+      "loss": 0.0415,
+      "step": 1025
+    },
+    {
+      "epoch": 0.42717656631407647,
+      "grad_norm": 0.21580813825130463,
+      "learning_rate": 4.765672091621459e-05,
+      "loss": 0.0389,
+      "step": 1050
+    },
+    {
+      "epoch": 0.43734743694060213,
+      "grad_norm": 0.27040156722068787,
+      "learning_rate": 4.7468354430379746e-05,
+      "loss": 0.0393,
+      "step": 1075
+    },
+    {
+      "epoch": 0.44751830756712774,
+      "grad_norm": 0.7307707071304321,
+      "learning_rate": 4.7279987944544904e-05,
+      "loss": 0.0411,
+      "step": 1100
+    },
+    {
+      "epoch": 0.4576891781936534,
+      "grad_norm": 0.29991018772125244,
+      "learning_rate": 4.709162145871007e-05,
+      "loss": 0.0396,
+      "step": 1125
+    },
+    {
+      "epoch": 0.467860048820179,
+      "grad_norm": 0.16599082946777344,
+      "learning_rate": 4.6903254972875226e-05,
+      "loss": 0.0385,
+      "step": 1150
+    },
+    {
+      "epoch": 0.4780309194467046,
+      "grad_norm": 0.23768579959869385,
+      "learning_rate": 4.671488848704039e-05,
+      "loss": 0.0391,
+      "step": 1175
+    },
+    {
+      "epoch": 0.4882017900732303,
+      "grad_norm": 0.43703967332839966,
+      "learning_rate": 4.652652200120555e-05,
+      "loss": 0.0371,
+      "step": 1200
+    },
+    {
+      "epoch": 0.4983726606997559,
+      "grad_norm": 0.17367880046367645,
+      "learning_rate": 4.6338155515370705e-05,
+      "loss": 0.0393,
+      "step": 1225
+    },
+    {
+      "epoch": 0.5085435313262815,
+      "grad_norm": 0.2584339678287506,
+      "learning_rate": 4.614978902953587e-05,
+      "loss": 0.0389,
+      "step": 1250
+    },
+    {
+      "epoch": 0.5187144019528072,
+      "grad_norm": 0.3418385088443756,
+      "learning_rate": 4.596142254370103e-05,
+      "loss": 0.0372,
+      "step": 1275
+    },
+    {
+      "epoch": 0.5288852725793328,
+      "grad_norm": 0.41003596782684326,
+      "learning_rate": 4.5773056057866184e-05,
+      "loss": 0.0358,
+      "step": 1300
+    },
+    {
+      "epoch": 0.5390561432058584,
+      "grad_norm": 0.5690125823020935,
+      "learning_rate": 4.558468957203135e-05,
+      "loss": 0.0424,
+      "step": 1325
+    },
+    {
+      "epoch": 0.549227013832384,
+      "grad_norm": 0.25894445180892944,
+      "learning_rate": 4.5396323086196506e-05,
+      "loss": 0.0404,
+      "step": 1350
+    },
+    {
+      "epoch": 0.5593978844589097,
+      "grad_norm": 0.3278766870498657,
+      "learning_rate": 4.520795660036167e-05,
+      "loss": 0.0394,
+      "step": 1375
+    },
+    {
+      "epoch": 0.5695687550854354,
+      "grad_norm": 0.2761504352092743,
+      "learning_rate": 4.501959011452683e-05,
+      "loss": 0.0376,
+      "step": 1400
+    },
+    {
+      "epoch": 0.5797396257119609,
+      "grad_norm": 0.1649962216615677,
+      "learning_rate": 4.4831223628691985e-05,
+      "loss": 0.0379,
+      "step": 1425
+    },
+    {
+      "epoch": 0.5899104963384866,
+      "grad_norm": 0.19504110515117645,
+      "learning_rate": 4.464285714285715e-05,
+      "loss": 0.0371,
+      "step": 1450
+    },
+    {
+      "epoch": 0.6000813669650122,
+      "grad_norm": 0.17408576607704163,
+      "learning_rate": 4.445449065702231e-05,
+      "loss": 0.0394,
+      "step": 1475
+    },
+    {
+      "epoch": 0.6102522375915378,
+      "grad_norm": 0.5323575139045715,
+      "learning_rate": 4.4266124171187465e-05,
+      "loss": 0.0397,
+      "step": 1500
+    },
+    {
+      "epoch": 0.6204231082180635,
+      "grad_norm": 0.6240959167480469,
+      "learning_rate": 4.407775768535262e-05,
+      "loss": 0.0382,
+      "step": 1525
+    },
+    {
+      "epoch": 0.6305939788445891,
+      "grad_norm": 0.34519892930984497,
+      "learning_rate": 4.388939119951778e-05,
+      "loss": 0.0412,
+      "step": 1550
+    },
+    {
+      "epoch": 0.6407648494711147,
+      "grad_norm": 0.6749323606491089,
+      "learning_rate": 4.3701024713682944e-05,
+      "loss": 0.039,
+      "step": 1575
+    },
+    {
+      "epoch": 0.6509357200976403,
+      "grad_norm": 0.3768059313297272,
+      "learning_rate": 4.35126582278481e-05,
+      "loss": 0.0386,
+      "step": 1600
+    },
+    {
+      "epoch": 0.661106590724166,
+      "grad_norm": 0.22068104147911072,
+      "learning_rate": 4.332429174201326e-05,
+      "loss": 0.0383,
+      "step": 1625
+    },
+    {
+      "epoch": 0.6712774613506917,
+      "grad_norm": 0.14016976952552795,
+      "learning_rate": 4.313592525617842e-05,
+      "loss": 0.039,
+      "step": 1650
+    },
+    {
+      "epoch": 0.6814483319772172,
+      "grad_norm": 0.17742925882339478,
+      "learning_rate": 4.294755877034358e-05,
+      "loss": 0.0369,
+      "step": 1675
+    },
+    {
+      "epoch": 0.6916192026037429,
+      "grad_norm": 0.1665901243686676,
+      "learning_rate": 4.2759192284508745e-05,
+      "loss": 0.0361,
+      "step": 1700
+    },
+    {
+      "epoch": 0.7017900732302685,
+      "grad_norm": 0.20996783673763275,
+      "learning_rate": 4.25708257986739e-05,
+      "loss": 0.0372,
+      "step": 1725
+    },
+    {
+      "epoch": 0.7119609438567941,
+      "grad_norm": 0.5258492827415466,
+      "learning_rate": 4.238245931283906e-05,
+      "loss": 0.0377,
+      "step": 1750
+    },
+    {
+      "epoch": 0.7221318144833198,
+      "grad_norm": 0.2202778309583664,
+      "learning_rate": 4.2194092827004224e-05,
+      "loss": 0.0384,
+      "step": 1775
+    },
+    {
+      "epoch": 0.7323026851098454,
+      "grad_norm": 0.6594241857528687,
+      "learning_rate": 4.200572634116938e-05,
+      "loss": 0.0329,
+      "step": 1800
+    },
+    {
+      "epoch": 0.7424735557363711,
+      "grad_norm": 0.19332900643348694,
+      "learning_rate": 4.181735985533454e-05,
+      "loss": 0.0386,
+      "step": 1825
+    },
+    {
+      "epoch": 0.7526444263628966,
+      "grad_norm": 0.22850601375102997,
+      "learning_rate": 4.1628993369499704e-05,
+      "loss": 0.0367,
+      "step": 1850
+    },
+    {
+      "epoch": 0.7628152969894223,
+      "grad_norm": 0.5552480220794678,
+      "learning_rate": 4.144062688366486e-05,
+      "loss": 0.0355,
+      "step": 1875
+    },
+    {
+      "epoch": 0.772986167615948,
+      "grad_norm": 0.5165444612503052,
+      "learning_rate": 4.1252260397830025e-05,
+      "loss": 0.0373,
+      "step": 1900
+    },
+    {
+      "epoch": 0.7831570382424735,
+      "grad_norm": 0.1546785682439804,
+      "learning_rate": 4.1063893911995176e-05,
+      "loss": 0.0328,
+      "step": 1925
+    },
+    {
+      "epoch": 0.7933279088689992,
+      "grad_norm": 0.45292505621910095,
+      "learning_rate": 4.087552742616034e-05,
+      "loss": 0.0358,
+      "step": 1950
+    },
+    {
+      "epoch": 0.8034987794955248,
+      "grad_norm": 0.31234049797058105,
+      "learning_rate": 4.06871609403255e-05,
+      "loss": 0.0375,
+      "step": 1975
+    },
+    {
+      "epoch": 0.8136696501220505,
+      "grad_norm": 0.26489096879959106,
+      "learning_rate": 4.0498794454490655e-05,
+      "loss": 0.0391,
+      "step": 2000
+    },
+    {
+      "epoch": 0.823840520748576,
+      "grad_norm": 0.30596041679382324,
+      "learning_rate": 4.031042796865582e-05,
+      "loss": 0.0356,
+      "step": 2025
+    },
+    {
+      "epoch": 0.8340113913751017,
+      "grad_norm": 0.22221428155899048,
+      "learning_rate": 4.012206148282098e-05,
+      "loss": 0.0349,
+      "step": 2050
+    },
+    {
+      "epoch": 0.8441822620016274,
+      "grad_norm": 0.27404096722602844,
+      "learning_rate": 3.9933694996986135e-05,
+      "loss": 0.0391,
+      "step": 2075
+    },
+    {
+      "epoch": 0.8543531326281529,
+      "grad_norm": 0.21221423149108887,
+      "learning_rate": 3.97453285111513e-05,
+      "loss": 0.0364,
+      "step": 2100
+    },
+    {
+      "epoch": 0.8645240032546786,
+      "grad_norm": 0.5386448502540588,
+      "learning_rate": 3.9556962025316456e-05,
+      "loss": 0.0369,
+      "step": 2125
+    },
+    {
+      "epoch": 0.8746948738812043,
+      "grad_norm": 0.2866438627243042,
+      "learning_rate": 3.936859553948162e-05,
+      "loss": 0.0363,
+      "step": 2150
+    },
+    {
+      "epoch": 0.8848657445077298,
+      "grad_norm": 0.300436407327652,
+      "learning_rate": 3.918022905364678e-05,
+      "loss": 0.0353,
+      "step": 2175
+    },
+    {
+      "epoch": 0.8950366151342555,
+      "grad_norm": 0.1889338344335556,
+      "learning_rate": 3.8991862567811936e-05,
+      "loss": 0.0362,
+      "step": 2200
+    },
+    {
+      "epoch": 0.9052074857607811,
+      "grad_norm": 0.47652769088745117,
+      "learning_rate": 3.88034960819771e-05,
+      "loss": 0.0394,
+      "step": 2225
+    },
+    {
+      "epoch": 0.9153783563873068,
+      "grad_norm": 0.43233829736709595,
+      "learning_rate": 3.861512959614226e-05,
+      "loss": 0.0377,
+      "step": 2250
+    },
+    {
+      "epoch": 0.9255492270138324,
+      "grad_norm": 0.3206855058670044,
+      "learning_rate": 3.8426763110307415e-05,
+      "loss": 0.0341,
+      "step": 2275
+    },
+    {
+      "epoch": 0.935720097640358,
+      "grad_norm": 0.2778429090976715,
+      "learning_rate": 3.823839662447258e-05,
+      "loss": 0.035,
+      "step": 2300
+    },
+    {
+      "epoch": 0.9458909682668837,
+      "grad_norm": 0.22730959951877594,
+      "learning_rate": 3.805003013863774e-05,
+      "loss": 0.0346,
+      "step": 2325
+    },
+    {
+      "epoch": 0.9560618388934092,
+      "grad_norm": 0.29823628067970276,
+      "learning_rate": 3.78616636528029e-05,
+      "loss": 0.0363,
+      "step": 2350
+    },
+    {
+      "epoch": 0.9662327095199349,
+      "grad_norm": 0.5740765333175659,
+      "learning_rate": 3.767329716696805e-05,
+      "loss": 0.0348,
+      "step": 2375
+    },
+    {
+      "epoch": 0.9764035801464606,
+      "grad_norm": 0.4176069498062134,
+      "learning_rate": 3.748493068113321e-05,
+      "loss": 0.0387,
+      "step": 2400
+    },
+    {
+      "epoch": 0.9865744507729862,
+      "grad_norm": 0.14277301728725433,
+      "learning_rate": 3.7296564195298374e-05,
+      "loss": 0.0362,
+      "step": 2425
+    },
+    {
+      "epoch": 0.9967453213995118,
+      "grad_norm": 0.20792150497436523,
+      "learning_rate": 3.710819770946353e-05,
+      "loss": 0.0351,
+      "step": 2450
+    },
+    {
+      "epoch": 1.0,
+      "eval_explained_variance": 0.24840307235717773,
+      "eval_loss": 0.03510129451751709,
+      "eval_mae": 0.1494196206331253,
+      "eval_mse": 0.035100266337394714,
+      "eval_r2": 0.2482784390449524,
+      "eval_rmse": 0.18735065075252533,
+      "eval_runtime": 4.0725,
+      "eval_samples_per_second": 4826.955,
+      "eval_steps_per_second": 75.628,
+      "step": 2458
+    },
+    {
+      "epoch": 1.0069161920260374,
+      "grad_norm": 0.4044972360134125,
+      "learning_rate": 3.6919831223628695e-05,
+      "loss": 0.0344,
+      "step": 2475
+    },
+    {
+      "epoch": 1.017087062652563,
+      "grad_norm": 0.1920347958803177,
+      "learning_rate": 3.673146473779385e-05,
+      "loss": 0.0333,
+      "step": 2500
+    },
+    {
+      "epoch": 1.0272579332790888,
+      "grad_norm": 0.2734430432319641,
+      "learning_rate": 3.654309825195901e-05,
+      "loss": 0.0329,
+      "step": 2525
+    },
+    {
+      "epoch": 1.0374288039056143,
+      "grad_norm": 0.2458937019109726,
+      "learning_rate": 3.6354731766124175e-05,
+      "loss": 0.0341,
+      "step": 2550
+    },
+    {
+      "epoch": 1.0475996745321399,
+      "grad_norm": 0.302209734916687,
+      "learning_rate": 3.616636528028933e-05,
+      "loss": 0.0336,
+      "step": 2575
+    },
+    {
+      "epoch": 1.0577705451586656,
+      "grad_norm": 0.23956753313541412,
+      "learning_rate": 3.597799879445449e-05,
+      "loss": 0.0351,
+      "step": 2600
+    },
+    {
+      "epoch": 1.0679414157851912,
+      "grad_norm": 0.4500243663787842,
+      "learning_rate": 3.5789632308619654e-05,
+      "loss": 0.031,
+      "step": 2625
+    },
+    {
+      "epoch": 1.0781122864117167,
+      "grad_norm": 0.23080125451087952,
+      "learning_rate": 3.560126582278481e-05,
+      "loss": 0.0358,
+      "step": 2650
+    },
+    {
+      "epoch": 1.0882831570382425,
+      "grad_norm": 0.44963157176971436,
+      "learning_rate": 3.5412899336949976e-05,
+      "loss": 0.0326,
+      "step": 2675
+    },
+    {
+      "epoch": 1.098454027664768,
+      "grad_norm": 0.721524715423584,
+      "learning_rate": 3.522453285111513e-05,
+      "loss": 0.0383,
+      "step": 2700
+    },
+    {
+      "epoch": 1.1086248982912936,
+      "grad_norm": 0.24731744825839996,
+      "learning_rate": 3.503616636528029e-05,
+      "loss": 0.0354,
+      "step": 2725
+    },
+    {
+      "epoch": 1.1187957689178194,
+      "grad_norm": 0.19786667823791504,
+      "learning_rate": 3.4847799879445455e-05,
+      "loss": 0.0335,
+      "step": 2750
+    },
+    {
+      "epoch": 1.128966639544345,
+      "grad_norm": 0.16557921469211578,
+      "learning_rate": 3.465943339361061e-05,
+      "loss": 0.0349,
+      "step": 2775
+    },
+    {
+      "epoch": 1.1391375101708707,
+      "grad_norm": 0.30080434679985046,
+      "learning_rate": 3.447106690777577e-05,
+      "loss": 0.0335,
+      "step": 2800
+    },
+    {
+      "epoch": 1.1493083807973963,
+      "grad_norm": 0.2929253876209259,
+      "learning_rate": 3.428270042194093e-05,
+      "loss": 0.0338,
+      "step": 2825
+    },
+    {
+      "epoch": 1.1594792514239218,
+      "grad_norm": 0.19096094369888306,
+      "learning_rate": 3.4094333936106085e-05,
+      "loss": 0.0307,
+      "step": 2850
+    },
+    {
+      "epoch": 1.1696501220504476,
+      "grad_norm": 0.2764925956726074,
+      "learning_rate": 3.390596745027125e-05,
+      "loss": 0.034,
+      "step": 2875
+    },
+    {
+      "epoch": 1.1798209926769732,
+      "grad_norm": 0.36045756936073303,
+      "learning_rate": 3.371760096443641e-05,
+      "loss": 0.0327,
+      "step": 2900
+    },
+    {
+      "epoch": 1.1899918633034987,
+      "grad_norm": 0.4153495728969574,
+      "learning_rate": 3.352923447860157e-05,
+      "loss": 0.0341,
+      "step": 2925
+    },
+    {
+      "epoch": 1.2001627339300245,
+      "grad_norm": 0.3471417725086212,
+      "learning_rate": 3.334086799276673e-05,
+      "loss": 0.0329,
+      "step": 2950
+    },
+    {
+      "epoch": 1.21033360455655,
+      "grad_norm": 0.3038371205329895,
+      "learning_rate": 3.3152501506931886e-05,
+      "loss": 0.0344,
+      "step": 2975
+    },
+    {
+      "epoch": 1.2205044751830756,
+      "grad_norm": 0.589869499206543,
+      "learning_rate": 3.296413502109705e-05,
+      "loss": 0.0369,
+      "step": 3000
+    },
+    {
+      "epoch": 1.2306753458096014,
+      "grad_norm": 0.19090452790260315,
+      "learning_rate": 3.277576853526221e-05,
+      "loss": 0.0325,
+      "step": 3025
+    },
+    {
+      "epoch": 1.240846216436127,
+      "grad_norm": 0.2946130335330963,
+      "learning_rate": 3.2587402049427365e-05,
+      "loss": 0.033,
+      "step": 3050
+    },
+    {
+      "epoch": 1.2510170870626527,
+      "grad_norm": 0.33456677198410034,
+      "learning_rate": 3.239903556359253e-05,
+      "loss": 0.0311,
+      "step": 3075
+    },
+    {
+      "epoch": 1.2611879576891782,
+      "grad_norm": 0.23250174522399902,
+      "learning_rate": 3.221066907775769e-05,
+      "loss": 0.034,
+      "step": 3100
+    },
+    {
+      "epoch": 1.2713588283157038,
+      "grad_norm": 0.2728452682495117,
+      "learning_rate": 3.2022302591922845e-05,
+      "loss": 0.0328,
+      "step": 3125
+    },
+    {
+      "epoch": 1.2815296989422293,
+      "grad_norm": 0.22802890837192535,
+      "learning_rate": 3.183393610608801e-05,
+      "loss": 0.0291,
+      "step": 3150
+    },
+    {
+      "epoch": 1.2917005695687551,
+      "grad_norm": 0.3694087862968445,
+      "learning_rate": 3.1645569620253167e-05,
+      "loss": 0.033,
+      "step": 3175
+    },
+    {
+      "epoch": 1.3018714401952807,
+      "grad_norm": 0.3649369478225708,
+      "learning_rate": 3.145720313441833e-05,
+      "loss": 0.0353,
+      "step": 3200
+    },
+    {
+      "epoch": 1.3120423108218064,
+      "grad_norm": 0.45792657136917114,
+      "learning_rate": 3.126883664858349e-05,
+      "loss": 0.0332,
+      "step": 3225
+    },
+    {
+      "epoch": 1.322213181448332,
+      "grad_norm": 0.325790137052536,
+      "learning_rate": 3.1080470162748646e-05,
+      "loss": 0.0314,
+      "step": 3250
+    },
+    {
+      "epoch": 1.3323840520748575,
+      "grad_norm": 0.25394582748413086,
+      "learning_rate": 3.08921036769138e-05,
+      "loss": 0.0347,
+      "step": 3275
+    },
+    {
+      "epoch": 1.342554922701383,
+      "grad_norm": 0.25821924209594727,
+      "learning_rate": 3.070373719107896e-05,
+      "loss": 0.0326,
+      "step": 3300
+    },
+    {
+      "epoch": 1.3527257933279089,
+      "grad_norm": 0.24513724446296692,
+      "learning_rate": 3.0515370705244122e-05,
+      "loss": 0.0323,
+      "step": 3325
+    },
+    {
+      "epoch": 1.3628966639544344,
+      "grad_norm": 0.5198257565498352,
+      "learning_rate": 3.0327004219409283e-05,
+      "loss": 0.0337,
+      "step": 3350
+    },
+    {
+      "epoch": 1.3730675345809602,
+      "grad_norm": 0.4150083661079407,
+      "learning_rate": 3.0138637733574443e-05,
+      "loss": 0.0313,
+      "step": 3375
+    },
+    {
+      "epoch": 1.3832384052074858,
+      "grad_norm": 0.148391991853714,
+      "learning_rate": 2.9950271247739604e-05,
+      "loss": 0.0351,
+      "step": 3400
+    },
+    {
+      "epoch": 1.3934092758340113,
+      "grad_norm": 0.23965270817279816,
+      "learning_rate": 2.9761904761904762e-05,
+      "loss": 0.0338,
+      "step": 3425
+    },
+    {
+      "epoch": 1.403580146460537,
+      "grad_norm": 0.21719323098659515,
+      "learning_rate": 2.9573538276069923e-05,
+      "loss": 0.0305,
+      "step": 3450
+    },
+    {
+      "epoch": 1.4137510170870626,
+      "grad_norm": 0.25082919001579285,
+      "learning_rate": 2.9385171790235084e-05,
+      "loss": 0.0328,
+      "step": 3475
+    },
+    {
+      "epoch": 1.4239218877135884,
+      "grad_norm": 0.3688701093196869,
+      "learning_rate": 2.9196805304400245e-05,
+      "loss": 0.0352,
+      "step": 3500
+    },
+    {
+      "epoch": 1.434092758340114,
+      "grad_norm": 0.3461095094680786,
+      "learning_rate": 2.9008438818565402e-05,
+      "loss": 0.0356,
+      "step": 3525
+    },
+    {
+      "epoch": 1.4442636289666395,
+      "grad_norm": 0.2626365125179291,
+      "learning_rate": 2.8820072332730563e-05,
+      "loss": 0.033,
+      "step": 3550
+    },
+    {
+      "epoch": 1.454434499593165,
+      "grad_norm": 0.27237704396247864,
+      "learning_rate": 2.8631705846895724e-05,
+      "loss": 0.0342,
+      "step": 3575
+    },
+    {
+      "epoch": 1.4646053702196908,
+      "grad_norm": 0.258208304643631,
+      "learning_rate": 2.8443339361060885e-05,
+      "loss": 0.0327,
+      "step": 3600
+    },
+    {
+      "epoch": 1.4747762408462164,
+      "grad_norm": 0.2694801390171051,
+      "learning_rate": 2.8254972875226042e-05,
+      "loss": 0.0353,
+      "step": 3625
+    },
+    {
+      "epoch": 1.4849471114727422,
+      "grad_norm": 0.3417627513408661,
+      "learning_rate": 2.8066606389391203e-05,
+      "loss": 0.0325,
+      "step": 3650
+    },
+    {
+      "epoch": 1.4951179820992677,
+      "grad_norm": 0.4780226945877075,
+      "learning_rate": 2.7878239903556357e-05,
+      "loss": 0.0338,
+      "step": 3675
+    },
+    {
+      "epoch": 1.5052888527257933,
+      "grad_norm": 0.2635546922683716,
+      "learning_rate": 2.7689873417721518e-05,
+      "loss": 0.0321,
+      "step": 3700
+    },
+    {
+      "epoch": 1.5154597233523188,
+      "grad_norm": 0.2416383922100067,
+      "learning_rate": 2.750150693188668e-05,
+      "loss": 0.0338,
+      "step": 3725
+    },
+    {
+      "epoch": 1.5256305939788446,
+      "grad_norm": 0.5338820219039917,
+      "learning_rate": 2.7313140446051837e-05,
+      "loss": 0.0319,
+      "step": 3750
+    },
+    {
+      "epoch": 1.5358014646053704,
+      "grad_norm": 0.35653921961784363,
+      "learning_rate": 2.7124773960216997e-05,
+      "loss": 0.0338,
+      "step": 3775
+    },
+    {
+      "epoch": 1.545972335231896,
+      "grad_norm": 0.23320654034614563,
+      "learning_rate": 2.693640747438216e-05,
+      "loss": 0.0312,
+      "step": 3800
+    },
+    {
+      "epoch": 1.5561432058584215,
+      "grad_norm": 0.31537100672721863,
+      "learning_rate": 2.674804098854732e-05,
+      "loss": 0.037,
+      "step": 3825
+    },
+    {
+      "epoch": 1.566314076484947,
+      "grad_norm": 0.24033057689666748,
+      "learning_rate": 2.6559674502712477e-05,
+      "loss": 0.0315,
+      "step": 3850
+    },
+    {
+      "epoch": 1.5764849471114726,
+      "grad_norm": 0.21600840985774994,
+      "learning_rate": 2.6371308016877638e-05,
+      "loss": 0.0329,
+      "step": 3875
+    },
+    {
+      "epoch": 1.5866558177379984,
+      "grad_norm": 0.4238574206829071,
+      "learning_rate": 2.61829415310428e-05,
+      "loss": 0.0356,
+      "step": 3900
+    },
+    {
+      "epoch": 1.5968266883645241,
+      "grad_norm": 0.3188216984272003,
+      "learning_rate": 2.599457504520796e-05,
+      "loss": 0.0321,
+      "step": 3925
+    },
+    {
+      "epoch": 1.6069975589910497,
+      "grad_norm": 0.29613322019577026,
+      "learning_rate": 2.5806208559373117e-05,
+      "loss": 0.0288,
+      "step": 3950
+    },
+    {
+      "epoch": 1.6171684296175752,
+      "grad_norm": 0.3398037850856781,
+      "learning_rate": 2.5617842073538278e-05,
+      "loss": 0.0356,
+      "step": 3975
+    },
+    {
+      "epoch": 1.6273393002441008,
+      "grad_norm": 0.7744150757789612,
+      "learning_rate": 2.542947558770344e-05,
+      "loss": 0.03,
+      "step": 4000
+    },
+    {
+      "epoch": 1.6375101708706266,
+      "grad_norm": 0.2858869135379791,
+      "learning_rate": 2.52411091018686e-05,
+      "loss": 0.0313,
+      "step": 4025
+    },
+    {
+      "epoch": 1.647681041497152,
+      "grad_norm": 0.5115847587585449,
+      "learning_rate": 2.5052742616033757e-05,
+      "loss": 0.0321,
+      "step": 4050
+    },
+    {
+      "epoch": 1.6578519121236779,
+      "grad_norm": 0.2705928385257721,
+      "learning_rate": 2.4864376130198915e-05,
+      "loss": 0.0299,
+      "step": 4075
+    },
+    {
+      "epoch": 1.6680227827502034,
+      "grad_norm": 0.23888413608074188,
+      "learning_rate": 2.4676009644364075e-05,
+      "loss": 0.0302,
+      "step": 4100
+    },
+    {
+      "epoch": 1.678193653376729,
+      "grad_norm": 0.251142680644989,
+      "learning_rate": 2.4487643158529236e-05,
+      "loss": 0.0352,
+      "step": 4125
+    },
+    {
+      "epoch": 1.6883645240032545,
+      "grad_norm": 0.4335423707962036,
+      "learning_rate": 2.4299276672694397e-05,
+      "loss": 0.0342,
+      "step": 4150
+    },
+    {
+      "epoch": 1.6985353946297803,
+      "grad_norm": 0.26633137464523315,
+      "learning_rate": 2.4110910186859555e-05,
+      "loss": 0.0303,
+      "step": 4175
+    },
+    {
+      "epoch": 1.708706265256306,
+      "grad_norm": 0.18798017501831055,
+      "learning_rate": 2.3922543701024716e-05,
+      "loss": 0.0318,
+      "step": 4200
+    },
+    {
+      "epoch": 1.7188771358828316,
+      "grad_norm": 0.14591360092163086,
+      "learning_rate": 2.3734177215189873e-05,
+      "loss": 0.0306,
+      "step": 4225
+    },
+    {
+      "epoch": 1.7290480065093572,
+      "grad_norm": 0.22364169359207153,
+      "learning_rate": 2.3545810729355034e-05,
+      "loss": 0.031,
+      "step": 4250
+    },
+    {
+      "epoch": 1.7392188771358827,
+      "grad_norm": 0.28795015811920166,
+      "learning_rate": 2.3357444243520195e-05,
+      "loss": 0.0337,
+      "step": 4275
+    },
+    {
+      "epoch": 1.7493897477624083,
+      "grad_norm": 0.5833514332771301,
+      "learning_rate": 2.3169077757685352e-05,
+      "loss": 0.0356,
+      "step": 4300
+    },
+    {
+      "epoch": 1.759560618388934,
+      "grad_norm": 0.20125386118888855,
+      "learning_rate": 2.2980711271850513e-05,
+      "loss": 0.035,
+      "step": 4325
+    },
+    {
+      "epoch": 1.7697314890154598,
+      "grad_norm": 0.39359915256500244,
+      "learning_rate": 2.2792344786015674e-05,
+      "loss": 0.0325,
+      "step": 4350
+    },
+    {
+      "epoch": 1.7799023596419854,
+      "grad_norm": 0.18621224164962769,
+      "learning_rate": 2.2603978300180835e-05,
+      "loss": 0.0287,
+      "step": 4375
+    },
+    {
+      "epoch": 1.790073230268511,
+      "grad_norm": 0.23905445635318756,
+      "learning_rate": 2.2415611814345993e-05,
+      "loss": 0.0338,
+      "step": 4400
+    },
+    {
+      "epoch": 1.8002441008950365,
+      "grad_norm": 0.5699689984321594,
+      "learning_rate": 2.2227245328511154e-05,
+      "loss": 0.034,
+      "step": 4425
+    },
+    {
+      "epoch": 1.8104149715215623,
+      "grad_norm": 0.3101656138896942,
+      "learning_rate": 2.203887884267631e-05,
+      "loss": 0.0344,
+      "step": 4450
+    },
+    {
+      "epoch": 1.8205858421480878,
+      "grad_norm": 0.19921617209911346,
+      "learning_rate": 2.1850512356841472e-05,
+      "loss": 0.0348,
+      "step": 4475
+    },
+    {
+      "epoch": 1.8307567127746136,
+      "grad_norm": 0.4034786522388458,
+      "learning_rate": 2.166214587100663e-05,
+      "loss": 0.0332,
+      "step": 4500
+    },
+    {
+      "epoch": 1.8409275834011392,
+      "grad_norm": 0.5094945430755615,
+      "learning_rate": 2.147377938517179e-05,
+      "loss": 0.0293,
+      "step": 4525
+    },
+    {
+      "epoch": 1.8510984540276647,
+      "grad_norm": 0.4296955168247223,
+      "learning_rate": 2.128541289933695e-05,
+      "loss": 0.0318,
+      "step": 4550
+    },
+    {
+      "epoch": 1.8612693246541903,
+      "grad_norm": 0.2767828404903412,
+      "learning_rate": 2.1097046413502112e-05,
+      "loss": 0.0337,
+      "step": 4575
+    },
+    {
+      "epoch": 1.871440195280716,
+      "grad_norm": 0.2815410792827606,
+      "learning_rate": 2.090867992766727e-05,
+      "loss": 0.0322,
+      "step": 4600
+    },
+    {
+      "epoch": 1.8816110659072418,
+      "grad_norm": 0.2712397873401642,
+      "learning_rate": 2.072031344183243e-05,
+      "loss": 0.0309,
+      "step": 4625
+    },
+    {
+      "epoch": 1.8917819365337674,
+      "grad_norm": 0.48615217208862305,
+      "learning_rate": 2.0531946955997588e-05,
+      "loss": 0.0335,
+      "step": 4650
+    },
+    {
+      "epoch": 1.901952807160293,
+      "grad_norm": 0.20523346960544586,
+      "learning_rate": 2.034358047016275e-05,
+      "loss": 0.032,
+      "step": 4675
+    },
+    {
+      "epoch": 1.9121236777868185,
+      "grad_norm": 0.21368514001369476,
+      "learning_rate": 2.015521398432791e-05,
+      "loss": 0.0323,
+      "step": 4700
+    },
+    {
+      "epoch": 1.922294548413344,
+      "grad_norm": 0.5739328265190125,
+      "learning_rate": 1.9966847498493067e-05,
+      "loss": 0.0324,
+      "step": 4725
+    },
+    {
+      "epoch": 1.9324654190398698,
+      "grad_norm": 0.34018173813819885,
+      "learning_rate": 1.9778481012658228e-05,
+      "loss": 0.0333,
+      "step": 4750
+    },
+    {
+      "epoch": 1.9426362896663956,
+      "grad_norm": 0.29020246863365173,
+      "learning_rate": 1.959011452682339e-05,
+      "loss": 0.032,
+      "step": 4775
+    },
+    {
+      "epoch": 1.9528071602929211,
+      "grad_norm": 0.2339148074388504,
+      "learning_rate": 1.940174804098855e-05,
+      "loss": 0.0307,
+      "step": 4800
+    },
+    {
+      "epoch": 1.9629780309194467,
+      "grad_norm": 0.23286353051662445,
+      "learning_rate": 1.9213381555153708e-05,
+      "loss": 0.0339,
+      "step": 4825
+    },
+    {
+      "epoch": 1.9731489015459722,
+      "grad_norm": 0.2174939662218094,
+      "learning_rate": 1.902501506931887e-05,
+      "loss": 0.0318,
+      "step": 4850
+    },
+    {
+      "epoch": 1.983319772172498,
+      "grad_norm": 0.2397424727678299,
+      "learning_rate": 1.8836648583484026e-05,
+      "loss": 0.032,
+      "step": 4875
+    },
+    {
+      "epoch": 1.9934906427990235,
+      "grad_norm": 0.3197477161884308,
+      "learning_rate": 1.8648282097649187e-05,
+      "loss": 0.0319,
+      "step": 4900
+    },
+    {
+      "epoch": 2.0,
+      "eval_explained_variance": 0.2763633728027344,
+      "eval_loss": 0.03380444645881653,
+      "eval_mae": 0.14292894303798676,
+      "eval_mse": 0.033803146332502365,
+      "eval_r2": 0.2760580778121948,
+      "eval_rmse": 0.18385631980571776,
+      "eval_runtime": 4.0998,
+      "eval_samples_per_second": 4794.858,
+      "eval_steps_per_second": 75.125,
+      "step": 4916
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 7374,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 5,
+        "early_stopping_threshold": 0.01
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 1
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5209584826318848.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-4916/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:51a4264247fbe1fe88d05c68d61b8084763d2f8eb4fde109f383a88bcaed7463
+size 5368

config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "_name_or_path": "distilbert/distilbert-base-uncased",
+  "_num_labels": 1,
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "target"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "target": 0
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "regression",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.48.0",
+  "vocab_size": 30522
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:52992a6b1bb7b233e4054bc086b088013d7e7613bcffead944d24fd70d64c55e
+size 267829484

runs/Feb22_02-13-45_r-samtuckervegan-autotrain-4xl4-41bfog4i-7c075-ypwih/events.out.tfevents.1740190427.r-samtuckervegan-autotrain-4xl4-41bfog4i-7c075-ypwih.208.0 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fb7f2b7d9f9435c3c6c5309d2e64e8b97600a6e7a4e2f247f057edb00cf5d18e
-size 59253

 version https://git-lfs.github.com/spec/v1
+oid sha256:5382562246cf343789c90591b47487305c08fddb4bb31ad9423e1e4afb19f09d
+size 68990

runs/Feb22_02-13-45_r-samtuckervegan-autotrain-4xl4-41bfog4i-7c075-ypwih/events.out.tfevents.1740191139.r-samtuckervegan-autotrain-4xl4-41bfog4i-7c075-ypwih.208.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9c56ff9864bfc4fb732368db29e9cb5af3bfdc993c409ac957d8db486cf30a2a
+size 609

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:51a4264247fbe1fe88d05c68d61b8084763d2f8eb4fde109f383a88bcaed7463
+size 5368

training_params.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+    "data_path": "samtuckervegan/text_performance",
+    "model": "distilbert/distilbert-base-uncased",
+    "lr": 5e-05,
+    "epochs": 3,
+    "max_seq_length": 128,
+    "batch_size": 8,
+    "warmup_ratio": 0.1,
+    "gradient_accumulation": 1,
+    "optimizer": "adamw_torch",
+    "scheduler": "linear",
+    "weight_decay": 0.0,
+    "max_grad_norm": 1.0,
+    "seed": 42,
+    "train_split": "train",
+    "valid_split": "test",
+    "text_column": "text",
+    "target_column": "target",
+    "logging_steps": -1,
+    "project_name": "textprediction",
+    "auto_find_batch_size": false,
+    "mixed_precision": "fp16",
+    "save_total_limit": 1,
+    "push_to_hub": true,
+    "eval_strategy": "epoch",
+    "username": "samtuckervegan",
+    "log": "tensorboard",
+    "early_stopping_patience": 5,
+    "early_stopping_threshold": 0.01
+}

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff