Upload folder using huggingface_hub

Browse files

Files changed (7) hide show

adapter_model.safetensors +1 -1
optimizer.pt +3 -0
rng_state.pth +3 -0
scaler.pt +3 -0
scheduler.pt +3 -0
trainer_state.json +594 -0
training_args.bin +3 -0

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:29bb1cf9d39944c9cd7115354bc81c47baf1cf2a25b9b9f24f8c1419e83b0e5e
 size 3158328

 version https://git-lfs.github.com/spec/v1
+oid sha256:e92e7c0e65d124154f882db6e6b89ae7dedbcf6ca149012ec2de51653e86644f
 size 3158328

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b0701747dc14bac44bc2768ca59a83df1f871b155a2e2c5492794c93f184fd54
+size 6372346

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ae6ca1c4675db6b88190f773cd2468c2e86958ece56a79a0a2ac9f552b528d5d
+size 14244

scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:59a4acfb0da74c479080613978839dd3cbb4608fd2c07e764b4c844401d8dd5f
+size 988

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:71c858e7d80e455b20613a1a6bfebd32fda797d631d427b2c8f97c0128f7aba4
+size 1064

trainer_state.json ADDED Viewed

	@@ -0,0 +1,594 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 4000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.05,
+      "grad_norm": 0.07341445982456207,
+      "learning_rate": 4.93875e-05,
+      "loss": 2.2129,
+      "step": 50
+    },
+    {
+      "epoch": 0.1,
+      "grad_norm": 0.05993746593594551,
+      "learning_rate": 4.87625e-05,
+      "loss": 1.9761,
+      "step": 100
+    },
+    {
+      "epoch": 0.15,
+      "grad_norm": 0.09222695976495743,
+      "learning_rate": 4.8137500000000005e-05,
+      "loss": 2.1171,
+      "step": 150
+    },
+    {
+      "epoch": 0.2,
+      "grad_norm": 0.10137380659580231,
+      "learning_rate": 4.75125e-05,
+      "loss": 2.1274,
+      "step": 200
+    },
+    {
+      "epoch": 0.25,
+      "grad_norm": 0.1008363887667656,
+      "learning_rate": 4.68875e-05,
+      "loss": 1.9111,
+      "step": 250
+    },
+    {
+      "epoch": 0.3,
+      "grad_norm": 0.09414055943489075,
+      "learning_rate": 4.6262500000000006e-05,
+      "loss": 1.9712,
+      "step": 300
+    },
+    {
+      "epoch": 0.35,
+      "grad_norm": 0.08397311717271805,
+      "learning_rate": 4.56375e-05,
+      "loss": 1.9442,
+      "step": 350
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 0.11413953453302383,
+      "learning_rate": 4.50125e-05,
+      "loss": 1.8472,
+      "step": 400
+    },
+    {
+      "epoch": 0.45,
+      "grad_norm": 0.1147843673825264,
+      "learning_rate": 4.43875e-05,
+      "loss": 1.88,
+      "step": 450
+    },
+    {
+      "epoch": 0.5,
+      "grad_norm": 0.15569846332073212,
+      "learning_rate": 4.37625e-05,
+      "loss": 1.8664,
+      "step": 500
+    },
+    {
+      "epoch": 0.55,
+      "grad_norm": 0.1375017762184143,
+      "learning_rate": 4.3137500000000005e-05,
+      "loss": 1.8417,
+      "step": 550
+    },
+    {
+      "epoch": 0.6,
+      "grad_norm": 0.10806793719530106,
+      "learning_rate": 4.2512499999999997e-05,
+      "loss": 1.8872,
+      "step": 600
+    },
+    {
+      "epoch": 0.65,
+      "grad_norm": 0.12453680485486984,
+      "learning_rate": 4.18875e-05,
+      "loss": 1.8628,
+      "step": 650
+    },
+    {
+      "epoch": 0.7,
+      "grad_norm": 0.1216558963060379,
+      "learning_rate": 4.126250000000001e-05,
+      "loss": 1.9252,
+      "step": 700
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 0.10894916951656342,
+      "learning_rate": 4.06375e-05,
+      "loss": 1.8206,
+      "step": 750
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.11225900053977966,
+      "learning_rate": 4.0012500000000004e-05,
+      "loss": 1.8024,
+      "step": 800
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 0.0950043648481369,
+      "learning_rate": 3.93875e-05,
+      "loss": 1.8965,
+      "step": 850
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 0.15110863745212555,
+      "learning_rate": 3.87625e-05,
+      "loss": 1.892,
+      "step": 900
+    },
+    {
+      "epoch": 0.95,
+      "grad_norm": 0.11794973164796829,
+      "learning_rate": 3.8137500000000005e-05,
+      "loss": 1.846,
+      "step": 950
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.09780783951282501,
+      "learning_rate": 3.7512500000000004e-05,
+      "loss": 1.8793,
+      "step": 1000
+    },
+    {
+      "epoch": 1.05,
+      "grad_norm": 0.10895536839962006,
+      "learning_rate": 3.68875e-05,
+      "loss": 1.8671,
+      "step": 1050
+    },
+    {
+      "epoch": 1.1,
+      "grad_norm": 0.11591296643018723,
+      "learning_rate": 3.62625e-05,
+      "loss": 1.8809,
+      "step": 1100
+    },
+    {
+      "epoch": 1.15,
+      "grad_norm": 0.12027107179164886,
+      "learning_rate": 3.56375e-05,
+      "loss": 1.8601,
+      "step": 1150
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 0.17965424060821533,
+      "learning_rate": 3.5012500000000004e-05,
+      "loss": 1.79,
+      "step": 1200
+    },
+    {
+      "epoch": 1.25,
+      "grad_norm": 0.10349484533071518,
+      "learning_rate": 3.43875e-05,
+      "loss": 1.911,
+      "step": 1250
+    },
+    {
+      "epoch": 1.3,
+      "grad_norm": 0.11761331558227539,
+      "learning_rate": 3.37625e-05,
+      "loss": 1.8324,
+      "step": 1300
+    },
+    {
+      "epoch": 1.35,
+      "grad_norm": 0.10469675809144974,
+      "learning_rate": 3.31375e-05,
+      "loss": 1.8358,
+      "step": 1350
+    },
+    {
+      "epoch": 1.4,
+      "grad_norm": 0.1126856803894043,
+      "learning_rate": 3.2512500000000004e-05,
+      "loss": 1.7683,
+      "step": 1400
+    },
+    {
+      "epoch": 1.45,
+      "grad_norm": 0.13452214002609253,
+      "learning_rate": 3.18875e-05,
+      "loss": 1.8371,
+      "step": 1450
+    },
+    {
+      "epoch": 1.5,
+      "grad_norm": 0.13590937852859497,
+      "learning_rate": 3.12625e-05,
+      "loss": 1.8016,
+      "step": 1500
+    },
+    {
+      "epoch": 1.55,
+      "grad_norm": 0.16644452512264252,
+      "learning_rate": 3.06375e-05,
+      "loss": 1.828,
+      "step": 1550
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 0.1445678323507309,
+      "learning_rate": 3.0012499999999998e-05,
+      "loss": 1.8417,
+      "step": 1600
+    },
+    {
+      "epoch": 1.65,
+      "grad_norm": 0.10797803103923798,
+      "learning_rate": 2.9387500000000003e-05,
+      "loss": 1.8364,
+      "step": 1650
+    },
+    {
+      "epoch": 1.7,
+      "grad_norm": 0.14495541155338287,
+      "learning_rate": 2.8762500000000005e-05,
+      "loss": 1.7685,
+      "step": 1700
+    },
+    {
+      "epoch": 1.75,
+      "grad_norm": 0.19478319585323334,
+      "learning_rate": 2.81375e-05,
+      "loss": 1.8443,
+      "step": 1750
+    },
+    {
+      "epoch": 1.8,
+      "grad_norm": 0.1155581995844841,
+      "learning_rate": 2.75125e-05,
+      "loss": 1.8308,
+      "step": 1800
+    },
+    {
+      "epoch": 1.85,
+      "grad_norm": 0.16222324967384338,
+      "learning_rate": 2.68875e-05,
+      "loss": 1.8612,
+      "step": 1850
+    },
+    {
+      "epoch": 1.9,
+      "grad_norm": 0.12850694358348846,
+      "learning_rate": 2.62625e-05,
+      "loss": 1.7941,
+      "step": 1900
+    },
+    {
+      "epoch": 1.95,
+      "grad_norm": 0.15454839169979095,
+      "learning_rate": 2.5637500000000003e-05,
+      "loss": 1.8409,
+      "step": 1950
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 0.13590778410434723,
+      "learning_rate": 2.50125e-05,
+      "loss": 1.8786,
+      "step": 2000
+    },
+    {
+      "epoch": 2.05,
+      "grad_norm": 0.16667228937149048,
+      "learning_rate": 2.4387500000000003e-05,
+      "loss": 1.8638,
+      "step": 2050
+    },
+    {
+      "epoch": 2.1,
+      "grad_norm": 0.1315215826034546,
+      "learning_rate": 2.37625e-05,
+      "loss": 1.8444,
+      "step": 2100
+    },
+    {
+      "epoch": 2.15,
+      "grad_norm": 0.12784986197948456,
+      "learning_rate": 2.31375e-05,
+      "loss": 1.8937,
+      "step": 2150
+    },
+    {
+      "epoch": 2.2,
+      "grad_norm": 0.1757715791463852,
+      "learning_rate": 2.2512500000000002e-05,
+      "loss": 1.8579,
+      "step": 2200
+    },
+    {
+      "epoch": 2.25,
+      "grad_norm": 0.13813862204551697,
+      "learning_rate": 2.18875e-05,
+      "loss": 1.8044,
+      "step": 2250
+    },
+    {
+      "epoch": 2.3,
+      "grad_norm": 0.1028163880109787,
+      "learning_rate": 2.1262500000000002e-05,
+      "loss": 1.8371,
+      "step": 2300
+    },
+    {
+      "epoch": 2.35,
+      "grad_norm": 0.1259194314479828,
+      "learning_rate": 2.06375e-05,
+      "loss": 1.7808,
+      "step": 2350
+    },
+    {
+      "epoch": 2.4,
+      "grad_norm": 0.14981134235858917,
+      "learning_rate": 2.0012500000000002e-05,
+      "loss": 1.791,
+      "step": 2400
+    },
+    {
+      "epoch": 2.45,
+      "grad_norm": 0.17923866212368011,
+      "learning_rate": 1.93875e-05,
+      "loss": 1.7812,
+      "step": 2450
+    },
+    {
+      "epoch": 2.5,
+      "grad_norm": 0.1661010980606079,
+      "learning_rate": 1.87625e-05,
+      "loss": 1.8722,
+      "step": 2500
+    },
+    {
+      "epoch": 2.55,
+      "grad_norm": 0.1746157556772232,
+      "learning_rate": 1.81375e-05,
+      "loss": 1.7818,
+      "step": 2550
+    },
+    {
+      "epoch": 2.6,
+      "grad_norm": 0.13667204976081848,
+      "learning_rate": 1.7512500000000002e-05,
+      "loss": 1.8312,
+      "step": 2600
+    },
+    {
+      "epoch": 2.65,
+      "grad_norm": 0.149635910987854,
+      "learning_rate": 1.68875e-05,
+      "loss": 1.813,
+      "step": 2650
+    },
+    {
+      "epoch": 2.7,
+      "grad_norm": 0.1654106080532074,
+      "learning_rate": 1.62625e-05,
+      "loss": 1.8739,
+      "step": 2700
+    },
+    {
+      "epoch": 2.75,
+      "grad_norm": 0.14472809433937073,
+      "learning_rate": 1.56375e-05,
+      "loss": 1.7726,
+      "step": 2750
+    },
+    {
+      "epoch": 2.8,
+      "grad_norm": 0.14457519352436066,
+      "learning_rate": 1.5012500000000002e-05,
+      "loss": 1.7593,
+      "step": 2800
+    },
+    {
+      "epoch": 2.85,
+      "grad_norm": 0.12135745584964752,
+      "learning_rate": 1.43875e-05,
+      "loss": 1.8547,
+      "step": 2850
+    },
+    {
+      "epoch": 2.9,
+      "grad_norm": 0.18297483026981354,
+      "learning_rate": 1.37625e-05,
+      "loss": 1.852,
+      "step": 2900
+    },
+    {
+      "epoch": 2.95,
+      "grad_norm": 0.13699814677238464,
+      "learning_rate": 1.31375e-05,
+      "loss": 1.8064,
+      "step": 2950
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 0.1180616170167923,
+      "learning_rate": 1.25125e-05,
+      "loss": 1.8457,
+      "step": 3000
+    },
+    {
+      "epoch": 3.05,
+      "grad_norm": 0.13587267696857452,
+      "learning_rate": 1.18875e-05,
+      "loss": 1.8615,
+      "step": 3050
+    },
+    {
+      "epoch": 3.1,
+      "grad_norm": 0.13397973775863647,
+      "learning_rate": 1.1262500000000001e-05,
+      "loss": 1.8077,
+      "step": 3100
+    },
+    {
+      "epoch": 3.15,
+      "grad_norm": 0.13365299999713898,
+      "learning_rate": 1.0637500000000001e-05,
+      "loss": 1.9439,
+      "step": 3150
+    },
+    {
+      "epoch": 3.2,
+      "grad_norm": 0.1443161964416504,
+      "learning_rate": 1.0012500000000001e-05,
+      "loss": 1.8582,
+      "step": 3200
+    },
+    {
+      "epoch": 3.25,
+      "grad_norm": 0.14696797728538513,
+      "learning_rate": 9.387500000000001e-06,
+      "loss": 1.781,
+      "step": 3250
+    },
+    {
+      "epoch": 3.3,
+      "grad_norm": 0.15039022266864777,
+      "learning_rate": 8.7625e-06,
+      "loss": 1.8631,
+      "step": 3300
+    },
+    {
+      "epoch": 3.35,
+      "grad_norm": 0.1311933696269989,
+      "learning_rate": 8.137500000000001e-06,
+      "loss": 1.8544,
+      "step": 3350
+    },
+    {
+      "epoch": 3.4,
+      "grad_norm": 0.15743456780910492,
+      "learning_rate": 7.5125000000000005e-06,
+      "loss": 1.7619,
+      "step": 3400
+    },
+    {
+      "epoch": 3.45,
+      "grad_norm": 0.1521286964416504,
+      "learning_rate": 6.8875000000000005e-06,
+      "loss": 1.8013,
+      "step": 3450
+    },
+    {
+      "epoch": 3.5,
+      "grad_norm": 0.22098106145858765,
+      "learning_rate": 6.262500000000001e-06,
+      "loss": 1.799,
+      "step": 3500
+    },
+    {
+      "epoch": 3.55,
+      "grad_norm": 0.17960400879383087,
+      "learning_rate": 5.637500000000001e-06,
+      "loss": 1.7757,
+      "step": 3550
+    },
+    {
+      "epoch": 3.6,
+      "grad_norm": 0.13921096920967102,
+      "learning_rate": 5.012500000000001e-06,
+      "loss": 1.8261,
+      "step": 3600
+    },
+    {
+      "epoch": 3.65,
+      "grad_norm": 0.1500893533229828,
+      "learning_rate": 4.3875e-06,
+      "loss": 1.8084,
+      "step": 3650
+    },
+    {
+      "epoch": 3.7,
+      "grad_norm": 0.16856829822063446,
+      "learning_rate": 3.7625e-06,
+      "loss": 1.8691,
+      "step": 3700
+    },
+    {
+      "epoch": 3.75,
+      "grad_norm": 0.14953631162643433,
+      "learning_rate": 3.1375e-06,
+      "loss": 1.7682,
+      "step": 3750
+    },
+    {
+      "epoch": 3.8,
+      "grad_norm": 0.14736834168434143,
+      "learning_rate": 2.5125e-06,
+      "loss": 1.7549,
+      "step": 3800
+    },
+    {
+      "epoch": 3.85,
+      "grad_norm": 0.1247793510556221,
+      "learning_rate": 1.8875e-06,
+      "loss": 1.8505,
+      "step": 3850
+    },
+    {
+      "epoch": 3.9,
+      "grad_norm": 0.1904279887676239,
+      "learning_rate": 1.2625000000000002e-06,
+      "loss": 1.8485,
+      "step": 3900
+    },
+    {
+      "epoch": 3.95,
+      "grad_norm": 0.13930699229240417,
+      "learning_rate": 6.375e-07,
+      "loss": 1.8034,
+      "step": 3950
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 0.1202196553349495,
+      "learning_rate": 1.2500000000000001e-08,
+      "loss": 1.8422,
+      "step": 4000
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 4000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 10,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5.360735748096e+16,
+  "train_batch_size": 10,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:76d1d18100c90f968b787f300279b75f704d674df1dc4e0364972389fca68a1f
+size 5304