Upload folder using huggingface_hub

Browse files

Files changed (9) hide show

adapter_config.json +3 -3
adapter_model.safetensors +2 -2
phi_sft/checkpoint-21/adapter_config.json +3 -3
phi_sft/checkpoint-21/adapter_model.safetensors +2 -2
phi_sft/checkpoint-21/optimizer.pt +2 -2
phi_sft/checkpoint-21/scheduler.pt +1 -1
phi_sft/checkpoint-21/trainer_state.json +141 -8
phi_sft/checkpoint-21/training_args.bin +1 -1
training_args.bin +1 -1

adapter_config.json CHANGED Viewed

@@ -16,13 +16,13 @@
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
-  "r": 64,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "up_proj",
-    "o_proj",
     "Wqkv",
     "gate_proj",
     "down_proj"
   ],

   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
+  "r": 128,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "Wqkv",
+    "o_proj",
+    "up_proj",
     "gate_proj",
     "down_proj"
   ],

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d4c653323e8da28fbb868ad2f81198f303bd89f299eb88397203332e532218a1
-size 340808816

 version https://git-lfs.github.com/spec/v1
+oid sha256:5f10b5e235575ff905ef62225011f7e3ce5b4f4879f96f7cf3fef8f0c0e9e9d7
+size 681596224

phi_sft/checkpoint-21/adapter_config.json CHANGED Viewed

@@ -16,13 +16,13 @@
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
-  "r": 64,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "up_proj",
-    "o_proj",
     "Wqkv",
     "gate_proj",
     "down_proj"
   ],

   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
+  "r": 128,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "Wqkv",
+    "o_proj",
+    "up_proj",
     "gate_proj",
     "down_proj"
   ],

phi_sft/checkpoint-21/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d4c653323e8da28fbb868ad2f81198f303bd89f299eb88397203332e532218a1
-size 340808816

 version https://git-lfs.github.com/spec/v1
+oid sha256:5f10b5e235575ff905ef62225011f7e3ce5b4f4879f96f7cf3fef8f0c0e9e9d7
+size 681596224

phi_sft/checkpoint-21/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:47f7b91246b53fb888d3f8ab9983dbcf0286787d69149a95745755a73390b06b
-size 173249466

 version https://git-lfs.github.com/spec/v1
+oid sha256:2a6a42be5bd64e2870b5edf24fc5c7d96dafa38a07e566a77e401ff3bbf9f580
+size 346305466

phi_sft/checkpoint-21/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8b2e4ccd69d798db98663671f57e5e40c79092c0c1b0b814eb55c94f4d3ddd11
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ea9c0ab8a6b2af0730d844b181b1379186b7de559fff85508fd0d3dc05e74d44
 size 1064

phi_sft/checkpoint-21/trainer_state.json CHANGED Viewed

@@ -8,22 +8,155 @@
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 1.4035087719298245,
-      "grad_norm": 0.2112729400396347,
-      "learning_rate": 1.0000000000000002e-06,
-      "loss": 1.1129,
       "step": 10
     },
     {
       "epoch": 2.807017543859649,
-      "grad_norm": 0.24816565215587616,
-      "learning_rate": 2.0000000000000003e-06,
-      "loss": 1.1337,
       "step": 20
     }
   ],
-  "logging_steps": 10,
   "max_steps": 21,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 3,
@@ -40,7 +173,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.88976427472896e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
+    {
+      "epoch": 0.14035087719298245,
+      "grad_norm": 0.15580356121063232,
+      "learning_rate": 4e-08,
+      "loss": 1.1187,
+      "step": 1
+    },
+    {
+      "epoch": 0.2807017543859649,
+      "grad_norm": 0.22189772129058838,
+      "learning_rate": 8e-08,
+      "loss": 1.1567,
+      "step": 2
+    },
+    {
+      "epoch": 0.42105263157894735,
+      "grad_norm": 0.1661478728055954,
+      "learning_rate": 1.2000000000000002e-07,
+      "loss": 1.0823,
+      "step": 3
+    },
+    {
+      "epoch": 0.5614035087719298,
+      "grad_norm": 0.20104122161865234,
+      "learning_rate": 1.6e-07,
+      "loss": 1.1356,
+      "step": 4
+    },
+    {
+      "epoch": 0.7017543859649122,
+      "grad_norm": 0.16658252477645874,
+      "learning_rate": 2.0000000000000002e-07,
+      "loss": 1.1005,
+      "step": 5
+    },
+    {
+      "epoch": 0.8421052631578947,
+      "grad_norm": 0.5534899830818176,
+      "learning_rate": 2.4000000000000003e-07,
+      "loss": 1.1723,
+      "step": 6
+    },
+    {
+      "epoch": 0.9824561403508771,
+      "grad_norm": 0.16570821404457092,
+      "learning_rate": 2.8e-07,
+      "loss": 1.0728,
+      "step": 7
+    },
+    {
+      "epoch": 1.1228070175438596,
+      "grad_norm": 0.3442396819591522,
+      "learning_rate": 3.2e-07,
+      "loss": 1.1199,
+      "step": 8
+    },
+    {
+      "epoch": 1.263157894736842,
+      "grad_norm": 0.16980823874473572,
+      "learning_rate": 3.6e-07,
+      "loss": 1.1108,
+      "step": 9
+    },
     {
       "epoch": 1.4035087719298245,
+      "grad_norm": 0.5345178246498108,
+      "learning_rate": 4.0000000000000003e-07,
+      "loss": 1.06,
       "step": 10
     },
+    {
+      "epoch": 1.543859649122807,
+      "grad_norm": 0.5070182681083679,
+      "learning_rate": 4.4e-07,
+      "loss": 1.095,
+      "step": 11
+    },
+    {
+      "epoch": 1.6842105263157894,
+      "grad_norm": 0.13856372237205505,
+      "learning_rate": 4.800000000000001e-07,
+      "loss": 1.1721,
+      "step": 12
+    },
+    {
+      "epoch": 1.8245614035087718,
+      "grad_norm": 0.1828210949897766,
+      "learning_rate": 5.2e-07,
+      "loss": 1.1585,
+      "step": 13
+    },
+    {
+      "epoch": 1.9649122807017543,
+      "grad_norm": 0.19238212704658508,
+      "learning_rate": 5.6e-07,
+      "loss": 1.1213,
+      "step": 14
+    },
+    {
+      "epoch": 2.1052631578947367,
+      "grad_norm": 0.17399907112121582,
+      "learning_rate": 6.000000000000001e-07,
+      "loss": 1.1722,
+      "step": 15
+    },
+    {
+      "epoch": 2.245614035087719,
+      "grad_norm": 0.22373908758163452,
+      "learning_rate": 6.4e-07,
+      "loss": 1.1152,
+      "step": 16
+    },
+    {
+      "epoch": 2.3859649122807016,
+      "grad_norm": 0.24265660345554352,
+      "learning_rate": 6.800000000000001e-07,
+      "loss": 1.1472,
+      "step": 17
+    },
+    {
+      "epoch": 2.526315789473684,
+      "grad_norm": 0.1733134537935257,
+      "learning_rate": 7.2e-07,
+      "loss": 1.1316,
+      "step": 18
+    },
+    {
+      "epoch": 2.6666666666666665,
+      "grad_norm": 0.14888471364974976,
+      "learning_rate": 7.6e-07,
+      "loss": 0.9929,
+      "step": 19
+    },
     {
       "epoch": 2.807017543859649,
+      "grad_norm": 0.1817580908536911,
+      "learning_rate": 8.000000000000001e-07,
+      "loss": 1.2424,
       "step": 20
+    },
+    {
+      "epoch": 2.9473684210526314,
+      "grad_norm": 0.13880617916584015,
+      "learning_rate": 8.400000000000001e-07,
+      "loss": 1.1539,
+      "step": 21
     }
   ],
+  "logging_steps": 1,
   "max_steps": 21,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 3,
       "attributes": {}
     }
   },
+  "total_flos": 3.9136378493952e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

phi_sft/checkpoint-21/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:660e00c8924c1ca69c5c471e9f192e23d4be5cc016db2182b7d705a67e822cf5
 size 5432

 version https://git-lfs.github.com/spec/v1
+oid sha256:c424ce14c289f589bf014fc597c47716f9cdb7a66ed0621bf8cdcce4a513c526
 size 5432

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:660e00c8924c1ca69c5c471e9f192e23d4be5cc016db2182b7d705a67e822cf5
 size 5432

 version https://git-lfs.github.com/spec/v1
+oid sha256:c424ce14c289f589bf014fc597c47716f9cdb7a66ed0621bf8cdcce4a513c526
 size 5432