natsu0wu commited on
Commit
ea2b5cb
·
verified ·
1 Parent(s): 2ee8906

Upload folder using huggingface_hub

Browse files
adapter_config.json CHANGED
@@ -16,13 +16,13 @@
16
  "megatron_core": "megatron.core",
17
  "modules_to_save": null,
18
  "peft_type": "LORA",
19
- "r": 64,
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "up_proj",
24
- "o_proj",
25
  "Wqkv",
 
 
26
  "gate_proj",
27
  "down_proj"
28
  ],
 
16
  "megatron_core": "megatron.core",
17
  "modules_to_save": null,
18
  "peft_type": "LORA",
19
+ "r": 128,
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
 
23
  "Wqkv",
24
+ "o_proj",
25
+ "up_proj",
26
  "gate_proj",
27
  "down_proj"
28
  ],
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4c653323e8da28fbb868ad2f81198f303bd89f299eb88397203332e532218a1
3
- size 340808816
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f10b5e235575ff905ef62225011f7e3ce5b4f4879f96f7cf3fef8f0c0e9e9d7
3
+ size 681596224
phi_sft/checkpoint-21/adapter_config.json CHANGED
@@ -16,13 +16,13 @@
16
  "megatron_core": "megatron.core",
17
  "modules_to_save": null,
18
  "peft_type": "LORA",
19
- "r": 64,
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "up_proj",
24
- "o_proj",
25
  "Wqkv",
 
 
26
  "gate_proj",
27
  "down_proj"
28
  ],
 
16
  "megatron_core": "megatron.core",
17
  "modules_to_save": null,
18
  "peft_type": "LORA",
19
+ "r": 128,
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
 
23
  "Wqkv",
24
+ "o_proj",
25
+ "up_proj",
26
  "gate_proj",
27
  "down_proj"
28
  ],
phi_sft/checkpoint-21/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4c653323e8da28fbb868ad2f81198f303bd89f299eb88397203332e532218a1
3
- size 340808816
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f10b5e235575ff905ef62225011f7e3ce5b4f4879f96f7cf3fef8f0c0e9e9d7
3
+ size 681596224
phi_sft/checkpoint-21/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47f7b91246b53fb888d3f8ab9983dbcf0286787d69149a95745755a73390b06b
3
- size 173249466
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a6a42be5bd64e2870b5edf24fc5c7d96dafa38a07e566a77e401ff3bbf9f580
3
+ size 346305466
phi_sft/checkpoint-21/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b2e4ccd69d798db98663671f57e5e40c79092c0c1b0b814eb55c94f4d3ddd11
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea9c0ab8a6b2af0730d844b181b1379186b7de559fff85508fd0d3dc05e74d44
3
  size 1064
phi_sft/checkpoint-21/trainer_state.json CHANGED
@@ -8,22 +8,155 @@
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  {
12
  "epoch": 1.4035087719298245,
13
- "grad_norm": 0.2112729400396347,
14
- "learning_rate": 1.0000000000000002e-06,
15
- "loss": 1.1129,
16
  "step": 10
17
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  {
19
  "epoch": 2.807017543859649,
20
- "grad_norm": 0.24816565215587616,
21
- "learning_rate": 2.0000000000000003e-06,
22
- "loss": 1.1337,
23
  "step": 20
 
 
 
 
 
 
 
24
  }
25
  ],
26
- "logging_steps": 10,
27
  "max_steps": 21,
28
  "num_input_tokens_seen": 0,
29
  "num_train_epochs": 3,
@@ -40,7 +173,7 @@
40
  "attributes": {}
41
  }
42
  },
43
- "total_flos": 3.88976427472896e+16,
44
  "train_batch_size": 4,
45
  "trial_name": null,
46
  "trial_params": null
 
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
+ {
12
+ "epoch": 0.14035087719298245,
13
+ "grad_norm": 0.15580356121063232,
14
+ "learning_rate": 4e-08,
15
+ "loss": 1.1187,
16
+ "step": 1
17
+ },
18
+ {
19
+ "epoch": 0.2807017543859649,
20
+ "grad_norm": 0.22189772129058838,
21
+ "learning_rate": 8e-08,
22
+ "loss": 1.1567,
23
+ "step": 2
24
+ },
25
+ {
26
+ "epoch": 0.42105263157894735,
27
+ "grad_norm": 0.1661478728055954,
28
+ "learning_rate": 1.2000000000000002e-07,
29
+ "loss": 1.0823,
30
+ "step": 3
31
+ },
32
+ {
33
+ "epoch": 0.5614035087719298,
34
+ "grad_norm": 0.20104122161865234,
35
+ "learning_rate": 1.6e-07,
36
+ "loss": 1.1356,
37
+ "step": 4
38
+ },
39
+ {
40
+ "epoch": 0.7017543859649122,
41
+ "grad_norm": 0.16658252477645874,
42
+ "learning_rate": 2.0000000000000002e-07,
43
+ "loss": 1.1005,
44
+ "step": 5
45
+ },
46
+ {
47
+ "epoch": 0.8421052631578947,
48
+ "grad_norm": 0.5534899830818176,
49
+ "learning_rate": 2.4000000000000003e-07,
50
+ "loss": 1.1723,
51
+ "step": 6
52
+ },
53
+ {
54
+ "epoch": 0.9824561403508771,
55
+ "grad_norm": 0.16570821404457092,
56
+ "learning_rate": 2.8e-07,
57
+ "loss": 1.0728,
58
+ "step": 7
59
+ },
60
+ {
61
+ "epoch": 1.1228070175438596,
62
+ "grad_norm": 0.3442396819591522,
63
+ "learning_rate": 3.2e-07,
64
+ "loss": 1.1199,
65
+ "step": 8
66
+ },
67
+ {
68
+ "epoch": 1.263157894736842,
69
+ "grad_norm": 0.16980823874473572,
70
+ "learning_rate": 3.6e-07,
71
+ "loss": 1.1108,
72
+ "step": 9
73
+ },
74
  {
75
  "epoch": 1.4035087719298245,
76
+ "grad_norm": 0.5345178246498108,
77
+ "learning_rate": 4.0000000000000003e-07,
78
+ "loss": 1.06,
79
  "step": 10
80
  },
81
+ {
82
+ "epoch": 1.543859649122807,
83
+ "grad_norm": 0.5070182681083679,
84
+ "learning_rate": 4.4e-07,
85
+ "loss": 1.095,
86
+ "step": 11
87
+ },
88
+ {
89
+ "epoch": 1.6842105263157894,
90
+ "grad_norm": 0.13856372237205505,
91
+ "learning_rate": 4.800000000000001e-07,
92
+ "loss": 1.1721,
93
+ "step": 12
94
+ },
95
+ {
96
+ "epoch": 1.8245614035087718,
97
+ "grad_norm": 0.1828210949897766,
98
+ "learning_rate": 5.2e-07,
99
+ "loss": 1.1585,
100
+ "step": 13
101
+ },
102
+ {
103
+ "epoch": 1.9649122807017543,
104
+ "grad_norm": 0.19238212704658508,
105
+ "learning_rate": 5.6e-07,
106
+ "loss": 1.1213,
107
+ "step": 14
108
+ },
109
+ {
110
+ "epoch": 2.1052631578947367,
111
+ "grad_norm": 0.17399907112121582,
112
+ "learning_rate": 6.000000000000001e-07,
113
+ "loss": 1.1722,
114
+ "step": 15
115
+ },
116
+ {
117
+ "epoch": 2.245614035087719,
118
+ "grad_norm": 0.22373908758163452,
119
+ "learning_rate": 6.4e-07,
120
+ "loss": 1.1152,
121
+ "step": 16
122
+ },
123
+ {
124
+ "epoch": 2.3859649122807016,
125
+ "grad_norm": 0.24265660345554352,
126
+ "learning_rate": 6.800000000000001e-07,
127
+ "loss": 1.1472,
128
+ "step": 17
129
+ },
130
+ {
131
+ "epoch": 2.526315789473684,
132
+ "grad_norm": 0.1733134537935257,
133
+ "learning_rate": 7.2e-07,
134
+ "loss": 1.1316,
135
+ "step": 18
136
+ },
137
+ {
138
+ "epoch": 2.6666666666666665,
139
+ "grad_norm": 0.14888471364974976,
140
+ "learning_rate": 7.6e-07,
141
+ "loss": 0.9929,
142
+ "step": 19
143
+ },
144
  {
145
  "epoch": 2.807017543859649,
146
+ "grad_norm": 0.1817580908536911,
147
+ "learning_rate": 8.000000000000001e-07,
148
+ "loss": 1.2424,
149
  "step": 20
150
+ },
151
+ {
152
+ "epoch": 2.9473684210526314,
153
+ "grad_norm": 0.13880617916584015,
154
+ "learning_rate": 8.400000000000001e-07,
155
+ "loss": 1.1539,
156
+ "step": 21
157
  }
158
  ],
159
+ "logging_steps": 1,
160
  "max_steps": 21,
161
  "num_input_tokens_seen": 0,
162
  "num_train_epochs": 3,
 
173
  "attributes": {}
174
  }
175
  },
176
+ "total_flos": 3.9136378493952e+16,
177
  "train_batch_size": 4,
178
  "trial_name": null,
179
  "trial_params": null
phi_sft/checkpoint-21/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:660e00c8924c1ca69c5c471e9f192e23d4be5cc016db2182b7d705a67e822cf5
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c424ce14c289f589bf014fc597c47716f9cdb7a66ed0621bf8cdcce4a513c526
3
  size 5432
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:660e00c8924c1ca69c5c471e9f192e23d4be5cc016db2182b7d705a67e822cf5
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c424ce14c289f589bf014fc597c47716f9cdb7a66ed0621bf8cdcce4a513c526
3
  size 5432