nypgd committed
Commit 165aaf8 · verified · 1 Parent(s): e920dc2

Training in progress, step 100, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6fca072b2c749413dd5ce3683dd32b4c068e76430d79ceb2f957de0ea2ec09ea
+oid sha256:228058b2e2c5093b95942a8c6188d917712e871458f4a4b99601a43efa4262e2
 size 131251312
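
The checkpoint binaries above are stored as Git LFS pointers, so only the oid sha256 and size fields appear in the diff; the weights themselves live in LFS storage. As a rough illustration (not part of this commit), a locally downloaded file can be checked against its pointer by hashing it; the path below is hypothetical:

import hashlib

# Hypothetical local copy of the LFS-tracked file from this checkpoint.
path = "last-checkpoint/adapter_model.safetensors"

# Expected oid taken from the updated pointer above.
expected = "228058b2e2c5093b95942a8c6188d917712e871458f4a4b99601a43efa4262e2"

sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)

print("match:", sha.hexdigest() == expected)
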
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8169611cb1c8a402ac90dcbdee5673db1566e3b8599f63a52edae45763b12854
+oid sha256:064125a886fe952aaedb264e4a7daa313b8e3048086f9d3fb8cea54f7bc2023f
 size 61093892
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:608fccb6c056ce88cdfd5355e6be2046f4d107a24a87c6b0d2c3b200ce6bb4ea
+oid sha256:3d7ecf15e83ac4d18e0d90f8a44821af2f304313a6ae05eeb21767226a79c463
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6cdce33ebc5972235a89e7008a7bf54a98fa227109b4975663485ad96089f907
+oid sha256:3a60c7d771c1fd156acee762fba03c724cb41829a3f71df370ecd1d20b134982
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,67 +2,158 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.8045977011494253,
+  "epoch": 2.2758620689655173,
   "eval_steps": 500,
-  "global_step": 35,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.11494252873563218,
-      "grad_norm": 0.9143027067184448,
+      "grad_norm": 0.9145353436470032,
       "learning_rate": 0.00016,
-      "loss": 2.7102,
+      "loss": 2.7096,
       "step": 5
     },
     {
       "epoch": 0.22988505747126436,
-      "grad_norm": 0.8938872814178467,
-      "learning_rate": 0.00017333333333333334,
-      "loss": 1.9796,
+      "grad_norm": 0.88957279920578,
+      "learning_rate": 0.00019157894736842104,
+      "loss": 1.9743,
       "step": 10
     },
     {
       "epoch": 0.3448275862068966,
-      "grad_norm": 0.9273212552070618,
-      "learning_rate": 0.00014,
-      "loss": 1.4091,
+      "grad_norm": 1.1346380710601807,
+      "learning_rate": 0.00018105263157894739,
+      "loss": 1.3837,
       "step": 15
     },
     {
       "epoch": 0.45977011494252873,
-      "grad_norm": 0.556845486164093,
-      "learning_rate": 0.00010666666666666667,
-      "loss": 1.2369,
+      "grad_norm": 0.4288289546966553,
+      "learning_rate": 0.0001705263157894737,
+      "loss": 1.2195,
       "step": 20
     },
     {
       "epoch": 0.5747126436781609,
-      "grad_norm": 0.3468436598777771,
-      "learning_rate": 7.333333333333333e-05,
-      "loss": 1.071,
+      "grad_norm": 0.3345566391944885,
+      "learning_rate": 0.00016,
+      "loss": 1.0581,
       "step": 25
     },
     {
       "epoch": 0.6896551724137931,
-      "grad_norm": 0.4009336829185486,
-      "learning_rate": 4e-05,
-      "loss": 1.0702,
+      "grad_norm": 0.29079899191856384,
+      "learning_rate": 0.00014947368421052633,
+      "loss": 1.0582,
       "step": 30
     },
     {
       "epoch": 0.8045977011494253,
-      "grad_norm": 0.2942411005496979,
-      "learning_rate": 6.666666666666667e-06,
-      "loss": 0.9927,
+      "grad_norm": 0.39675313234329224,
+      "learning_rate": 0.00013894736842105264,
+      "loss": 0.9695,
       "step": 35
+    },
+    {
+      "epoch": 0.9195402298850575,
+      "grad_norm": 0.31116706132888794,
+      "learning_rate": 0.00012842105263157895,
+      "loss": 1.1187,
+      "step": 40
+    },
+    {
+      "epoch": 1.0229885057471264,
+      "grad_norm": 0.24762211740016937,
+      "learning_rate": 0.00011789473684210525,
+      "loss": 1.121,
+      "step": 45
+    },
+    {
+      "epoch": 1.1379310344827587,
+      "grad_norm": 0.34917253255844116,
+      "learning_rate": 0.00010736842105263158,
+      "loss": 0.9679,
+      "step": 50
+    },
+    {
+      "epoch": 1.2528735632183907,
+      "grad_norm": 0.36430123448371887,
+      "learning_rate": 9.68421052631579e-05,
+      "loss": 0.9875,
+      "step": 55
+    },
+    {
+      "epoch": 1.367816091954023,
+      "grad_norm": 0.4741533696651459,
+      "learning_rate": 8.631578947368421e-05,
+      "loss": 1.0023,
+      "step": 60
+    },
+    {
+      "epoch": 1.4827586206896552,
+      "grad_norm": 0.25711435079574585,
+      "learning_rate": 7.578947368421054e-05,
+      "loss": 1.0899,
+      "step": 65
+    },
+    {
+      "epoch": 1.5977011494252875,
+      "grad_norm": 0.2915607690811157,
+      "learning_rate": 6.526315789473685e-05,
+      "loss": 1.0803,
+      "step": 70
+    },
+    {
+      "epoch": 1.7126436781609196,
+      "grad_norm": 0.2989424765110016,
+      "learning_rate": 5.4736842105263165e-05,
+      "loss": 1.0777,
+      "step": 75
+    },
+    {
+      "epoch": 1.8275862068965516,
+      "grad_norm": 0.32646244764328003,
+      "learning_rate": 4.421052631578947e-05,
+      "loss": 1.0842,
+      "step": 80
+    },
+    {
+      "epoch": 1.9425287356321839,
+      "grad_norm": 0.2813500463962555,
+      "learning_rate": 3.368421052631579e-05,
+      "loss": 1.1267,
+      "step": 85
+    },
+    {
+      "epoch": 2.045977011494253,
+      "grad_norm": 0.3768353760242462,
+      "learning_rate": 2.3157894736842107e-05,
+      "loss": 0.9741,
+      "step": 90
+    },
+    {
+      "epoch": 2.160919540229885,
+      "grad_norm": 0.29582270979881287,
+      "learning_rate": 1.2631578947368422e-05,
+      "loss": 1.0951,
+      "step": 95
+    },
+    {
+      "epoch": 2.2758620689655173,
+      "grad_norm": 0.3590467572212219,
+      "learning_rate": 2.105263157894737e-06,
+      "loss": 0.9928,
+      "step": 100
     }
   ],
   "logging_steps": 5,
-  "max_steps": 35,
+  "max_steps": 100,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 1,
+  "num_train_epochs": 3,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -76,7 +167,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1947736416225600.0,
+  "total_flos": 5557554405792480.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2734847dfff81ea8622046f310eb8aeae1a36e938141362dc50bb07314c32ab2
+oid sha256:f956480e3a7cc6b3b40b9db889dba9bd9d06ee68c04dd9c29257e7d05713e73d
 size 5752
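
The ~131 MB adapter_model.safetensors suggests this checkpoint holds a PEFT/LoRA adapter rather than full model weights. A minimal sketch for loading it for inference, assuming peft is installed and with the base model name as a placeholder:

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

BASE_MODEL = "base-model-name"  # placeholder: not recorded in this commit

base = AutoModelForCausalLM.from_pretrained(BASE_MODEL, torch_dtype=torch.float16)
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

# Attach the adapter weights saved in this checkpoint directory.
model = PeftModel.from_pretrained(base, "last-checkpoint")
model.eval()

inputs = tokenizer("Hello", return_tensors="pt")
with torch.no_grad():
    out = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(out[0], skip_special_tokens=True))
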