Pranay17 committed on
Commit
dd24ae4
·
verified ·
1 Parent(s): 159ce31

Training in progress, step 1000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6912a030171d886527041b82690c91d03e1ea271b7d5519b38bdb4adc8170e97
3
  size 42002584
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04a023d8eae2b5ccc492ba729567d59e031019da2e3ca4bafb6d61d0d3388d86
3
  size 42002584
last-checkpoint/global_step1000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d5b21d5e924cff4d05438ea3ca0b3615f241ee4ddb122ee35eb35ff94632f15
3
+ size 251710672
last-checkpoint/global_step1000/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85987ca267ea13d304a0c6ba3db8e4cb9594a3ac97b5c27c8d905b35241c8a98
3
+ size 47955328
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step500
 
1
+ global_step1000
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04a8c070f714277e8a9f59e5821f85863d3c7f99e04d4f37d4f8d9de9791794d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c98879aba5b00d81dca96ce732b312683376f3923af7bda8eb2a40d4da8d98d
3
  size 14244
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.849002849002849,
5
  "eval_steps": 1000,
6
- "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -84,6 +84,76 @@
84
  "learning_rate": 0.00017527527527527528,
85
  "loss": 0.1918,
86
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  }
88
  ],
89
  "logging_steps": 50,
@@ -103,7 +173,7 @@
103
  "attributes": {}
104
  }
105
  },
106
- "total_flos": 1.3515320547344384e+16,
107
  "train_batch_size": 2,
108
  "trial_name": null,
109
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.698005698005698,
5
  "eval_steps": 1000,
6
+ "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
84
  "learning_rate": 0.00017527527527527528,
85
  "loss": 0.1918,
86
  "step": 500
87
+ },
88
+ {
89
+ "epoch": 3.133903133903134,
90
+ "grad_norm": 2.571071147918701,
91
+ "learning_rate": 0.00017277277277277277,
92
+ "loss": 0.1541,
93
+ "step": 550
94
+ },
95
+ {
96
+ "epoch": 3.4188034188034186,
97
+ "grad_norm": 1.5915725231170654,
98
+ "learning_rate": 0.00017027027027027028,
99
+ "loss": 0.1156,
100
+ "step": 600
101
+ },
102
+ {
103
+ "epoch": 3.7037037037037037,
104
+ "grad_norm": 1.2947059869766235,
105
+ "learning_rate": 0.00016776776776776777,
106
+ "loss": 0.1157,
107
+ "step": 650
108
+ },
109
+ {
110
+ "epoch": 3.9886039886039883,
111
+ "grad_norm": 1.7114081382751465,
112
+ "learning_rate": 0.00016526526526526526,
113
+ "loss": 0.1309,
114
+ "step": 700
115
+ },
116
+ {
117
+ "epoch": 4.273504273504273,
118
+ "grad_norm": 2.1177897453308105,
119
+ "learning_rate": 0.00016276276276276275,
120
+ "loss": 0.099,
121
+ "step": 750
122
+ },
123
+ {
124
+ "epoch": 4.5584045584045585,
125
+ "grad_norm": 2.950777292251587,
126
+ "learning_rate": 0.00016026026026026027,
127
+ "loss": 0.1007,
128
+ "step": 800
129
+ },
130
+ {
131
+ "epoch": 4.843304843304844,
132
+ "grad_norm": 2.4155728816986084,
133
+ "learning_rate": 0.00015775775775775776,
134
+ "loss": 0.102,
135
+ "step": 850
136
+ },
137
+ {
138
+ "epoch": 5.128205128205128,
139
+ "grad_norm": 1.3441689014434814,
140
+ "learning_rate": 0.00015525525525525525,
141
+ "loss": 0.0948,
142
+ "step": 900
143
+ },
144
+ {
145
+ "epoch": 5.413105413105413,
146
+ "grad_norm": 1.041314721107483,
147
+ "learning_rate": 0.00015275275275275277,
148
+ "loss": 0.0851,
149
+ "step": 950
150
+ },
151
+ {
152
+ "epoch": 5.698005698005698,
153
+ "grad_norm": 1.53568696975708,
154
+ "learning_rate": 0.00015025025025025026,
155
+ "loss": 0.0876,
156
+ "step": 1000
157
  }
158
  ],
159
  "logging_steps": 50,
 
173
  "attributes": {}
174
  }
175
  },
176
+ "total_flos": 2.7161049585156096e+16,
177
  "train_batch_size": 2,
178
  "trial_name": null,
179
  "trial_params": null