Pranay17 committed on
Commit
b732614
·
verified ·
1 Parent(s): 9f9d96c

Training in progress, step 1000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb87aacff72e1602dfd126734b3b0a18e3f3d5965e2a314893e17028ba457700
3
  size 42002584
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbc5d4c3ea89cdc82e2f431aa8d1e7c54e201d61a41d4760248be0ded6f9736d
3
  size 42002584
last-checkpoint/global_step1000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52d3be43090292fc3f408bfa7323dda07b05a49247f049954c58fd96ffd03826
3
+ size 251710672
last-checkpoint/global_step1000/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ce18e282a0f287d33a4933b44086a76be864c3d30a6b509b2f377111f5059ef
3
+ size 153747385
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step500
 
1
+ global_step1000
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38d028753a88adea82238f430350e9b4298093ea8e04516836108c27462c1365
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7212cfde88fad4aa734540c0da66cf99f58abbd8a4c5ec6aecca4d821435319d
3
  size 14244
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.849002849002849,
5
  "eval_steps": 1000,
6
- "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -84,6 +84,76 @@
84
  "learning_rate": 0.00017527527527527528,
85
  "loss": 0.4327,
86
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  }
88
  ],
89
  "logging_steps": 50,
@@ -103,7 +173,7 @@
103
  "attributes": {}
104
  }
105
  },
106
- "total_flos": 1.5049248454213632e+16,
107
  "train_batch_size": 2,
108
  "trial_name": null,
109
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.698005698005698,
5
  "eval_steps": 1000,
6
+ "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
84
  "learning_rate": 0.00017527527527527528,
85
  "loss": 0.4327,
86
  "step": 500
87
+ },
88
+ {
89
+ "epoch": 3.133903133903134,
90
+ "grad_norm": 3.940809488296509,
91
+ "learning_rate": 0.00017277277277277277,
92
+ "loss": 0.3364,
93
+ "step": 550
94
+ },
95
+ {
96
+ "epoch": 3.4188034188034186,
97
+ "grad_norm": 3.061803102493286,
98
+ "learning_rate": 0.00017027027027027028,
99
+ "loss": 0.2445,
100
+ "step": 600
101
+ },
102
+ {
103
+ "epoch": 3.7037037037037037,
104
+ "grad_norm": 3.389284372329712,
105
+ "learning_rate": 0.00016776776776776777,
106
+ "loss": 0.2597,
107
+ "step": 650
108
+ },
109
+ {
110
+ "epoch": 3.9886039886039883,
111
+ "grad_norm": 3.320084810256958,
112
+ "learning_rate": 0.00016526526526526526,
113
+ "loss": 0.2698,
114
+ "step": 700
115
+ },
116
+ {
117
+ "epoch": 4.273504273504273,
118
+ "grad_norm": 2.7199738025665283,
119
+ "learning_rate": 0.00016276276276276275,
120
+ "loss": 0.1781,
121
+ "step": 750
122
+ },
123
+ {
124
+ "epoch": 4.5584045584045585,
125
+ "grad_norm": 3.226743459701538,
126
+ "learning_rate": 0.00016026026026026027,
127
+ "loss": 0.1902,
128
+ "step": 800
129
+ },
130
+ {
131
+ "epoch": 4.843304843304844,
132
+ "grad_norm": 4.62879753112793,
133
+ "learning_rate": 0.00015775775775775776,
134
+ "loss": 0.209,
135
+ "step": 850
136
+ },
137
+ {
138
+ "epoch": 5.128205128205128,
139
+ "grad_norm": 2.747284412384033,
140
+ "learning_rate": 0.00015525525525525525,
141
+ "loss": 0.1786,
142
+ "step": 900
143
+ },
144
+ {
145
+ "epoch": 5.413105413105413,
146
+ "grad_norm": 2.259187936782837,
147
+ "learning_rate": 0.00015275275275275277,
148
+ "loss": 0.1536,
149
+ "step": 950
150
+ },
151
+ {
152
+ "epoch": 5.698005698005698,
153
+ "grad_norm": 1.9622772932052612,
154
+ "learning_rate": 0.00015025025025025026,
155
+ "loss": 0.156,
156
+ "step": 1000
157
  }
158
  ],
159
  "logging_steps": 50,
 
173
  "attributes": {}
174
  }
175
  },
176
+ "total_flos": 3.0228228471783424e+16,
177
  "train_batch_size": 2,
178
  "trial_name": null,
179
  "trial_params": null