somaia02 committed on
Commit c4a2f7f
1 Parent(s): 9e12c26

Training in progress, step 3500, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8d9f1330624d49e1cfa068a1cfd37e35fba53b8c452104e1861364c4b2bcc193
+ oid sha256:cdb65aad7f5881ab6c4f55f41fa5aa18d8451e3142560b840323390718fbf1c0
  size 5323528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9e1273154004834ff66e75e29f96e8a925d89006ab79c55eac7eeca115990223
+ oid sha256:ed6c83118141e829ba55ae59bd3661bdfd7b741a28a938857f067ee13bda6e1f
  size 10707706
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:635c549447e6d040590768c59571f091ceb0002258d8b337cb9e12c5e9806440
+ oid sha256:3cd6b45f83835221dcdf23f243180950e962516b14dd7ff28fbb69bb83387d6c
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:66074cbfdf1af5e0486634010580a05d2b8863ee9a8c7ccf5e54b33878144bd6
+ oid sha256:15c59c2a801a017f8138f2d9ac3b94589723ddfbbf1ae570418aa9bfac089535
  size 1064
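The four binary files above (adapter weights, optimizer state, RNG state, scheduler state) are tracked with Git LFS, so the diff only touches their pointer files: a spec version, a sha256 object id, and the byte size. The sizes are unchanged in this commit; only the object ids differ, as expected when the same tensors are re-serialized at a later step. A minimal sketch for checking a downloaded blob against such a pointer, assuming hypothetical local paths (placeholders, not part of this repository's tooling):

```python
import hashlib
from pathlib import Path

def parse_lfs_pointer(text: str) -> dict:
    """Split the 'key value' lines of a Git LFS pointer (version, oid, size)."""
    return dict(line.split(" ", 1) for line in text.strip().splitlines())

def matches_pointer(pointer_path: str, blob_path: str) -> bool:
    """Return True if the local blob has the size and sha256 recorded in the pointer."""
    fields = parse_lfs_pointer(Path(pointer_path).read_text())
    expected_oid = fields["oid"].split(":", 1)[1]  # strip the "sha256:" prefix
    blob = Path(blob_path).read_bytes()
    return len(blob) == int(fields["size"]) and hashlib.sha256(blob).hexdigest() == expected_oid

if __name__ == "__main__":
    # Hypothetical paths: the pointer text as committed vs. the resolved binary on disk.
    print(matches_pointer("adapter_model.pointer", "last-checkpoint/adapter_model.safetensors"))
```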
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": 0.4271390736103058,
  "best_model_checkpoint": "bart_lora_outputs\\checkpoint-3000",
- "epoch": 4.893964110929853,
+ "epoch": 5.709624796084829,
  "eval_steps": 100,
- "global_step": 3000,
+ "global_step": 3500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -2047,13 +2047,353 @@
  "eval_samples_per_second": 91.022,
  "eval_steps_per_second": 11.426,
  "step": 3000
+ },
+ {
+ "epoch": 4.91,
+ "learning_rate": 0.0005541740674955595,
+ "loss": 0.4681,
+ "step": 3010
+ },
+ {
+ "epoch": 4.93,
+ "learning_rate": 0.0005523978685612788,
+ "loss": 0.4466,
+ "step": 3020
+ },
+ {
+ "epoch": 4.94,
+ "learning_rate": 0.0005506216696269982,
+ "loss": 0.4146,
+ "step": 3030
+ },
+ {
+ "epoch": 4.96,
+ "learning_rate": 0.0005488454706927176,
+ "loss": 0.4909,
+ "step": 3040
+ },
+ {
+ "epoch": 4.98,
+ "learning_rate": 0.0005470692717584369,
+ "loss": 0.4382,
+ "step": 3050
+ },
+ {
+ "epoch": 4.99,
+ "learning_rate": 0.0005452930728241563,
+ "loss": 0.4615,
+ "step": 3060
+ },
+ {
+ "epoch": 5.01,
+ "learning_rate": 0.0005435168738898757,
+ "loss": 0.4114,
+ "step": 3070
+ },
+ {
+ "epoch": 5.02,
+ "learning_rate": 0.0005417406749555951,
+ "loss": 0.4365,
+ "step": 3080
+ },
+ {
+ "epoch": 5.04,
+ "learning_rate": 0.0005399644760213144,
+ "loss": 0.4459,
+ "step": 3090
+ },
+ {
+ "epoch": 5.06,
+ "learning_rate": 0.0005381882770870338,
+ "loss": 0.4326,
+ "step": 3100
+ },
+ {
+ "epoch": 5.06,
+ "eval_loss": 0.42266643047332764,
+ "eval_runtime": 12.9188,
+ "eval_samples_per_second": 90.643,
+ "eval_steps_per_second": 11.379,
+ "step": 3100
+ },
+ {
+ "epoch": 5.07,
+ "learning_rate": 0.0005364120781527531,
+ "loss": 0.4261,
+ "step": 3110
+ },
+ {
+ "epoch": 5.09,
+ "learning_rate": 0.0005346358792184724,
+ "loss": 0.4112,
+ "step": 3120
+ },
+ {
+ "epoch": 5.11,
+ "learning_rate": 0.0005328596802841918,
+ "loss": 0.4144,
+ "step": 3130
+ },
+ {
+ "epoch": 5.12,
+ "learning_rate": 0.0005310834813499112,
+ "loss": 0.4119,
+ "step": 3140
+ },
+ {
+ "epoch": 5.14,
+ "learning_rate": 0.0005293072824156305,
+ "loss": 0.3985,
+ "step": 3150
+ },
+ {
+ "epoch": 5.15,
+ "learning_rate": 0.0005275310834813499,
+ "loss": 0.4249,
+ "step": 3160
+ },
+ {
+ "epoch": 5.17,
+ "learning_rate": 0.0005257548845470693,
+ "loss": 0.4289,
+ "step": 3170
+ },
+ {
+ "epoch": 5.19,
+ "learning_rate": 0.0005239786856127886,
+ "loss": 0.446,
+ "step": 3180
+ },
+ {
+ "epoch": 5.2,
+ "learning_rate": 0.000522202486678508,
+ "loss": 0.4505,
+ "step": 3190
+ },
+ {
+ "epoch": 5.22,
+ "learning_rate": 0.0005204262877442274,
+ "loss": 0.4613,
+ "step": 3200
+ },
+ {
+ "epoch": 5.22,
+ "eval_loss": 0.4256543815135956,
+ "eval_runtime": 12.7792,
+ "eval_samples_per_second": 91.634,
+ "eval_steps_per_second": 11.503,
+ "step": 3200
+ },
+ {
+ "epoch": 5.24,
+ "learning_rate": 0.0005186500888099467,
+ "loss": 0.4168,
+ "step": 3210
+ },
+ {
+ "epoch": 5.25,
+ "learning_rate": 0.000516873889875666,
+ "loss": 0.4754,
+ "step": 3220
+ },
+ {
+ "epoch": 5.27,
+ "learning_rate": 0.0005150976909413854,
+ "loss": 0.4053,
+ "step": 3230
+ },
+ {
+ "epoch": 5.29,
+ "learning_rate": 0.0005133214920071047,
+ "loss": 0.4463,
+ "step": 3240
+ },
+ {
+ "epoch": 5.3,
+ "learning_rate": 0.0005115452930728241,
+ "loss": 0.4651,
+ "step": 3250
+ },
+ {
+ "epoch": 5.32,
+ "learning_rate": 0.0005097690941385435,
+ "loss": 0.4347,
+ "step": 3260
+ },
+ {
+ "epoch": 5.33,
+ "learning_rate": 0.0005079928952042628,
+ "loss": 0.4227,
+ "step": 3270
+ },
+ {
+ "epoch": 5.35,
+ "learning_rate": 0.0005062166962699822,
+ "loss": 0.4292,
+ "step": 3280
+ },
+ {
+ "epoch": 5.37,
+ "learning_rate": 0.0005044404973357016,
+ "loss": 0.4408,
+ "step": 3290
+ },
+ {
+ "epoch": 5.38,
+ "learning_rate": 0.0005026642984014209,
+ "loss": 0.446,
+ "step": 3300
+ },
+ {
+ "epoch": 5.38,
+ "eval_loss": 0.4269155263900757,
+ "eval_runtime": 12.9043,
+ "eval_samples_per_second": 90.745,
+ "eval_steps_per_second": 11.392,
+ "step": 3300
+ },
+ {
+ "epoch": 5.4,
+ "learning_rate": 0.0005008880994671403,
+ "loss": 0.4059,
+ "step": 3310
+ },
+ {
+ "epoch": 5.42,
+ "learning_rate": 0.0004991119005328598,
+ "loss": 0.4111,
+ "step": 3320
+ },
+ {
+ "epoch": 5.43,
+ "learning_rate": 0.000497335701598579,
+ "loss": 0.3952,
+ "step": 3330
+ },
+ {
+ "epoch": 5.45,
+ "learning_rate": 0.0004955595026642984,
+ "loss": 0.4449,
+ "step": 3340
+ },
+ {
+ "epoch": 5.46,
+ "learning_rate": 0.0004937833037300178,
+ "loss": 0.4312,
+ "step": 3350
+ },
+ {
+ "epoch": 5.48,
+ "learning_rate": 0.0004920071047957371,
+ "loss": 0.428,
+ "step": 3360
+ },
+ {
+ "epoch": 5.5,
+ "learning_rate": 0.0004902309058614565,
+ "loss": 0.4202,
+ "step": 3370
+ },
+ {
+ "epoch": 5.51,
+ "learning_rate": 0.0004884547069271759,
+ "loss": 0.4164,
+ "step": 3380
+ },
+ {
+ "epoch": 5.53,
+ "learning_rate": 0.0004866785079928952,
+ "loss": 0.407,
+ "step": 3390
+ },
+ {
+ "epoch": 5.55,
+ "learning_rate": 0.0004849023090586146,
+ "loss": 0.4,
+ "step": 3400
+ },
+ {
+ "epoch": 5.55,
+ "eval_loss": 0.4314885437488556,
+ "eval_runtime": 13.0675,
+ "eval_samples_per_second": 89.612,
+ "eval_steps_per_second": 11.249,
+ "step": 3400
+ },
+ {
+ "epoch": 5.56,
+ "learning_rate": 0.0004831261101243339,
+ "loss": 0.4138,
+ "step": 3410
+ },
+ {
+ "epoch": 5.58,
+ "learning_rate": 0.00048134991119005326,
+ "loss": 0.4223,
+ "step": 3420
+ },
+ {
+ "epoch": 5.6,
+ "learning_rate": 0.00047957371225577266,
+ "loss": 0.4108,
+ "step": 3430
+ },
+ {
+ "epoch": 5.61,
+ "learning_rate": 0.000477797513321492,
+ "loss": 0.4065,
+ "step": 3440
+ },
+ {
+ "epoch": 5.63,
+ "learning_rate": 0.00047602131438721133,
+ "loss": 0.4481,
+ "step": 3450
+ },
+ {
+ "epoch": 5.64,
+ "learning_rate": 0.00047424511545293073,
+ "loss": 0.4273,
+ "step": 3460
+ },
+ {
+ "epoch": 5.66,
+ "learning_rate": 0.00047246891651865007,
+ "loss": 0.4267,
+ "step": 3470
+ },
+ {
+ "epoch": 5.68,
+ "learning_rate": 0.00047069271758436946,
+ "loss": 0.4626,
+ "step": 3480
+ },
+ {
+ "epoch": 5.69,
+ "learning_rate": 0.00046891651865008885,
+ "loss": 0.4441,
+ "step": 3490
+ },
+ {
+ "epoch": 5.71,
+ "learning_rate": 0.0004671403197158082,
+ "loss": 0.4086,
+ "step": 3500
+ },
+ {
+ "epoch": 5.71,
+ "eval_loss": 0.4278687536716461,
+ "eval_runtime": 12.9778,
+ "eval_samples_per_second": 90.231,
+ "eval_steps_per_second": 11.327,
+ "step": 3500
  }
  ],
  "logging_steps": 10,
  "max_steps": 6130,
  "num_train_epochs": 10,
  "save_steps": 500,
- "total_flos": 5664599237689344.0,
+ "total_flos": 6614658553872384.0,
  "trial_name": null,
  "trial_params": null
  }
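trainer_state.json grows by one log_history record every logging_steps (10) optimizer steps, carrying "loss" and "learning_rate", plus an evaluation record with "eval_loss" every eval_steps (100) steps; "total_flos", "epoch" and "global_step" are updated at the same time. A minimal sketch for pulling those curves back out of the file, assuming it has been downloaded locally (the path is a placeholder):

```python
import json

# Placeholder path for wherever the checkpoint was downloaded.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# Training records carry "loss"; evaluation records carry "eval_loss".
train_curve = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
eval_curve = [(e["step"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]

best_step, best_eval = min(eval_curve, key=lambda p: p[1])
print(f"train points: {len(train_curve)}, eval points: {len(eval_curve)}")
print(f"lowest logged eval_loss: {best_eval:.4f} at step {best_step}")
print(f"best_metric tracked by the Trainer: {state['best_metric']}")
```

Because the checkpoint folder also holds optimizer.pt, scheduler.pt and rng_state.pth, training can be resumed from it with transformers' Trainer.train(resume_from_checkpoint=...) rather than restarting from step 0.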