somaia02 committed
Commit 500e870 · 1 Parent(s): 7e962f3

Training in progress, step 2000, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a58fa9dcca1c27ffd494a46534c40e8cf04faf98e8b281458e05b5136ae9fdcb
+ oid sha256:264ecec36fa0a7877a74e5bc90de5cfe426844837e851cd04e19bd3e592e07d9
  size 5323528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c03e500ed8e4953132e62be3704d6bfdca68f7a406db1b6ee83e2921feef9003
+ oid sha256:9825a8978c13d22cdcc9972f981eca76a202db226c336284f26d0d6ae781e227
  size 10707706
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6f82adac821233515f57963faf84277e6be21f1e14004a972d38969d3b12b54c
+ oid sha256:d122c28c5fc3f53d9d112345e4abc97c9cbf42b6d6c3da282213a9e89d67c386
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:bf79ba88210639e0b725ee3ca8af70f266780a8aabbf9d25faf56fd6dd10d11b
+ oid sha256:8c35dfe7906d79114431a0065ffa36a90ef274205cc0fefa7802dd197d609956
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": 0.45563551783561707,
  "best_model_checkpoint": "bart_lora_outputs\\checkpoint-1500",
- "epoch": 2.4469820554649266,
+ "epoch": 3.262642740619902,
  "eval_steps": 100,
- "global_step": 1500,
+ "global_step": 2000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1027,13 +1027,353 @@
  "eval_samples_per_second": 89.239,
  "eval_steps_per_second": 11.202,
  "step": 1500
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 0.0008206039076376555,
+ "loss": 0.4859,
+ "step": 1510
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 0.0008188277087033748,
+ "loss": 0.5359,
+ "step": 1520
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 0.0008170515097690942,
+ "loss": 0.4693,
+ "step": 1530
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 0.0008152753108348136,
+ "loss": 0.4506,
+ "step": 1540
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 0.0008134991119005329,
+ "loss": 0.4449,
+ "step": 1550
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 0.0008117229129662523,
+ "loss": 0.4976,
+ "step": 1560
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 0.0008099467140319717,
+ "loss": 0.4781,
+ "step": 1570
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 0.0008081705150976909,
+ "loss": 0.4755,
+ "step": 1580
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 0.0008063943161634103,
+ "loss": 0.5088,
+ "step": 1590
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 0.0008046181172291297,
+ "loss": 0.4777,
+ "step": 1600
+ },
+ {
+ "epoch": 2.61,
+ "eval_loss": 0.46053746342658997,
+ "eval_runtime": 13.1201,
+ "eval_samples_per_second": 89.252,
+ "eval_steps_per_second": 11.204,
+ "step": 1600
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 0.0008028419182948491,
+ "loss": 0.4648,
+ "step": 1610
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 0.0008010657193605684,
+ "loss": 0.5066,
+ "step": 1620
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 0.0007992895204262878,
+ "loss": 0.5127,
+ "step": 1630
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 0.0007975133214920072,
+ "loss": 0.5136,
+ "step": 1640
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 0.0007957371225577265,
+ "loss": 0.4911,
+ "step": 1650
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 0.0007939609236234459,
+ "loss": 0.4537,
+ "step": 1660
+ },
+ {
+ "epoch": 2.72,
+ "learning_rate": 0.0007921847246891653,
+ "loss": 0.4958,
+ "step": 1670
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 0.0007904085257548845,
+ "loss": 0.4997,
+ "step": 1680
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 0.0007886323268206039,
+ "loss": 0.5192,
+ "step": 1690
+ },
+ {
+ "epoch": 2.77,
+ "learning_rate": 0.0007868561278863233,
+ "loss": 0.4977,
+ "step": 1700
+ },
+ {
+ "epoch": 2.77,
+ "eval_loss": 0.4585750102996826,
+ "eval_runtime": 13.1101,
+ "eval_samples_per_second": 89.32,
+ "eval_steps_per_second": 11.213,
+ "step": 1700
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 0.0007850799289520426,
+ "loss": 0.5044,
+ "step": 1710
+ },
+ {
+ "epoch": 2.81,
+ "learning_rate": 0.000783303730017762,
+ "loss": 0.4839,
+ "step": 1720
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 0.0007815275310834814,
+ "loss": 0.5234,
+ "step": 1730
+ },
+ {
+ "epoch": 2.84,
+ "learning_rate": 0.0007797513321492007,
+ "loss": 0.4835,
+ "step": 1740
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 0.0007779751332149201,
+ "loss": 0.4939,
+ "step": 1750
+ },
+ {
+ "epoch": 2.87,
+ "learning_rate": 0.0007761989342806395,
+ "loss": 0.554,
+ "step": 1760
+ },
+ {
+ "epoch": 2.89,
+ "learning_rate": 0.0007744227353463588,
+ "loss": 0.4643,
+ "step": 1770
+ },
+ {
+ "epoch": 2.9,
+ "learning_rate": 0.0007726465364120782,
+ "loss": 0.4909,
+ "step": 1780
+ },
+ {
+ "epoch": 2.92,
+ "learning_rate": 0.0007708703374777975,
+ "loss": 0.5042,
+ "step": 1790
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 0.0007690941385435168,
+ "loss": 0.4772,
+ "step": 1800
+ },
+ {
+ "epoch": 2.94,
+ "eval_loss": 0.44169098138809204,
+ "eval_runtime": 13.0107,
+ "eval_samples_per_second": 90.003,
+ "eval_steps_per_second": 11.298,
+ "step": 1800
+ },
+ {
+ "epoch": 2.95,
+ "learning_rate": 0.0007673179396092362,
+ "loss": 0.4529,
+ "step": 1810
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 0.0007655417406749556,
+ "loss": 0.5058,
+ "step": 1820
+ },
+ {
+ "epoch": 2.99,
+ "learning_rate": 0.0007637655417406749,
+ "loss": 0.4794,
+ "step": 1830
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.0007619893428063943,
+ "loss": 0.477,
+ "step": 1840
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": 0.0007602131438721137,
+ "loss": 0.4663,
+ "step": 1850
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": 0.0007584369449378331,
+ "loss": 0.4634,
+ "step": 1860
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": 0.0007566607460035524,
+ "loss": 0.5159,
+ "step": 1870
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": 0.0007548845470692718,
+ "loss": 0.4818,
+ "step": 1880
+ },
+ {
+ "epoch": 3.08,
+ "learning_rate": 0.0007531083481349912,
+ "loss": 0.4747,
+ "step": 1890
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": 0.0007513321492007104,
+ "loss": 0.4605,
+ "step": 1900
+ },
+ {
+ "epoch": 3.1,
+ "eval_loss": 0.4483106732368469,
+ "eval_runtime": 13.1231,
+ "eval_samples_per_second": 89.232,
+ "eval_steps_per_second": 11.202,
+ "step": 1900
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": 0.0007495559502664298,
+ "loss": 0.474,
+ "step": 1910
+ },
+ {
+ "epoch": 3.13,
+ "learning_rate": 0.0007477797513321492,
+ "loss": 0.4639,
+ "step": 1920
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": 0.0007460035523978685,
+ "loss": 0.4485,
+ "step": 1930
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": 0.0007442273534635879,
+ "loss": 0.4835,
+ "step": 1940
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": 0.0007424511545293073,
+ "loss": 0.4805,
+ "step": 1950
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": 0.0007406749555950266,
+ "loss": 0.4818,
+ "step": 1960
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": 0.000738898756660746,
+ "loss": 0.4921,
+ "step": 1970
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": 0.0007371225577264654,
+ "loss": 0.4601,
+ "step": 1980
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": 0.0007353463587921847,
+ "loss": 0.4771,
+ "step": 1990
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": 0.000733570159857904,
+ "loss": 0.4487,
+ "step": 2000
+ },
+ {
+ "epoch": 3.26,
+ "eval_loss": 0.46053341031074524,
+ "eval_runtime": 12.936,
+ "eval_samples_per_second": 90.523,
+ "eval_steps_per_second": 11.364,
+ "step": 2000
  }
  ],
  "logging_steps": 10,
  "max_steps": 6130,
  "num_train_epochs": 10,
  "save_steps": 500,
- "total_flos": 2833006835073024.0,
+ "total_flos": 3778253218971648.0,
  "trial_name": null,
  "trial_params": null
  }