vaatsav06 committed on
Commit
d21c41f
·
verified ·
1 Parent(s): 5a554c2

Training in progress, step 1042, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9cb5c823ef74dfadc52b27febc38a2ac3a875bb51704bb51249b3924e6ee6f2b
3
  size 268858112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4dee753b9fd5e325d98ea73fa4a364829616b41c3c5b0874fed1e35477e96f8
3
  size 268858112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:35bbf0d8ee6289b2e585cda8bdd7541a68981e8fbe801a50960d7cef7275b8b9
3
  size 137668197
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ac8d611eadf762b6417b8fc45f0f3bd33ee0cd4b6a0553f1069d9d3b34fe36c
3
  size 137668197
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bfad65ecdebf4d2a4ef23aa53c40f8dd23f710c224d047e20d32dc51c4015de2
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53371309ce8a2079e2c22bdc9bc5a9a1a2387150aad0de6421e37a4c6f73ba93
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.9596928982725528,
6
  "eval_steps": 500,
7
- "global_step": 1000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1408,6 +1408,62 @@
1408
  "learning_rate": 0.0001,
1409
  "loss": 0.2554,
1410
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1411
  }
1412
  ],
1413
  "logging_steps": 5,
@@ -1422,12 +1478,12 @@
1422
  "should_evaluate": false,
1423
  "should_log": false,
1424
  "should_save": true,
1425
- "should_training_stop": false
1426
  },
1427
  "attributes": {}
1428
  }
1429
  },
1430
- "total_flos": 3.323888912649138e+17,
1431
  "train_batch_size": 24,
1432
  "trial_name": null,
1433
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
  "eval_steps": 500,
7
+ "global_step": 1042,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1408
  "learning_rate": 0.0001,
1409
  "loss": 0.2554,
1410
  "step": 1000
1411
+ },
1412
+ {
1413
+ "epoch": 0.9644913627639156,
1414
+ "grad_norm": 0.1351858526468277,
1415
+ "learning_rate": 0.0001,
1416
+ "loss": 0.2633,
1417
+ "step": 1005
1418
+ },
1419
+ {
1420
+ "epoch": 0.9692898272552783,
1421
+ "grad_norm": 0.1442970633506775,
1422
+ "learning_rate": 0.0001,
1423
+ "loss": 0.2579,
1424
+ "step": 1010
1425
+ },
1426
+ {
1427
+ "epoch": 0.974088291746641,
1428
+ "grad_norm": 0.1590578258037567,
1429
+ "learning_rate": 0.0001,
1430
+ "loss": 0.2843,
1431
+ "step": 1015
1432
+ },
1433
+ {
1434
+ "epoch": 0.9788867562380038,
1435
+ "grad_norm": 0.15692369639873505,
1436
+ "learning_rate": 0.0001,
1437
+ "loss": 0.2669,
1438
+ "step": 1020
1439
+ },
1440
+ {
1441
+ "epoch": 0.9836852207293666,
1442
+ "grad_norm": 0.13090096414089203,
1443
+ "learning_rate": 0.0001,
1444
+ "loss": 0.2589,
1445
+ "step": 1025
1446
+ },
1447
+ {
1448
+ "epoch": 0.9884836852207294,
1449
+ "grad_norm": 0.1544967144727707,
1450
+ "learning_rate": 0.0001,
1451
+ "loss": 0.2686,
1452
+ "step": 1030
1453
+ },
1454
+ {
1455
+ "epoch": 0.9932821497120922,
1456
+ "grad_norm": 0.14533308148384094,
1457
+ "learning_rate": 0.0001,
1458
+ "loss": 0.2666,
1459
+ "step": 1035
1460
+ },
1461
+ {
1462
+ "epoch": 0.9980806142034548,
1463
+ "grad_norm": 0.13683825731277466,
1464
+ "learning_rate": 0.0001,
1465
+ "loss": 0.2695,
1466
+ "step": 1040
1467
  }
1468
  ],
1469
  "logging_steps": 5,
 
1478
  "should_evaluate": false,
1479
  "should_log": false,
1480
  "should_save": true,
1481
+ "should_training_stop": true
1482
  },
1483
  "attributes": {}
1484
  }
1485
  },
1486
+ "total_flos": 3.461829523913416e+17,
1487
  "train_batch_size": 24,
1488
  "trial_name": null,
1489
  "trial_params": null