DuongTrongChi commited on
Commit
2d63260
·
verified ·
1 Parent(s): 60fa50e

Training in progress, step 203, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2578d0630ecf659de062330437f2ab118f2bb6371e98b5d7f02cd4c84996cced
3
  size 73911112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f673b7e0040e60617c69a88706873642e60534ff290592fcc0195a9df7ab81e
3
  size 73911112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f0afae3ebba9d0af5bee6787ab79cbf9170b2f759d14ea54267d8db2a83507f
3
  size 37430836
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f082ea210fd68387405176cad6ee88cfbd401951351d7c95447ac68b86b16bb0
3
  size 37430836
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26a269a2cea20ade02f5153afabb9927aa1d806bfa877265376ea45d5823b7de
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec95e73a62601dbe96f98abb332829d9f2af7677329a53ab141fd2b0805c4d17
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.5404663923182441,
5
  "eval_steps": 500,
6
- "global_step": 197,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1386,6 +1386,48 @@
1386
  "learning_rate": 0.00012651515151515153,
1387
  "loss": 1.2623,
1388
  "step": 197
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1389
  }
1390
  ],
1391
  "logging_steps": 1,
@@ -1405,7 +1447,7 @@
1405
  "attributes": {}
1406
  }
1407
  },
1408
- "total_flos": 2.4436699819317658e+17,
1409
  "train_batch_size": 4,
1410
  "trial_name": null,
1411
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5569272976680384,
5
  "eval_steps": 500,
6
+ "global_step": 203,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1386
  "learning_rate": 0.00012651515151515153,
1387
  "loss": 1.2623,
1388
  "step": 197
1389
+ },
1390
+ {
1391
+ "epoch": 0.5432098765432098,
1392
+ "grad_norm": 0.15332822501659393,
1393
+ "learning_rate": 0.00012575757575757575,
1394
+ "loss": 1.2113,
1395
+ "step": 198
1396
+ },
1397
+ {
1398
+ "epoch": 0.5459533607681756,
1399
+ "grad_norm": 0.15453462302684784,
1400
+ "learning_rate": 0.000125,
1401
+ "loss": 1.157,
1402
+ "step": 199
1403
+ },
1404
+ {
1405
+ "epoch": 0.5486968449931413,
1406
+ "grad_norm": 0.18716047704219818,
1407
+ "learning_rate": 0.00012424242424242425,
1408
+ "loss": 1.1896,
1409
+ "step": 200
1410
+ },
1411
+ {
1412
+ "epoch": 0.551440329218107,
1413
+ "grad_norm": 0.1628562957048416,
1414
+ "learning_rate": 0.0001234848484848485,
1415
+ "loss": 1.2035,
1416
+ "step": 201
1417
+ },
1418
+ {
1419
+ "epoch": 0.5541838134430727,
1420
+ "grad_norm": 0.15352866053581238,
1421
+ "learning_rate": 0.00012272727272727272,
1422
+ "loss": 1.2529,
1423
+ "step": 202
1424
+ },
1425
+ {
1426
+ "epoch": 0.5569272976680384,
1427
+ "grad_norm": 0.1607903093099594,
1428
+ "learning_rate": 0.00012196969696969697,
1429
+ "loss": 1.224,
1430
+ "step": 203
1431
  }
1432
  ],
1433
  "logging_steps": 1,
 
1447
  "attributes": {}
1448
  }
1449
  },
1450
+ "total_flos": 2.5176887757982925e+17,
1451
  "train_batch_size": 4,
1452
  "trial_name": null,
1453
  "trial_params": null