DuongTrongChi commited on
Commit
e104c3b
·
verified ·
1 Parent(s): 681c9eb

Training in progress, step 359, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4dff6b0dedb8cd82daf4930c2c298d702697c3a8d49a7cf3e70227e87220ea03
3
  size 60010048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cd7a11dd960decfde159b9ffedcb277804a1627b5b44f99755257d42961884c
3
  size 60010048
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0b091d67757c0024511fb220800a0f1709b5b2e2e280a5eb53a82c2d0649560
3
  size 30428180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85eceb4a829aa4047fbf635b04070cf32f5480f395bbe1d6b5df070c2e3a1aac
3
  size 30428180
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:248bdad94673d0171b6613a2054004ce1fbd7cc6609011663f62eb8bd70a3480
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dcd05dfbbdeba643c656ed11b36a8a6487d3151c9ac5ff333ebbd78351d6657
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9465020576131687,
5
  "eval_steps": 500,
6
- "global_step": 345,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2422,6 +2422,104 @@
2422
  "learning_rate": 1.4393939393939396e-05,
2423
  "loss": 1.1526,
2424
  "step": 345
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2425
  }
2426
  ],
2427
  "logging_steps": 1,
@@ -2441,7 +2539,7 @@
2441
  "attributes": {}
2442
  }
2443
  },
2444
- "total_flos": 4.21047247978709e+17,
2445
  "train_batch_size": 4,
2446
  "trial_name": null,
2447
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9849108367626886,
5
  "eval_steps": 500,
6
+ "global_step": 359,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2422
  "learning_rate": 1.4393939393939396e-05,
2423
  "loss": 1.1526,
2424
  "step": 345
2425
+ },
2426
+ {
2427
+ "epoch": 0.9492455418381345,
2428
+ "grad_norm": 0.14177238941192627,
2429
+ "learning_rate": 1.3636363636363637e-05,
2430
+ "loss": 1.132,
2431
+ "step": 346
2432
+ },
2433
+ {
2434
+ "epoch": 0.9519890260631001,
2435
+ "grad_norm": 0.13835884630680084,
2436
+ "learning_rate": 1.287878787878788e-05,
2437
+ "loss": 1.1599,
2438
+ "step": 347
2439
+ },
2440
+ {
2441
+ "epoch": 0.9547325102880658,
2442
+ "grad_norm": 0.14390669763088226,
2443
+ "learning_rate": 1.2121212121212122e-05,
2444
+ "loss": 1.15,
2445
+ "step": 348
2446
+ },
2447
+ {
2448
+ "epoch": 0.9574759945130316,
2449
+ "grad_norm": 0.14811821281909943,
2450
+ "learning_rate": 1.1363636363636365e-05,
2451
+ "loss": 1.0759,
2452
+ "step": 349
2453
+ },
2454
+ {
2455
+ "epoch": 0.9602194787379973,
2456
+ "grad_norm": 0.14959345757961273,
2457
+ "learning_rate": 1.0606060606060607e-05,
2458
+ "loss": 1.126,
2459
+ "step": 350
2460
+ },
2461
+ {
2462
+ "epoch": 0.9629629629629629,
2463
+ "grad_norm": 0.14656995236873627,
2464
+ "learning_rate": 9.848484848484848e-06,
2465
+ "loss": 1.1341,
2466
+ "step": 351
2467
+ },
2468
+ {
2469
+ "epoch": 0.9657064471879286,
2470
+ "grad_norm": 0.14695106446743011,
2471
+ "learning_rate": 9.090909090909091e-06,
2472
+ "loss": 1.1259,
2473
+ "step": 352
2474
+ },
2475
+ {
2476
+ "epoch": 0.9684499314128944,
2477
+ "grad_norm": 0.14155460894107819,
2478
+ "learning_rate": 8.333333333333334e-06,
2479
+ "loss": 1.1503,
2480
+ "step": 353
2481
+ },
2482
+ {
2483
+ "epoch": 0.9711934156378601,
2484
+ "grad_norm": 0.1382407397031784,
2485
+ "learning_rate": 7.5757575757575764e-06,
2486
+ "loss": 1.1417,
2487
+ "step": 354
2488
+ },
2489
+ {
2490
+ "epoch": 0.9739368998628258,
2491
+ "grad_norm": 0.14089229702949524,
2492
+ "learning_rate": 6.818181818181818e-06,
2493
+ "loss": 1.1551,
2494
+ "step": 355
2495
+ },
2496
+ {
2497
+ "epoch": 0.9766803840877915,
2498
+ "grad_norm": 0.14886945486068726,
2499
+ "learning_rate": 6.060606060606061e-06,
2500
+ "loss": 1.0973,
2501
+ "step": 356
2502
+ },
2503
+ {
2504
+ "epoch": 0.9794238683127572,
2505
+ "grad_norm": 0.1485728621482849,
2506
+ "learning_rate": 5.303030303030304e-06,
2507
+ "loss": 1.1028,
2508
+ "step": 357
2509
+ },
2510
+ {
2511
+ "epoch": 0.9821673525377229,
2512
+ "grad_norm": 0.1496025174856186,
2513
+ "learning_rate": 4.5454545454545455e-06,
2514
+ "loss": 1.0941,
2515
+ "step": 358
2516
+ },
2517
+ {
2518
+ "epoch": 0.9849108367626886,
2519
+ "grad_norm": 0.1394403725862503,
2520
+ "learning_rate": 3.7878787878787882e-06,
2521
+ "loss": 1.1452,
2522
+ "step": 359
2523
  }
2524
  ],
2525
  "logging_steps": 1,
 
2539
  "attributes": {}
2540
  }
2541
  },
2542
+ "total_flos": 4.3742608796698214e+17,
2543
  "train_batch_size": 4,
2544
  "trial_name": null,
2545
  "trial_params": null