dada22231 commited on
Commit
979dddb
·
verified ·
1 Parent(s): 72e02e3

Training in progress, step 75, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3eca75a43619b328b0443258f8278f9b3a562cbae15a6c0e59428cda3a059049
3
  size 60599872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89c3cfbc71baa360ea503a04599efc9dfe299a54d04c47b2f646367599cbac9d
3
  size 60599872
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d072ff361a656f4fb6bf1f813c142cd4db273062ba58ca09064e204d22f7b13
3
  size 121392706
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af020e95108d540c6d446acbaabbd2498107f9d85933dc962f0469dddc9c4a38
3
  size 121392706
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b748591b60e4f7803c525bf83bf57f5919b989bbc618f6902260bf23fc18bb76
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b49768a88e541f702eb75d71268964d7cc79b72046465ea5ab4d2f7b2684f932
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b5dc3b0f1acb9ea93b9c6ba18200494da8747f8f051a3e59be97c0c11d47f81
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d42e3a6f19e06c36f6182caeab45c4d6cf1899bf44ba738453d442d66c7fa692
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:00f4bebdaf9bb1f18217bf256566ced36954aede24a8e4abfda0ad010b774491
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dedb05ed73814bd7342db7ab5d4bec7aa9950516a77a467f7cfda9f6dec31cc9
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:628054e8d99f9f8d0709763ece645782d6401904f6fd702995f088b3fd95396b
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c8d3317a9d670385f3523e97d1d1073e2b084502a4c464fcf0832f7fe80c1c6
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27a06debcc524d5e29377ca1c81e86a2cd28c93506013f68ac7d1bf85491fb4e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:370cee31176b8bff781da8f054b9870dc93c63a8623674218a84718aa7abd3af
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.037851009517908096,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-50",
4
- "epoch": 0.038272933859586175,
5
  "eval_steps": 25,
6
- "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -381,6 +381,189 @@
381
  "eval_samples_per_second": 51.128,
382
  "eval_steps_per_second": 13.293,
383
  "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
384
  }
385
  ],
386
  "logging_steps": 1,
@@ -409,7 +592,7 @@
409
  "attributes": {}
410
  }
411
  },
412
- "total_flos": 2.978203954891981e+16,
413
  "train_batch_size": 1,
414
  "trial_name": null,
415
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.021306684240698814,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-75",
4
+ "epoch": 0.05740940078937926,
5
  "eval_steps": 25,
6
+ "global_step": 75,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
381
  "eval_samples_per_second": 51.128,
382
  "eval_steps_per_second": 13.293,
383
  "step": 50
384
+ },
385
+ {
386
+ "epoch": 0.0390383925367779,
387
+ "grad_norm": 0.3454943597316742,
388
+ "learning_rate": 5.6472358726979935e-05,
389
+ "loss": 0.2051,
390
+ "step": 51
391
+ },
392
+ {
393
+ "epoch": 0.03980385121396962,
394
+ "grad_norm": 1.5023900270462036,
395
+ "learning_rate": 5.500000000000001e-05,
396
+ "loss": 0.3154,
397
+ "step": 52
398
+ },
399
+ {
400
+ "epoch": 0.040569309891161344,
401
+ "grad_norm": 0.16861777007579803,
402
+ "learning_rate": 5.352764127302008e-05,
403
+ "loss": 0.024,
404
+ "step": 53
405
+ },
406
+ {
407
+ "epoch": 0.04133476856835307,
408
+ "grad_norm": 0.01335059478878975,
409
+ "learning_rate": 5.205685918464356e-05,
410
+ "loss": 0.0005,
411
+ "step": 54
412
+ },
413
+ {
414
+ "epoch": 0.04210022724554479,
415
+ "grad_norm": 0.01417592540383339,
416
+ "learning_rate": 5.058922868516978e-05,
417
+ "loss": 0.0006,
418
+ "step": 55
419
+ },
420
+ {
421
+ "epoch": 0.04286568592273651,
422
+ "grad_norm": 0.010599881410598755,
423
+ "learning_rate": 4.912632135009769e-05,
424
+ "loss": 0.0004,
425
+ "step": 56
426
+ },
427
+ {
428
+ "epoch": 0.043631144599928236,
429
+ "grad_norm": 0.010219581425189972,
430
+ "learning_rate": 4.7669703697243516e-05,
431
+ "loss": 0.0004,
432
+ "step": 57
433
+ },
434
+ {
435
+ "epoch": 0.04439660327711996,
436
+ "grad_norm": 0.008394899778068066,
437
+ "learning_rate": 4.6220935509274235e-05,
438
+ "loss": 0.0004,
439
+ "step": 58
440
+ },
441
+ {
442
+ "epoch": 0.04516206195431169,
443
+ "grad_norm": 0.010442732833325863,
444
+ "learning_rate": 4.478156816345321e-05,
445
+ "loss": 0.0004,
446
+ "step": 59
447
+ },
448
+ {
449
+ "epoch": 0.04592752063150341,
450
+ "grad_norm": 0.01015259325504303,
451
+ "learning_rate": 4.3353142970386564e-05,
452
+ "loss": 0.0004,
453
+ "step": 60
454
+ },
455
+ {
456
+ "epoch": 0.046692979308695134,
457
+ "grad_norm": 0.013029181398451328,
458
+ "learning_rate": 4.19371895235492e-05,
459
+ "loss": 0.0003,
460
+ "step": 61
461
+ },
462
+ {
463
+ "epoch": 0.04745843798588686,
464
+ "grad_norm": 0.0073064109310507774,
465
+ "learning_rate": 4.053522406135775e-05,
466
+ "loss": 0.0003,
467
+ "step": 62
468
+ },
469
+ {
470
+ "epoch": 0.04822389666307858,
471
+ "grad_norm": 0.2842812240123749,
472
+ "learning_rate": 3.9148747843544495e-05,
473
+ "loss": 0.1657,
474
+ "step": 63
475
+ },
476
+ {
477
+ "epoch": 0.0489893553402703,
478
+ "grad_norm": 0.26676300168037415,
479
+ "learning_rate": 3.777924554357096e-05,
480
+ "loss": 0.1517,
481
+ "step": 64
482
+ },
483
+ {
484
+ "epoch": 0.049754814017462026,
485
+ "grad_norm": 0.5113205909729004,
486
+ "learning_rate": 3.642818365880224e-05,
487
+ "loss": 0.1416,
488
+ "step": 65
489
+ },
490
+ {
491
+ "epoch": 0.05052027269465375,
492
+ "grad_norm": 0.19968412816524506,
493
+ "learning_rate": 3.509700894014496e-05,
494
+ "loss": 0.0041,
495
+ "step": 66
496
+ },
497
+ {
498
+ "epoch": 0.05128573137184547,
499
+ "grad_norm": 0.08529309928417206,
500
+ "learning_rate": 3.378714684283011e-05,
501
+ "loss": 0.0021,
502
+ "step": 67
503
+ },
504
+ {
505
+ "epoch": 0.052051190049037195,
506
+ "grad_norm": 0.03840894624590874,
507
+ "learning_rate": 3.250000000000001e-05,
508
+ "loss": 0.0011,
509
+ "step": 68
510
+ },
511
+ {
512
+ "epoch": 0.05281664872622892,
513
+ "grad_norm": 0.02306171879172325,
514
+ "learning_rate": 3.123694672073344e-05,
515
+ "loss": 0.0006,
516
+ "step": 69
517
+ },
518
+ {
519
+ "epoch": 0.05358210740342064,
520
+ "grad_norm": 0.012097193859517574,
521
+ "learning_rate": 2.9999339514117912e-05,
522
+ "loss": 0.0004,
523
+ "step": 70
524
+ },
525
+ {
526
+ "epoch": 0.054347566080612364,
527
+ "grad_norm": 0.010973786003887653,
528
+ "learning_rate": 2.8788503640948912e-05,
529
+ "loss": 0.0004,
530
+ "step": 71
531
+ },
532
+ {
533
+ "epoch": 0.055113024757804094,
534
+ "grad_norm": 0.01180847268551588,
535
+ "learning_rate": 2.760573569460757e-05,
536
+ "loss": 0.0004,
537
+ "step": 72
538
+ },
539
+ {
540
+ "epoch": 0.05587848343499582,
541
+ "grad_norm": 0.010331504046916962,
542
+ "learning_rate": 2.645230221263596e-05,
543
+ "loss": 0.0004,
544
+ "step": 73
545
+ },
546
+ {
547
+ "epoch": 0.05664394211218754,
548
+ "grad_norm": 0.012231193482875824,
549
+ "learning_rate": 2.53294383204969e-05,
550
+ "loss": 0.0004,
551
+ "step": 74
552
+ },
553
+ {
554
+ "epoch": 0.05740940078937926,
555
+ "grad_norm": 0.009561908431351185,
556
+ "learning_rate": 2.423834640897079e-05,
557
+ "loss": 0.0003,
558
+ "step": 75
559
+ },
560
+ {
561
+ "epoch": 0.05740940078937926,
562
+ "eval_loss": 0.021306684240698814,
563
+ "eval_runtime": 1.043,
564
+ "eval_samples_per_second": 47.938,
565
+ "eval_steps_per_second": 12.464,
566
+ "step": 75
567
  }
568
  ],
569
  "logging_steps": 1,
 
592
  "attributes": {}
593
  }
594
  },
595
+ "total_flos": 4.482214844275098e+16,
596
  "train_batch_size": 1,
597
  "trial_name": null,
598
  "trial_params": null