nttx commited on
Commit
af37f5f
·
verified ·
1 Parent(s): d4cfa5f

Training in progress, step 81, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2710a59fecf03a8a8d2164189f7b5a61a2aaa858533a590caaca196180c2dbf
3
  size 838906392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad7089998671b2828991c7ebc54ddec960dd2311ec675a49fb8dbba18ee0c8b1
3
  size 838906392
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c7a26fdc4f8ef2a4f46c666afe97085b7025cfb84583f859890f6df4e2a4475
3
  size 1677991354
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1320dcafbe36ee032fc41a1f4c1e8baa2eccb06ec41471b2a75409fe051013c4
3
  size 1677991354
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb81f89b68366da380b1091c30c5c0ac19c1bdb0cd3f72af8a1d8afc75486321
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96d3c925b28227b95a8ac44434affb41453bdddd6810333936ee7941a29ba2b8
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bbfd42597cb17dd5953648a2f02c05c3e9ff1d1ccc350bee78679ee3cb0d031a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:add675dc265850f22b56e2052f74b9f71b110b1c0f63daf3a4fb0e1b958e5b2a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.9139928817749023,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
4
- "epoch": 1.8785046728971961,
5
  "eval_steps": 50,
6
- "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -373,6 +373,223 @@
373
  "eval_samples_per_second": 9.134,
374
  "eval_steps_per_second": 4.669,
375
  "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
376
  }
377
  ],
378
  "logging_steps": 1,
@@ -396,12 +613,12 @@
396
  "should_evaluate": false,
397
  "should_log": false,
398
  "should_save": true,
399
- "should_training_stop": false
400
  },
401
  "attributes": {}
402
  }
403
  },
404
- "total_flos": 1.27296891518976e+17,
405
  "train_batch_size": 8,
406
  "trial_name": null,
407
  "trial_params": null
 
1
  {
2
  "best_metric": 0.9139928817749023,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
4
+ "epoch": 3.05607476635514,
5
  "eval_steps": 50,
6
+ "global_step": 81,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
373
  "eval_samples_per_second": 9.134,
374
  "eval_steps_per_second": 4.669,
375
  "step": 50
376
+ },
377
+ {
378
+ "epoch": 1.9158878504672896,
379
+ "grad_norm": 3.224144220352173,
380
+ "learning_rate": 6.368314950360415e-05,
381
+ "loss": 3.6958,
382
+ "step": 51
383
+ },
384
+ {
385
+ "epoch": 1.953271028037383,
386
+ "grad_norm": 3.0531082153320312,
387
+ "learning_rate": 6.069665416032487e-05,
388
+ "loss": 2.5114,
389
+ "step": 52
390
+ },
391
+ {
392
+ "epoch": 1.9906542056074765,
393
+ "grad_norm": 3.5681068897247314,
394
+ "learning_rate": 5.7669582743934284e-05,
395
+ "loss": 3.0081,
396
+ "step": 53
397
+ },
398
+ {
399
+ "epoch": 2.0373831775700935,
400
+ "grad_norm": 2.371832847595215,
401
+ "learning_rate": 5.4613417973165106e-05,
402
+ "loss": 3.1453,
403
+ "step": 54
404
+ },
405
+ {
406
+ "epoch": 2.074766355140187,
407
+ "grad_norm": 2.573709011077881,
408
+ "learning_rate": 5.153975292780853e-05,
409
+ "loss": 2.6346,
410
+ "step": 55
411
+ },
412
+ {
413
+ "epoch": 2.1121495327102804,
414
+ "grad_norm": 2.663743019104004,
415
+ "learning_rate": 4.8460247072191496e-05,
416
+ "loss": 1.8031,
417
+ "step": 56
418
+ },
419
+ {
420
+ "epoch": 2.149532710280374,
421
+ "grad_norm": 2.9047420024871826,
422
+ "learning_rate": 4.5386582026834906e-05,
423
+ "loss": 1.7328,
424
+ "step": 57
425
+ },
426
+ {
427
+ "epoch": 2.1869158878504673,
428
+ "grad_norm": 3.729478359222412,
429
+ "learning_rate": 4.233041725606572e-05,
430
+ "loss": 2.3163,
431
+ "step": 58
432
+ },
433
+ {
434
+ "epoch": 2.2242990654205608,
435
+ "grad_norm": 4.952083587646484,
436
+ "learning_rate": 3.930334583967514e-05,
437
+ "loss": 2.5764,
438
+ "step": 59
439
+ },
440
+ {
441
+ "epoch": 2.2616822429906542,
442
+ "grad_norm": 4.32457971572876,
443
+ "learning_rate": 3.631685049639586e-05,
444
+ "loss": 3.2292,
445
+ "step": 60
446
+ },
447
+ {
448
+ "epoch": 2.2990654205607477,
449
+ "grad_norm": 4.138461112976074,
450
+ "learning_rate": 3.338226002601703e-05,
451
+ "loss": 2.4756,
452
+ "step": 61
453
+ },
454
+ {
455
+ "epoch": 2.336448598130841,
456
+ "grad_norm": 4.143182277679443,
457
+ "learning_rate": 3.0510706335366035e-05,
458
+ "loss": 1.8892,
459
+ "step": 62
460
+ },
461
+ {
462
+ "epoch": 2.3738317757009346,
463
+ "grad_norm": 4.396746635437012,
464
+ "learning_rate": 2.771308221117309e-05,
465
+ "loss": 1.9185,
466
+ "step": 63
467
+ },
468
+ {
469
+ "epoch": 2.411214953271028,
470
+ "grad_norm": 4.355361461639404,
471
+ "learning_rate": 2.500000000000001e-05,
472
+ "loss": 2.1326,
473
+ "step": 64
474
+ },
475
+ {
476
+ "epoch": 2.4485981308411215,
477
+ "grad_norm": 4.803137302398682,
478
+ "learning_rate": 2.238175135197471e-05,
479
+ "loss": 2.3713,
480
+ "step": 65
481
+ },
482
+ {
483
+ "epoch": 2.485981308411215,
484
+ "grad_norm": 3.864950656890869,
485
+ "learning_rate": 1.9868268181037185e-05,
486
+ "loss": 2.9682,
487
+ "step": 66
488
+ },
489
+ {
490
+ "epoch": 2.5233644859813085,
491
+ "grad_norm": 3.767958641052246,
492
+ "learning_rate": 1.746908498978791e-05,
493
+ "loss": 2.2154,
494
+ "step": 67
495
+ },
496
+ {
497
+ "epoch": 2.560747663551402,
498
+ "grad_norm": 3.542163133621216,
499
+ "learning_rate": 1.5193302701853673e-05,
500
+ "loss": 1.3826,
501
+ "step": 68
502
+ },
503
+ {
504
+ "epoch": 2.5981308411214954,
505
+ "grad_norm": 4.084665775299072,
506
+ "learning_rate": 1.3049554138967051e-05,
507
+ "loss": 1.9291,
508
+ "step": 69
509
+ },
510
+ {
511
+ "epoch": 2.635514018691589,
512
+ "grad_norm": 3.9961540699005127,
513
+ "learning_rate": 1.1045971273716477e-05,
514
+ "loss": 1.8701,
515
+ "step": 70
516
+ },
517
+ {
518
+ "epoch": 2.6728971962616823,
519
+ "grad_norm": 4.667150020599365,
520
+ "learning_rate": 9.190154382188921e-06,
521
+ "loss": 2.4681,
522
+ "step": 71
523
+ },
524
+ {
525
+ "epoch": 2.710280373831776,
526
+ "grad_norm": 3.48500919342041,
527
+ "learning_rate": 7.489143213519301e-06,
528
+ "loss": 2.9565,
529
+ "step": 72
530
+ },
531
+ {
532
+ "epoch": 2.7476635514018692,
533
+ "grad_norm": 3.8385698795318604,
534
+ "learning_rate": 5.949390285710776e-06,
535
+ "loss": 2.269,
536
+ "step": 73
537
+ },
538
+ {
539
+ "epoch": 2.7850467289719627,
540
+ "grad_norm": 3.418334484100342,
541
+ "learning_rate": 4.576736409023813e-06,
542
+ "loss": 1.5572,
543
+ "step": 74
544
+ },
545
+ {
546
+ "epoch": 2.822429906542056,
547
+ "grad_norm": 3.6133151054382324,
548
+ "learning_rate": 3.376388529782215e-06,
549
+ "loss": 1.511,
550
+ "step": 75
551
+ },
552
+ {
553
+ "epoch": 2.8598130841121496,
554
+ "grad_norm": 4.219501972198486,
555
+ "learning_rate": 2.3528999786421756e-06,
556
+ "loss": 1.7603,
557
+ "step": 76
558
+ },
559
+ {
560
+ "epoch": 2.897196261682243,
561
+ "grad_norm": 4.413926601409912,
562
+ "learning_rate": 1.5101531982495308e-06,
563
+ "loss": 2.251,
564
+ "step": 77
565
+ },
566
+ {
567
+ "epoch": 2.9345794392523366,
568
+ "grad_norm": 3.3239150047302246,
569
+ "learning_rate": 8.513450158049108e-07,
570
+ "loss": 2.2245,
571
+ "step": 78
572
+ },
573
+ {
574
+ "epoch": 2.97196261682243,
575
+ "grad_norm": 3.768557548522949,
576
+ "learning_rate": 3.7897451640321323e-07,
577
+ "loss": 1.7201,
578
+ "step": 79
579
+ },
580
+ {
581
+ "epoch": 3.0186915887850465,
582
+ "grad_norm": 3.955902099609375,
583
+ "learning_rate": 9.483356314779479e-08,
584
+ "loss": 2.6022,
585
+ "step": 80
586
+ },
587
+ {
588
+ "epoch": 3.05607476635514,
589
+ "grad_norm": 3.3533103466033936,
590
+ "learning_rate": 0.0,
591
+ "loss": 2.3736,
592
+ "step": 81
593
  }
594
  ],
595
  "logging_steps": 1,
 
613
  "should_evaluate": false,
614
  "should_log": false,
615
  "should_save": true,
616
+ "should_training_stop": true
617
  },
618
  "attributes": {}
619
  }
620
  },
621
+ "total_flos": 2.0622096426074112e+17,
622
  "train_batch_size": 8,
623
  "trial_name": null,
624
  "trial_params": null