MohamedAhmedAE committed on
Commit
b1984dd
·
verified ·
1 Parent(s): 1a1105b

Training in progress, step 38600, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -21,12 +21,12 @@
21
  "revision": null,
22
  "target_modules": [
23
  "o_proj",
24
- "gate_proj",
25
- "v_proj",
26
- "down_proj",
27
  "up_proj",
 
 
28
  "q_proj",
29
- "k_proj"
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
21
  "revision": null,
22
  "target_modules": [
23
  "o_proj",
 
 
 
24
  "up_proj",
25
+ "v_proj",
26
+ "gate_proj",
27
  "q_proj",
28
+ "k_proj",
29
+ "down_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a9c4e19223b95fb5758c3ee2527d0abc53c761261cd302c9e6121979523da93
3
  size 778096664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be0e066e83eb70c8a860b58a923a975fe6a2d64aee934a2b357e59d232707fa1
3
  size 778096664
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9dc6513a85bf522d3571cc919b7acbd2c16e6d887df27f7af012d7ddd099d4c3
3
  size 396581506
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a106a17c115a2e9fff16d6446794d7d6fbe741348c1aa6c74b42543757baaa4a
3
  size 396581506
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:adde30e974d4a4ac089075ed12c4f52f5d65760a545bb6c18cbe5820c191fefe
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8afd281dafdd97beb15aaaf8b2ec92cb9800a347ae9bb7063ac2a5052be62319
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c088826c17789740316b8318f190a406722ca97fa4cf22bb36e1b18efcb243c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:599df61887dcd53cea391cff665b823e3f91f12b6512b5072d914d3c9170f985
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.052300236742029084,
5
  "eval_steps": 500,
6
- "global_step": 37600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2639,6 +2639,76 @@
2639
  "learning_rate": 1.9865354891677735e-05,
2640
  "loss": 1.8286,
2641
  "step": 37600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2642
  }
2643
  ],
2644
  "logging_steps": 100,
@@ -2658,7 +2728,7 @@
2658
  "attributes": {}
2659
  }
2660
  },
2661
- "total_flos": 2.5848762127644672e+17,
2662
  "train_batch_size": 2,
2663
  "trial_name": null,
2664
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.05369120048516816,
5
  "eval_steps": 500,
6
+ "global_step": 38600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2639
  "learning_rate": 1.9865354891677735e-05,
2640
  "loss": 1.8286,
2641
  "step": 37600
2642
+ },
2643
+ {
2644
+ "epoch": 0.05243933311634299,
2645
+ "grad_norm": 1.9087238311767578,
2646
+ "learning_rate": 1.98646392688979e-05,
2647
+ "loss": 1.8904,
2648
+ "step": 37700
2649
+ },
2650
+ {
2651
+ "epoch": 0.0525784294906569,
2652
+ "grad_norm": 2.815120220184326,
2653
+ "learning_rate": 1.9863921762388266e-05,
2654
+ "loss": 1.9064,
2655
+ "step": 37800
2656
+ },
2657
+ {
2658
+ "epoch": 0.052717525864970805,
2659
+ "grad_norm": 2.9733479022979736,
2660
+ "learning_rate": 1.986320237228585e-05,
2661
+ "loss": 1.9261,
2662
+ "step": 37900
2663
+ },
2664
+ {
2665
+ "epoch": 0.05285662223928471,
2666
+ "grad_norm": 2.9672937393188477,
2667
+ "learning_rate": 1.9862481098728022e-05,
2668
+ "loss": 1.8735,
2669
+ "step": 38000
2670
+ },
2671
+ {
2672
+ "epoch": 0.05299571861359862,
2673
+ "grad_norm": 2.93483567237854,
2674
+ "learning_rate": 1.9861757941852518e-05,
2675
+ "loss": 1.8418,
2676
+ "step": 38100
2677
+ },
2678
+ {
2679
+ "epoch": 0.053134814987912525,
2680
+ "grad_norm": 4.23644495010376,
2681
+ "learning_rate": 1.9861032901797425e-05,
2682
+ "loss": 1.8763,
2683
+ "step": 38200
2684
+ },
2685
+ {
2686
+ "epoch": 0.05327391136222643,
2687
+ "grad_norm": 3.7690341472625732,
2688
+ "learning_rate": 1.98603059787012e-05,
2689
+ "loss": 1.9315,
2690
+ "step": 38300
2691
+ },
2692
+ {
2693
+ "epoch": 0.05341300773654034,
2694
+ "grad_norm": 2.4336416721343994,
2695
+ "learning_rate": 1.9859577172702658e-05,
2696
+ "loss": 1.9413,
2697
+ "step": 38400
2698
+ },
2699
+ {
2700
+ "epoch": 0.053552104110854246,
2701
+ "grad_norm": 3.6931252479553223,
2702
+ "learning_rate": 1.9858846483940964e-05,
2703
+ "loss": 1.8801,
2704
+ "step": 38500
2705
+ },
2706
+ {
2707
+ "epoch": 0.05369120048516816,
2708
+ "grad_norm": 3.142920970916748,
2709
+ "learning_rate": 1.9858113912555646e-05,
2710
+ "loss": 1.8627,
2711
+ "step": 38600
2712
  }
2713
  ],
2714
  "logging_steps": 100,
 
2728
  "attributes": {}
2729
  }
2730
  },
2731
+ "total_flos": 2.7191656821399552e+17,
2732
  "train_batch_size": 2,
2733
  "trial_name": null,
2734
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a8a868f3f2f56114ee6bc2428526d9923f9b81003f8674ec8d9fc77334d371a
3
  size 5688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b747b6f384138ada304508b811385d92b8a2e349ed40953edea15e5db18a6e2b
3
  size 5688