oldiday committed on
Commit
06f9a20
·
verified ·
1 Parent(s): 335e605

Training in progress, step 27, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7a61236ce200669c1f43fef349d35dcf3a9c02bf0598fab22dd9da1fd4bd4b3
3
  size 15156512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:784cc2101a6d62c9decede3bd0dcc8f52ec5491dcf2bf06b8a781a4a8aa641f1
3
  size 15156512
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5a7f83ba2f8b0e4f007030bee96afe30b58611014891556a689b19bb6b078fc
3
  size 7825914
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f43bec2e7f9bfda48b7faeef54be25f055dab11e39ae17ff9504acfa23c324ba
3
  size 7825914
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:151ef2fb6ef7aa1e3d2aba168bc9d53da4ff0613d78aa9606d4f7a2c87414876
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88995ee240ca79abe67b4b9085089095ff68a1140c207890ec746917e6a28ebd
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4d20cdfdd80bd4f7336e0d1d85ea3a727e6acd58210dc60f9f2cd451325f653
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95e5b5e126f269e3dd52e13e2ce68743557810f4e2bb59a1eda63231c7828066
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.00904977375565611,
5
  "eval_steps": 9,
6
- "global_step": 9,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -44,6 +44,64 @@
44
  "eval_samples_per_second": 149.898,
45
  "eval_steps_per_second": 18.793,
46
  "step": 9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  }
48
  ],
49
  "logging_steps": 3,
@@ -63,7 +121,7 @@
63
  "attributes": {}
64
  }
65
  },
66
- "total_flos": 771412331593728.0,
67
  "train_batch_size": 8,
68
  "trial_name": null,
69
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.027149321266968326,
5
  "eval_steps": 9,
6
+ "global_step": 27,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
44
  "eval_samples_per_second": 149.898,
45
  "eval_steps_per_second": 18.793,
46
  "step": 9
47
+ },
48
+ {
49
+ "epoch": 0.012066365007541479,
50
+ "grad_norm": 4.279703617095947,
51
+ "learning_rate": 4.993910125649561e-05,
52
+ "loss": 10.4367,
53
+ "step": 12
54
+ },
55
+ {
56
+ "epoch": 0.015082956259426848,
57
+ "grad_norm": 5.507565021514893,
58
+ "learning_rate": 4.962019382530521e-05,
59
+ "loss": 10.1176,
60
+ "step": 15
61
+ },
62
+ {
63
+ "epoch": 0.01809954751131222,
64
+ "grad_norm": 6.325550079345703,
65
+ "learning_rate": 4.9031542398457974e-05,
66
+ "loss": 9.7615,
67
+ "step": 18
68
+ },
69
+ {
70
+ "epoch": 0.01809954751131222,
71
+ "eval_loss": 9.461087226867676,
72
+ "eval_runtime": 11.1633,
73
+ "eval_samples_per_second": 150.045,
74
+ "eval_steps_per_second": 18.812,
75
+ "step": 18
76
+ },
77
+ {
78
+ "epoch": 0.021116138763197588,
79
+ "grad_norm": 6.642955780029297,
80
+ "learning_rate": 4.817959636416969e-05,
81
+ "loss": 9.3199,
82
+ "step": 21
83
+ },
84
+ {
85
+ "epoch": 0.024132730015082957,
86
+ "grad_norm": 7.369113922119141,
87
+ "learning_rate": 4.707368982147318e-05,
88
+ "loss": 8.8135,
89
+ "step": 24
90
+ },
91
+ {
92
+ "epoch": 0.027149321266968326,
93
+ "grad_norm": 6.994415760040283,
94
+ "learning_rate": 4.572593931387604e-05,
95
+ "loss": 8.3456,
96
+ "step": 27
97
+ },
98
+ {
99
+ "epoch": 0.027149321266968326,
100
+ "eval_loss": 7.989864349365234,
101
+ "eval_runtime": 11.0595,
102
+ "eval_samples_per_second": 151.454,
103
+ "eval_steps_per_second": 18.988,
104
+ "step": 27
105
  }
106
  ],
107
  "logging_steps": 3,
 
121
  "attributes": {}
122
  }
123
  },
124
+ "total_flos": 2314236994781184.0,
125
  "train_batch_size": 8,
126
  "trial_name": null,
127
  "trial_params": null