mrferr3t commited on
Commit
4a6ce62
·
verified ·
1 Parent(s): a15e7ee

Training in progress, step 12, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb755141d71b34746981158d735bf6eebdb682c01a6703f9cba021b623aa1c4a
3
  size 41581360
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f48c348a378b36fa2a4f7311f920ac4c6d290ec3d9d19a69e92acdbde3a5972c
3
  size 41581360
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e95636f2c1b9976bb322fd885d65e4d65db798b6144752fa532461c8a881fce4
3
  size 21505540
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0e475fa23046df7e5979e96e83553c8d3af18145688794e1784ce7a5e5d4707
3
  size 21505540
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:530eddfbd2f0e25306cd6d0a8655a3cfe1e1af46c6a93abefd8982a321b96e39
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83eb8ccd812e2a94dfc483ddae0d9b7b7ab4077cf01e4da59286cd13ea8d6029
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0099cb7287625b29b67c4fcf42ff20fae623b429bfb10f5ac695bc54f2be54fd
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:183ab984e067f0684f97cf6c258fa276e42ea5c29910668cb653a16e870010e9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.015267175572519083,
5
  "eval_steps": 6,
6
- "global_step": 6,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -65,6 +65,56 @@
65
  "eval_samples_per_second": 21.797,
66
  "eval_steps_per_second": 10.899,
67
  "step": 6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  }
69
  ],
70
  "logging_steps": 1,
@@ -84,7 +134,7 @@
84
  "attributes": {}
85
  }
86
  },
87
- "total_flos": 300058398425088.0,
88
  "train_batch_size": 2,
89
  "trial_name": null,
90
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.030534351145038167,
5
  "eval_steps": 6,
6
+ "global_step": 12,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
65
  "eval_samples_per_second": 21.797,
66
  "eval_steps_per_second": 10.899,
67
  "step": 6
68
+ },
69
+ {
70
+ "epoch": 0.017811704834605598,
71
+ "grad_norm": 2.5362422466278076,
72
+ "learning_rate": 0.00014,
73
+ "loss": 0.8316,
74
+ "step": 7
75
+ },
76
+ {
77
+ "epoch": 0.020356234096692113,
78
+ "grad_norm": 2.6198008060455322,
79
+ "learning_rate": 0.00016,
80
+ "loss": 0.6159,
81
+ "step": 8
82
+ },
83
+ {
84
+ "epoch": 0.022900763358778626,
85
+ "grad_norm": 2.256551742553711,
86
+ "learning_rate": 0.00018,
87
+ "loss": 0.4382,
88
+ "step": 9
89
+ },
90
+ {
91
+ "epoch": 0.02544529262086514,
92
+ "grad_norm": 2.475969076156616,
93
+ "learning_rate": 0.0002,
94
+ "loss": 0.3562,
95
+ "step": 10
96
+ },
97
+ {
98
+ "epoch": 0.027989821882951654,
99
+ "grad_norm": 3.510289192199707,
100
+ "learning_rate": 0.00019749279121818235,
101
+ "loss": 0.3165,
102
+ "step": 11
103
+ },
104
+ {
105
+ "epoch": 0.030534351145038167,
106
+ "grad_norm": 1.8848199844360352,
107
+ "learning_rate": 0.0001900968867902419,
108
+ "loss": 0.2195,
109
+ "step": 12
110
+ },
111
+ {
112
+ "epoch": 0.030534351145038167,
113
+ "eval_loss": 0.1976952999830246,
114
+ "eval_runtime": 7.5845,
115
+ "eval_samples_per_second": 21.887,
116
+ "eval_steps_per_second": 10.943,
117
+ "step": 12
118
  }
119
  ],
120
  "logging_steps": 1,
 
134
  "attributes": {}
135
  }
136
  },
137
+ "total_flos": 600116796850176.0,
138
  "train_batch_size": 2,
139
  "trial_name": null,
140
  "trial_params": null