pefanis27 commited on
Commit
409ed64
·
verified ·
1 Parent(s): 276b1b5

phi-3.5-new

Browse files
adapter_config.json CHANGED
@@ -23,10 +23,10 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "qkv_proj",
27
  "o_proj",
28
- "down_proj",
29
- "gate_up_proj"
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
 
26
  "o_proj",
27
+ "qkv_proj",
28
+ "gate_up_proj",
29
+ "down_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1b8b56f3b700a6bea1e2bca037d2506f4811f2d8270799c66980c14fe48dc69
3
  size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:272a00f063966eb6a188503f16dfaafeeb0b3938fb064be3e153d64dfa16cb4b
3
  size 100697728
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 11.0,
3
- "eval_loss": 0.7707116603851318,
4
- "eval_runtime": 103.4112,
5
- "eval_samples_per_second": 2.476,
6
- "eval_steps_per_second": 1.238,
7
- "total_flos": 2.8711375226284032e+17,
8
- "train_loss": 0.7217305723603789,
9
- "train_runtime": 19548.1756,
10
- "train_samples_per_second": 1.596,
11
- "train_steps_per_second": 0.798
12
  }
 
1
  {
2
+ "epoch": 5.0,
3
+ "eval_loss": 1.006455421447754,
4
+ "eval_runtime": 6.8379,
5
+ "eval_samples_per_second": 2.486,
6
+ "eval_steps_per_second": 0.731,
7
+ "total_flos": 8689108767160320.0,
8
+ "train_loss": 0.9555120922270275,
9
+ "train_runtime": 582.4007,
10
+ "train_samples_per_second": 0.713,
11
+ "train_steps_per_second": 0.18
12
  }
eval_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 11.0,
3
- "eval_loss": 0.7707116603851318,
4
- "eval_runtime": 103.4112,
5
- "eval_samples_per_second": 2.476,
6
- "eval_steps_per_second": 1.238
7
  }
 
1
  {
2
+ "epoch": 5.0,
3
+ "eval_loss": 1.006455421447754,
4
+ "eval_runtime": 6.8379,
5
+ "eval_samples_per_second": 2.486,
6
+ "eval_steps_per_second": 0.731
7
  }
runs/Jan13_05-28-33_dmlab/events.out.tfevents.1736738913.dmlab.67479.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f803481d3f0bbfc18400647f693e840fdc101cf29ac62a44aabcefa2ec9c61c
3
+ size 8794
runs/Jan13_05-38-09_dmlab/events.out.tfevents.1736739489.dmlab.68165.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffd48dd236376b12f91682cd6a2fb7eb986c728357ceab6c97f976c4f755ee01
3
+ size 9263
runs/Jan13_05-43-08_dmlab/events.out.tfevents.1736739789.dmlab.69294.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:805dc2394501b132f8f71f3ea9f2fb04fc7f9f2a510fa132e43b670de0f2589e
3
+ size 11030
runs/Jan13_05-43-08_dmlab/events.out.tfevents.1736740378.dmlab.69294.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9000e42f8dca2f9562ef60c3b7766ecae263c5aa2fab3ba365006576109a279
3
+ size 354
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 11.0,
3
- "total_flos": 2.8711375226284032e+17,
4
- "train_loss": 0.7217305723603789,
5
- "train_runtime": 19548.1756,
6
- "train_samples_per_second": 1.596,
7
- "train_steps_per_second": 0.798
8
  }
 
1
  {
2
+ "epoch": 5.0,
3
+ "total_flos": 8689108767160320.0,
4
+ "train_loss": 0.9555120922270275,
5
+ "train_runtime": 582.4007,
6
+ "train_samples_per_second": 0.713,
7
+ "train_steps_per_second": 0.18
8
  }
trainer_state.json CHANGED
@@ -1,192 +1,102 @@
1
  {
2
- "best_metric": 0.7596490383148193,
3
- "best_model_checkpoint": "/home/labuser/Documents/phi-3/phi-3.5-new/checkpoint-4992",
4
- "epoch": 11.0,
5
  "eval_steps": 500,
6
- "global_step": 6864,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 1.0626534223556519,
14
- "learning_rate": 4e-05,
15
- "loss": 0.9984,
16
- "step": 624
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_loss": 0.969778299331665,
21
- "eval_runtime": 103.4178,
22
- "eval_samples_per_second": 2.475,
23
- "eval_steps_per_second": 1.238,
24
- "step": 624
25
  },
26
  {
27
  "epoch": 2.0,
28
- "grad_norm": 0.967179000377655,
29
- "learning_rate": 8e-05,
30
- "loss": 0.8711,
31
- "step": 1248
32
  },
33
  {
34
  "epoch": 2.0,
35
- "eval_loss": 0.9493655562400818,
36
- "eval_runtime": 103.4116,
37
- "eval_samples_per_second": 2.476,
38
- "eval_steps_per_second": 1.238,
39
- "step": 1248
40
  },
41
  {
42
  "epoch": 3.0,
43
- "grad_norm": 1.005494475364685,
44
- "learning_rate": 0.00012,
45
- "loss": 0.837,
46
- "step": 1872
47
  },
48
  {
49
  "epoch": 3.0,
50
- "eval_loss": 0.9314318895339966,
51
- "eval_runtime": 103.4002,
52
  "eval_samples_per_second": 2.476,
53
- "eval_steps_per_second": 1.238,
54
- "step": 1872
55
  },
56
  {
57
  "epoch": 4.0,
58
- "grad_norm": 0.9488267302513123,
59
- "learning_rate": 0.00016,
60
- "loss": 0.8053,
61
- "step": 2496
62
  },
63
  {
64
  "epoch": 4.0,
65
- "eval_loss": 0.9051965475082397,
66
- "eval_runtime": 103.4268,
67
- "eval_samples_per_second": 2.475,
68
- "eval_steps_per_second": 1.238,
69
- "step": 2496
70
  },
71
  {
72
  "epoch": 5.0,
73
- "grad_norm": 1.2727611064910889,
74
- "learning_rate": 0.0002,
75
- "loss": 0.7577,
76
- "step": 3120
77
  },
78
  {
79
  "epoch": 5.0,
80
- "eval_loss": 0.8548531532287598,
81
- "eval_runtime": 103.3949,
82
- "eval_samples_per_second": 2.476,
83
- "eval_steps_per_second": 1.238,
84
- "step": 3120
85
- },
86
- {
87
- "epoch": 6.0,
88
- "grad_norm": 1.3708724975585938,
89
- "learning_rate": 0.00019876883405951377,
90
- "loss": 0.6823,
91
- "step": 3744
92
- },
93
- {
94
- "epoch": 6.0,
95
- "eval_loss": 0.7896583080291748,
96
- "eval_runtime": 103.416,
97
- "eval_samples_per_second": 2.475,
98
- "eval_steps_per_second": 1.238,
99
- "step": 3744
100
- },
101
- {
102
- "epoch": 7.0,
103
- "grad_norm": 1.065096378326416,
104
- "learning_rate": 0.00019510565162951537,
105
- "loss": 0.6376,
106
- "step": 4368
107
- },
108
- {
109
- "epoch": 7.0,
110
- "eval_loss": 0.7802127003669739,
111
- "eval_runtime": 103.439,
112
- "eval_samples_per_second": 2.475,
113
- "eval_steps_per_second": 1.237,
114
- "step": 4368
115
- },
116
- {
117
- "epoch": 8.0,
118
- "grad_norm": 0.973934531211853,
119
- "learning_rate": 0.0001891006524188368,
120
- "loss": 0.6122,
121
- "step": 4992
122
- },
123
- {
124
- "epoch": 8.0,
125
- "eval_loss": 0.7596490383148193,
126
- "eval_runtime": 103.3881,
127
- "eval_samples_per_second": 2.476,
128
- "eval_steps_per_second": 1.238,
129
- "step": 4992
130
  },
131
  {
132
- "epoch": 9.0,
133
- "grad_norm": 1.0052335262298584,
134
- "learning_rate": 0.00018090169943749476,
135
- "loss": 0.5947,
136
- "step": 5616
137
- },
138
- {
139
- "epoch": 9.0,
140
- "eval_loss": 0.7636829614639282,
141
- "eval_runtime": 103.3982,
142
- "eval_samples_per_second": 2.476,
143
- "eval_steps_per_second": 1.238,
144
- "step": 5616
145
- },
146
- {
147
- "epoch": 10.0,
148
- "grad_norm": 0.9973880648612976,
149
- "learning_rate": 0.00017071067811865476,
150
- "loss": 0.5797,
151
- "step": 6240
152
- },
153
- {
154
- "epoch": 10.0,
155
- "eval_loss": 0.7768124341964722,
156
- "eval_runtime": 103.4116,
157
- "eval_samples_per_second": 2.476,
158
- "eval_steps_per_second": 1.238,
159
- "step": 6240
160
- },
161
- {
162
- "epoch": 11.0,
163
- "grad_norm": 0.9684802293777466,
164
- "learning_rate": 0.00015877852522924732,
165
- "loss": 0.5631,
166
- "step": 6864
167
- },
168
- {
169
- "epoch": 11.0,
170
- "eval_loss": 0.7780652046203613,
171
- "eval_runtime": 103.4007,
172
- "eval_samples_per_second": 2.476,
173
- "eval_steps_per_second": 1.238,
174
- "step": 6864
175
- },
176
- {
177
- "epoch": 11.0,
178
- "step": 6864,
179
- "total_flos": 2.8711375226284032e+17,
180
- "train_loss": 0.7217305723603789,
181
- "train_runtime": 19548.1756,
182
- "train_samples_per_second": 1.596,
183
- "train_steps_per_second": 0.798
184
  }
185
  ],
186
  "logging_steps": 500,
187
- "max_steps": 15600,
188
  "num_input_tokens_seen": 0,
189
- "num_train_epochs": 25,
190
  "save_steps": 500,
191
  "stateful_callbacks": {
192
  "EarlyStoppingCallback": {
@@ -195,7 +105,7 @@
195
  "early_stopping_threshold": 0.0
196
  },
197
  "attributes": {
198
- "early_stopping_patience_counter": 3
199
  }
200
  },
201
  "TrainerControl": {
@@ -209,8 +119,8 @@
209
  "attributes": {}
210
  }
211
  },
212
- "total_flos": 2.8711375226284032e+17,
213
- "train_batch_size": 2,
214
  "trial_name": null,
215
  "trial_params": null
216
  }
 
1
  {
2
+ "best_metric": 0.9966219663619995,
3
+ "best_model_checkpoint": "/home/labuser/Documents/phi-3/phi-3.5-new/checkpoint-63",
4
+ "epoch": 5.0,
5
  "eval_steps": 500,
6
+ "global_step": 105,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 0.5774533748626709,
14
+ "learning_rate": 0.0002,
15
+ "loss": 1.2076,
16
+ "step": 21
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_loss": 1.060381531715393,
21
+ "eval_runtime": 6.8834,
22
+ "eval_samples_per_second": 2.47,
23
+ "eval_steps_per_second": 0.726,
24
+ "step": 21
25
  },
26
  {
27
  "epoch": 2.0,
28
+ "grad_norm": 0.4814026951789856,
29
+ "learning_rate": 0.00017071067811865476,
30
+ "loss": 0.9571,
31
+ "step": 42
32
  },
33
  {
34
  "epoch": 2.0,
35
+ "eval_loss": 0.998464047908783,
36
+ "eval_runtime": 6.8587,
37
+ "eval_samples_per_second": 2.479,
38
+ "eval_steps_per_second": 0.729,
39
+ "step": 42
40
  },
41
  {
42
  "epoch": 3.0,
43
+ "grad_norm": 0.5674614906311035,
44
+ "learning_rate": 0.0001,
45
+ "loss": 0.901,
46
+ "step": 63
47
  },
48
  {
49
  "epoch": 3.0,
50
+ "eval_loss": 0.9966219663619995,
51
+ "eval_runtime": 6.8664,
52
  "eval_samples_per_second": 2.476,
53
+ "eval_steps_per_second": 0.728,
54
+ "step": 63
55
  },
56
  {
57
  "epoch": 4.0,
58
+ "grad_norm": 0.5171260833740234,
59
+ "learning_rate": 2.9289321881345254e-05,
60
+ "loss": 0.8659,
61
+ "step": 84
62
  },
63
  {
64
  "epoch": 4.0,
65
+ "eval_loss": 1.001924753189087,
66
+ "eval_runtime": 6.8571,
67
+ "eval_samples_per_second": 2.479,
68
+ "eval_steps_per_second": 0.729,
69
+ "step": 84
70
  },
71
  {
72
  "epoch": 5.0,
73
+ "grad_norm": 0.4767768681049347,
74
+ "learning_rate": 0.0,
75
+ "loss": 0.846,
76
+ "step": 105
77
  },
78
  {
79
  "epoch": 5.0,
80
+ "eval_loss": 1.0021475553512573,
81
+ "eval_runtime": 6.8536,
82
+ "eval_samples_per_second": 2.48,
83
+ "eval_steps_per_second": 0.73,
84
+ "step": 105
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  },
86
  {
87
+ "epoch": 5.0,
88
+ "step": 105,
89
+ "total_flos": 8689108767160320.0,
90
+ "train_loss": 0.9555120922270275,
91
+ "train_runtime": 582.4007,
92
+ "train_samples_per_second": 0.713,
93
+ "train_steps_per_second": 0.18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  }
95
  ],
96
  "logging_steps": 500,
97
+ "max_steps": 105,
98
  "num_input_tokens_seen": 0,
99
+ "num_train_epochs": 5,
100
  "save_steps": 500,
101
  "stateful_callbacks": {
102
  "EarlyStoppingCallback": {
 
105
  "early_stopping_threshold": 0.0
106
  },
107
  "attributes": {
108
+ "early_stopping_patience_counter": 2
109
  }
110
  },
111
  "TrainerControl": {
 
119
  "attributes": {}
120
  }
121
  },
122
+ "total_flos": 8689108767160320.0,
123
+ "train_batch_size": 4,
124
  "trial_name": null,
125
  "trial_params": null
126
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e29c43b50ee6b41567386050e54799b545a4237e251ef4c7b3fefda200b7073d
3
  size 5624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b13726b299e8575d2863caea80e5c8126effe8fd59991a7918fa2d8a2e87a4b9
3
  size 5624