pefanis27 committed on
Commit
276b1b5
·
verified ·
1 Parent(s): 1b5478c

phi-3.5-new

Browse files
adapter_config.json CHANGED
@@ -23,9 +23,9 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "down_proj",
27
  "qkv_proj",
28
  "o_proj",
 
29
  "gate_up_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
 
26
  "qkv_proj",
27
  "o_proj",
28
+ "down_proj",
29
  "gate_up_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41b60cffda93a140952b4e8e313fd8a8519ca094dd39b7008a23a9942e80caa3
3
  size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1b8b56f3b700a6bea1e2bca037d2506f4811f2d8270799c66980c14fe48dc69
3
  size 100697728
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 7.0,
3
- "eval_loss": 1.0098934173583984,
4
- "eval_runtime": 7.0732,
5
- "eval_samples_per_second": 2.403,
6
- "eval_steps_per_second": 1.272,
7
- "total_flos": 1.2128346681348096e+16,
8
- "train_loss": 0.8996787428044949,
9
- "train_runtime": 846.9703,
10
- "train_samples_per_second": 0.98,
11
- "train_steps_per_second": 0.496
12
  }
 
1
  {
2
+ "epoch": 11.0,
3
+ "eval_loss": 0.7707116603851318,
4
+ "eval_runtime": 103.4112,
5
+ "eval_samples_per_second": 2.476,
6
+ "eval_steps_per_second": 1.238,
7
+ "total_flos": 2.8711375226284032e+17,
8
+ "train_loss": 0.7217305723603789,
9
+ "train_runtime": 19548.1756,
10
+ "train_samples_per_second": 1.596,
11
+ "train_steps_per_second": 0.798
12
  }
eval_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 7.0,
3
- "eval_loss": 1.0098934173583984,
4
- "eval_runtime": 7.0732,
5
- "eval_samples_per_second": 2.403,
6
- "eval_steps_per_second": 1.272
7
  }
 
1
  {
2
+ "epoch": 11.0,
3
+ "eval_loss": 0.7707116603851318,
4
+ "eval_runtime": 103.4112,
5
+ "eval_samples_per_second": 2.476,
6
+ "eval_steps_per_second": 1.238
7
  }
runs/Jan12_19-05-30_dmlab/events.out.tfevents.1736701530.dmlab.9923.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea451e6836705c4213d569c92050dce2d40dd61fefa3e76493bf24617e46571f
3
+ size 8942
runs/Jan12_19-26-23_dmlab/events.out.tfevents.1736702784.dmlab.12960.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c3af953317924b74894174a19e6891e5da60c2ee9a62af28194155428ca9fd6
3
+ size 13979
runs/Jan12_19-26-23_dmlab/events.out.tfevents.1736722436.dmlab.12960.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6258b6e6405a2a28ee922fa51f7132627d4c1bebe946d9260579999a61bcffe7
3
+ size 359
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 7.0,
3
- "total_flos": 1.2128346681348096e+16,
4
- "train_loss": 0.8996787428044949,
5
- "train_runtime": 846.9703,
6
- "train_samples_per_second": 0.98,
7
- "train_steps_per_second": 0.496
8
  }
 
1
  {
2
+ "epoch": 11.0,
3
+ "total_flos": 2.8711375226284032e+17,
4
+ "train_loss": 0.7217305723603789,
5
+ "train_runtime": 19548.1756,
6
+ "train_samples_per_second": 1.596,
7
+ "train_steps_per_second": 0.798
8
  }
trainer_state.json CHANGED
@@ -1,132 +1,192 @@
1
  {
2
- "best_metric": 1.0029878616333008,
3
- "best_model_checkpoint": "/home/labuser/Documents/phi-3/phi-3.5-new/checkpoint-168",
4
- "epoch": 7.0,
5
  "eval_steps": 500,
6
- "global_step": 294,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 1.0487970113754272,
14
- "learning_rate": 0.0001,
15
- "loss": 1.1905,
16
- "step": 42
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_loss": 1.0386497974395752,
21
- "eval_runtime": 7.0656,
22
- "eval_samples_per_second": 2.406,
23
- "eval_steps_per_second": 1.274,
24
- "step": 42
25
  },
26
  {
27
  "epoch": 2.0,
28
- "grad_norm": 1.0445537567138672,
29
- "learning_rate": 0.0002,
30
- "loss": 0.9483,
31
- "step": 84
32
  },
33
  {
34
  "epoch": 2.0,
35
- "eval_loss": 1.0180531740188599,
36
- "eval_runtime": 7.0935,
37
- "eval_samples_per_second": 2.397,
38
- "eval_steps_per_second": 1.269,
39
- "step": 84
40
  },
41
  {
42
  "epoch": 3.0,
43
- "grad_norm": 0.8360362648963928,
44
- "learning_rate": 0.0001923879532511287,
45
- "loss": 0.9012,
46
- "step": 126
47
  },
48
  {
49
  "epoch": 3.0,
50
- "eval_loss": 1.0081464052200317,
51
- "eval_runtime": 7.1072,
52
- "eval_samples_per_second": 2.392,
53
- "eval_steps_per_second": 1.266,
54
- "step": 126
55
  },
56
  {
57
  "epoch": 4.0,
58
- "grad_norm": 0.9580802917480469,
59
- "learning_rate": 0.00017071067811865476,
60
- "loss": 0.8626,
61
- "step": 168
62
  },
63
  {
64
  "epoch": 4.0,
65
- "eval_loss": 1.0029878616333008,
66
- "eval_runtime": 7.081,
67
- "eval_samples_per_second": 2.401,
68
- "eval_steps_per_second": 1.271,
69
- "step": 168
70
  },
71
  {
72
  "epoch": 5.0,
73
- "grad_norm": 0.8999230861663818,
74
- "learning_rate": 0.000138268343236509,
75
- "loss": 0.8324,
76
- "step": 210
77
  },
78
  {
79
  "epoch": 5.0,
80
- "eval_loss": 1.0067561864852905,
81
- "eval_runtime": 7.0886,
82
- "eval_samples_per_second": 2.398,
83
- "eval_steps_per_second": 1.27,
84
- "step": 210
85
  },
86
  {
87
  "epoch": 6.0,
88
- "grad_norm": 0.8453378081321716,
89
- "learning_rate": 0.0001,
90
- "loss": 0.8004,
91
- "step": 252
92
  },
93
  {
94
  "epoch": 6.0,
95
- "eval_loss": 1.0179213285446167,
96
- "eval_runtime": 7.0766,
97
- "eval_samples_per_second": 2.402,
98
- "eval_steps_per_second": 1.272,
99
- "step": 252
100
  },
101
  {
102
  "epoch": 7.0,
103
- "grad_norm": 1.0025222301483154,
104
- "learning_rate": 6.173165676349103e-05,
105
- "loss": 0.7622,
106
- "step": 294
107
  },
108
  {
109
  "epoch": 7.0,
110
- "eval_loss": 1.043684959411621,
111
- "eval_runtime": 7.0812,
112
- "eval_samples_per_second": 2.401,
113
- "eval_steps_per_second": 1.271,
114
- "step": 294
115
  },
116
  {
117
- "epoch": 7.0,
118
- "step": 294,
119
- "total_flos": 1.2128346681348096e+16,
120
- "train_loss": 0.8996787428044949,
121
- "train_runtime": 846.9703,
122
- "train_samples_per_second": 0.98,
123
- "train_steps_per_second": 0.496
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  }
125
  ],
126
  "logging_steps": 500,
127
- "max_steps": 420,
128
  "num_input_tokens_seen": 0,
129
- "num_train_epochs": 10,
130
  "save_steps": 500,
131
  "stateful_callbacks": {
132
  "EarlyStoppingCallback": {
@@ -149,7 +209,7 @@
149
  "attributes": {}
150
  }
151
  },
152
- "total_flos": 1.2128346681348096e+16,
153
  "train_batch_size": 2,
154
  "trial_name": null,
155
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.7596490383148193,
3
+ "best_model_checkpoint": "/home/labuser/Documents/phi-3/phi-3.5-new/checkpoint-4992",
4
+ "epoch": 11.0,
5
  "eval_steps": 500,
6
+ "global_step": 6864,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 1.0626534223556519,
14
+ "learning_rate": 4e-05,
15
+ "loss": 0.9984,
16
+ "step": 624
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_loss": 0.969778299331665,
21
+ "eval_runtime": 103.4178,
22
+ "eval_samples_per_second": 2.475,
23
+ "eval_steps_per_second": 1.238,
24
+ "step": 624
25
  },
26
  {
27
  "epoch": 2.0,
28
+ "grad_norm": 0.967179000377655,
29
+ "learning_rate": 8e-05,
30
+ "loss": 0.8711,
31
+ "step": 1248
32
  },
33
  {
34
  "epoch": 2.0,
35
+ "eval_loss": 0.9493655562400818,
36
+ "eval_runtime": 103.4116,
37
+ "eval_samples_per_second": 2.476,
38
+ "eval_steps_per_second": 1.238,
39
+ "step": 1248
40
  },
41
  {
42
  "epoch": 3.0,
43
+ "grad_norm": 1.005494475364685,
44
+ "learning_rate": 0.00012,
45
+ "loss": 0.837,
46
+ "step": 1872
47
  },
48
  {
49
  "epoch": 3.0,
50
+ "eval_loss": 0.9314318895339966,
51
+ "eval_runtime": 103.4002,
52
+ "eval_samples_per_second": 2.476,
53
+ "eval_steps_per_second": 1.238,
54
+ "step": 1872
55
  },
56
  {
57
  "epoch": 4.0,
58
+ "grad_norm": 0.9488267302513123,
59
+ "learning_rate": 0.00016,
60
+ "loss": 0.8053,
61
+ "step": 2496
62
  },
63
  {
64
  "epoch": 4.0,
65
+ "eval_loss": 0.9051965475082397,
66
+ "eval_runtime": 103.4268,
67
+ "eval_samples_per_second": 2.475,
68
+ "eval_steps_per_second": 1.238,
69
+ "step": 2496
70
  },
71
  {
72
  "epoch": 5.0,
73
+ "grad_norm": 1.2727611064910889,
74
+ "learning_rate": 0.0002,
75
+ "loss": 0.7577,
76
+ "step": 3120
77
  },
78
  {
79
  "epoch": 5.0,
80
+ "eval_loss": 0.8548531532287598,
81
+ "eval_runtime": 103.3949,
82
+ "eval_samples_per_second": 2.476,
83
+ "eval_steps_per_second": 1.238,
84
+ "step": 3120
85
  },
86
  {
87
  "epoch": 6.0,
88
+ "grad_norm": 1.3708724975585938,
89
+ "learning_rate": 0.00019876883405951377,
90
+ "loss": 0.6823,
91
+ "step": 3744
92
  },
93
  {
94
  "epoch": 6.0,
95
+ "eval_loss": 0.7896583080291748,
96
+ "eval_runtime": 103.416,
97
+ "eval_samples_per_second": 2.475,
98
+ "eval_steps_per_second": 1.238,
99
+ "step": 3744
100
  },
101
  {
102
  "epoch": 7.0,
103
+ "grad_norm": 1.065096378326416,
104
+ "learning_rate": 0.00019510565162951537,
105
+ "loss": 0.6376,
106
+ "step": 4368
107
  },
108
  {
109
  "epoch": 7.0,
110
+ "eval_loss": 0.7802127003669739,
111
+ "eval_runtime": 103.439,
112
+ "eval_samples_per_second": 2.475,
113
+ "eval_steps_per_second": 1.237,
114
+ "step": 4368
115
  },
116
  {
117
+ "epoch": 8.0,
118
+ "grad_norm": 0.973934531211853,
119
+ "learning_rate": 0.0001891006524188368,
120
+ "loss": 0.6122,
121
+ "step": 4992
122
+ },
123
+ {
124
+ "epoch": 8.0,
125
+ "eval_loss": 0.7596490383148193,
126
+ "eval_runtime": 103.3881,
127
+ "eval_samples_per_second": 2.476,
128
+ "eval_steps_per_second": 1.238,
129
+ "step": 4992
130
+ },
131
+ {
132
+ "epoch": 9.0,
133
+ "grad_norm": 1.0052335262298584,
134
+ "learning_rate": 0.00018090169943749476,
135
+ "loss": 0.5947,
136
+ "step": 5616
137
+ },
138
+ {
139
+ "epoch": 9.0,
140
+ "eval_loss": 0.7636829614639282,
141
+ "eval_runtime": 103.3982,
142
+ "eval_samples_per_second": 2.476,
143
+ "eval_steps_per_second": 1.238,
144
+ "step": 5616
145
+ },
146
+ {
147
+ "epoch": 10.0,
148
+ "grad_norm": 0.9973880648612976,
149
+ "learning_rate": 0.00017071067811865476,
150
+ "loss": 0.5797,
151
+ "step": 6240
152
+ },
153
+ {
154
+ "epoch": 10.0,
155
+ "eval_loss": 0.7768124341964722,
156
+ "eval_runtime": 103.4116,
157
+ "eval_samples_per_second": 2.476,
158
+ "eval_steps_per_second": 1.238,
159
+ "step": 6240
160
+ },
161
+ {
162
+ "epoch": 11.0,
163
+ "grad_norm": 0.9684802293777466,
164
+ "learning_rate": 0.00015877852522924732,
165
+ "loss": 0.5631,
166
+ "step": 6864
167
+ },
168
+ {
169
+ "epoch": 11.0,
170
+ "eval_loss": 0.7780652046203613,
171
+ "eval_runtime": 103.4007,
172
+ "eval_samples_per_second": 2.476,
173
+ "eval_steps_per_second": 1.238,
174
+ "step": 6864
175
+ },
176
+ {
177
+ "epoch": 11.0,
178
+ "step": 6864,
179
+ "total_flos": 2.8711375226284032e+17,
180
+ "train_loss": 0.7217305723603789,
181
+ "train_runtime": 19548.1756,
182
+ "train_samples_per_second": 1.596,
183
+ "train_steps_per_second": 0.798
184
  }
185
  ],
186
  "logging_steps": 500,
187
+ "max_steps": 15600,
188
  "num_input_tokens_seen": 0,
189
+ "num_train_epochs": 25,
190
  "save_steps": 500,
191
  "stateful_callbacks": {
192
  "EarlyStoppingCallback": {
 
209
  "attributes": {}
210
  }
211
  },
212
+ "total_flos": 2.8711375226284032e+17,
213
  "train_batch_size": 2,
214
  "trial_name": null,
215
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a0dba0afd6c07504c9951fbbc2017ec670cad8a3b2ca6951fe367b50a131172
3
  size 5624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e29c43b50ee6b41567386050e54799b545a4237e251ef4c7b3fefda200b7073d
3
  size 5624