pefanis27 committed on
Commit
eab9d53
·
verified ·
1 Parent(s): 9f8219b

phi-3.5-new

Browse files
adapter_config.json CHANGED
@@ -23,10 +23,10 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "gate_up_proj",
27
  "o_proj",
28
- "down_proj",
29
- "qkv_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
+ "qkv_proj",
27
  "o_proj",
28
+ "gate_up_proj",
29
+ "down_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1de015679f6b9c3f3cebe7a92f3fb63d181165fcdf04bf929c38c658d15a6185
3
  size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8dd3dcd915b2dd761edd13f06737584ebfa1e3321219edc4805c74bef799516
3
  size 100697728
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_loss": 1.0679360628128052,
4
- "eval_runtime": 6.7666,
5
- "eval_samples_per_second": 2.512,
6
- "eval_steps_per_second": 0.739,
7
  "total_flos": 8689108767160320.0,
8
- "train_loss": 1.114040283929734,
9
- "train_runtime": 580.4964,
10
- "train_samples_per_second": 0.715,
11
  "train_steps_per_second": 0.181
12
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_loss": 1.0177021026611328,
4
+ "eval_runtime": 6.8483,
5
+ "eval_samples_per_second": 2.482,
6
+ "eval_steps_per_second": 0.73,
7
  "total_flos": 8689108767160320.0,
8
+ "train_loss": 0.9847308204287575,
9
+ "train_runtime": 581.6572,
10
+ "train_samples_per_second": 0.713,
11
  "train_steps_per_second": 0.181
12
  }
eval_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_loss": 1.0679360628128052,
4
- "eval_runtime": 6.7666,
5
- "eval_samples_per_second": 2.512,
6
- "eval_steps_per_second": 0.739
7
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_loss": 1.0177021026611328,
4
+ "eval_runtime": 6.8483,
5
+ "eval_samples_per_second": 2.482,
6
+ "eval_steps_per_second": 0.73
7
  }
runs/Jan13_06-21-44_dmlab/events.out.tfevents.1736742105.dmlab.73737.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25a48b974ce95793bee33b65bc03651ee64448ca4164acef2c6f8787ec309c9b
3
+ size 8317
runs/Jan13_06-22-58_dmlab/events.out.tfevents.1736742178.dmlab.74800.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a51dd3d43ee98d872ddfc9661657aa68bf569dc4af5fa99ad9fddcbb288374b3
3
+ size 11029
runs/Jan13_06-22-58_dmlab/events.out.tfevents.1736742767.dmlab.74800.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a5c8d05f220923e8c4cf381f7a80873ca0d43fc5573ad7cde483489a80b07c2
3
+ size 354
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 5.0,
3
  "total_flos": 8689108767160320.0,
4
- "train_loss": 1.114040283929734,
5
- "train_runtime": 580.4964,
6
- "train_samples_per_second": 0.715,
7
  "train_steps_per_second": 0.181
8
  }
 
1
  {
2
  "epoch": 5.0,
3
  "total_flos": 8689108767160320.0,
4
+ "train_loss": 0.9847308204287575,
5
+ "train_runtime": 581.6572,
6
+ "train_samples_per_second": 0.713,
7
  "train_steps_per_second": 0.181
8
  }
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 1.0568883419036865,
3
  "best_model_checkpoint": "/home/labuser/Documents/phi-3/phi-3.5-new/checkpoint-105",
4
  "epoch": 5.0,
5
  "eval_steps": 500,
@@ -10,86 +10,86 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 0.9515901803970337,
14
- "learning_rate": 2e-05,
15
- "loss": 1.3492,
16
  "step": 21
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_loss": 1.2440305948257446,
21
- "eval_runtime": 6.9085,
22
- "eval_samples_per_second": 2.461,
23
- "eval_steps_per_second": 0.724,
24
  "step": 21
25
  },
26
  {
27
  "epoch": 2.0,
28
- "grad_norm": 0.7905489802360535,
29
- "learning_rate": 1.7071067811865477e-05,
30
- "loss": 1.1695,
31
  "step": 42
32
  },
33
  {
34
  "epoch": 2.0,
35
- "eval_loss": 1.1394829750061035,
36
- "eval_runtime": 6.8809,
37
- "eval_samples_per_second": 2.471,
38
- "eval_steps_per_second": 0.727,
39
  "step": 42
40
  },
41
  {
42
  "epoch": 3.0,
43
- "grad_norm": 0.7479790449142456,
44
- "learning_rate": 1e-05,
45
- "loss": 1.0622,
46
  "step": 63
47
  },
48
  {
49
  "epoch": 3.0,
50
- "eval_loss": 1.081365942955017,
51
- "eval_runtime": 6.9014,
52
- "eval_samples_per_second": 2.463,
53
- "eval_steps_per_second": 0.724,
54
  "step": 63
55
  },
56
  {
57
  "epoch": 4.0,
58
- "grad_norm": 0.8422548174858093,
59
- "learning_rate": 2.9289321881345257e-06,
60
- "loss": 1.0051,
61
  "step": 84
62
  },
63
  {
64
  "epoch": 4.0,
65
- "eval_loss": 1.0581644773483276,
66
- "eval_runtime": 6.7884,
67
- "eval_samples_per_second": 2.504,
68
- "eval_steps_per_second": 0.737,
69
  "step": 84
70
  },
71
  {
72
  "epoch": 5.0,
73
- "grad_norm": 0.6641820073127747,
74
  "learning_rate": 0.0,
75
- "loss": 0.9842,
76
  "step": 105
77
  },
78
  {
79
  "epoch": 5.0,
80
- "eval_loss": 1.0568883419036865,
81
- "eval_runtime": 6.7736,
82
- "eval_samples_per_second": 2.51,
83
- "eval_steps_per_second": 0.738,
84
  "step": 105
85
  },
86
  {
87
  "epoch": 5.0,
88
  "step": 105,
89
  "total_flos": 8689108767160320.0,
90
- "train_loss": 1.114040283929734,
91
- "train_runtime": 580.4964,
92
- "train_samples_per_second": 0.715,
93
  "train_steps_per_second": 0.181
94
  }
95
  ],
 
1
  {
2
+ "best_metric": 1.0051764249801636,
3
  "best_model_checkpoint": "/home/labuser/Documents/phi-3/phi-3.5-new/checkpoint-105",
4
  "epoch": 5.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 0.7281827330589294,
14
+ "learning_rate": 0.0001,
15
+ "loss": 1.2517,
16
  "step": 21
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_loss": 1.1211004257202148,
21
+ "eval_runtime": 6.7788,
22
+ "eval_samples_per_second": 2.508,
23
+ "eval_steps_per_second": 0.738,
24
  "step": 21
25
  },
26
  {
27
  "epoch": 2.0,
28
+ "grad_norm": 0.5565729737281799,
29
+ "learning_rate": 8.535533905932738e-05,
30
+ "loss": 0.9912,
31
  "step": 42
32
  },
33
  {
34
  "epoch": 2.0,
35
+ "eval_loss": 1.0125099420547485,
36
+ "eval_runtime": 6.8252,
37
+ "eval_samples_per_second": 2.491,
38
+ "eval_steps_per_second": 0.733,
39
  "step": 42
40
  },
41
  {
42
  "epoch": 3.0,
43
+ "grad_norm": 0.730018138885498,
44
+ "learning_rate": 5e-05,
45
+ "loss": 0.9198,
46
  "step": 63
47
  },
48
  {
49
  "epoch": 3.0,
50
+ "eval_loss": 1.0076608657836914,
51
+ "eval_runtime": 6.788,
52
+ "eval_samples_per_second": 2.504,
53
+ "eval_steps_per_second": 0.737,
54
  "step": 63
55
  },
56
  {
57
  "epoch": 4.0,
58
+ "grad_norm": 0.912876546382904,
59
+ "learning_rate": 1.4644660940672627e-05,
60
+ "loss": 0.8883,
61
  "step": 84
62
  },
63
  {
64
  "epoch": 4.0,
65
+ "eval_loss": 1.0058414936065674,
66
+ "eval_runtime": 6.8991,
67
+ "eval_samples_per_second": 2.464,
68
+ "eval_steps_per_second": 0.725,
69
  "step": 84
70
  },
71
  {
72
  "epoch": 5.0,
73
+ "grad_norm": 0.5380117893218994,
74
  "learning_rate": 0.0,
75
+ "loss": 0.8727,
76
  "step": 105
77
  },
78
  {
79
  "epoch": 5.0,
80
+ "eval_loss": 1.0051764249801636,
81
+ "eval_runtime": 6.8394,
82
+ "eval_samples_per_second": 2.486,
83
+ "eval_steps_per_second": 0.731,
84
  "step": 105
85
  },
86
  {
87
  "epoch": 5.0,
88
  "step": 105,
89
  "total_flos": 8689108767160320.0,
90
+ "train_loss": 0.9847308204287575,
91
+ "train_runtime": 581.6572,
92
+ "train_samples_per_second": 0.713,
93
  "train_steps_per_second": 0.181
94
  }
95
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ddbf45ff3036803d9eefb17433db74ae9d1d762a4ec386a585d223f7ba8f5362
3
  size 5624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f13e576b6949e0d2dec213e01d234ec58654620365de37ce95f267570e8fb4ee
3
  size 5624