pefanis27 committed on
Commit
9f8219b
·
verified ·
1 Parent(s): 409ed64

phi-3.5-new

Browse files
adapter_config.json CHANGED
@@ -23,10 +23,10 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "o_proj",
27
- "qkv_proj",
28
  "gate_up_proj",
29
- "down_proj"
 
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
 
 
26
  "gate_up_proj",
27
+ "o_proj",
28
+ "down_proj",
29
+ "qkv_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:272a00f063966eb6a188503f16dfaafeeb0b3938fb064be3e153d64dfa16cb4b
3
  size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1de015679f6b9c3f3cebe7a92f3fb63d181165fcdf04bf929c38c658d15a6185
3
  size 100697728
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_loss": 1.006455421447754,
4
- "eval_runtime": 6.8379,
5
- "eval_samples_per_second": 2.486,
6
- "eval_steps_per_second": 0.731,
7
  "total_flos": 8689108767160320.0,
8
- "train_loss": 0.9555120922270275,
9
- "train_runtime": 582.4007,
10
- "train_samples_per_second": 0.713,
11
- "train_steps_per_second": 0.18
12
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_loss": 1.0679360628128052,
4
+ "eval_runtime": 6.7666,
5
+ "eval_samples_per_second": 2.512,
6
+ "eval_steps_per_second": 0.739,
7
  "total_flos": 8689108767160320.0,
8
+ "train_loss": 1.114040283929734,
9
+ "train_runtime": 580.4964,
10
+ "train_samples_per_second": 0.715,
11
+ "train_steps_per_second": 0.181
12
  }
eval_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_loss": 1.006455421447754,
4
- "eval_runtime": 6.8379,
5
- "eval_samples_per_second": 2.486,
6
- "eval_steps_per_second": 0.731
7
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_loss": 1.0679360628128052,
4
+ "eval_runtime": 6.7666,
5
+ "eval_samples_per_second": 2.512,
6
+ "eval_steps_per_second": 0.739
7
  }
runs/Jan13_05-53-27_dmlab/events.out.tfevents.1736740407.dmlab.71228.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7da475c892856f07b3fe86892f9b3b6f0123c137bf03e8167b5e9f7feb202f94
3
+ size 11029
runs/Jan13_05-53-27_dmlab/events.out.tfevents.1736740995.dmlab.71228.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05da92b40cc13cec68ab3a14131fcd62d62b8d42f8b354900515bf8cea28cadf
3
+ size 354
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 5.0,
3
  "total_flos": 8689108767160320.0,
4
- "train_loss": 0.9555120922270275,
5
- "train_runtime": 582.4007,
6
- "train_samples_per_second": 0.713,
7
- "train_steps_per_second": 0.18
8
  }
 
1
  {
2
  "epoch": 5.0,
3
  "total_flos": 8689108767160320.0,
4
+ "train_loss": 1.114040283929734,
5
+ "train_runtime": 580.4964,
6
+ "train_samples_per_second": 0.715,
7
+ "train_steps_per_second": 0.181
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.9966219663619995,
3
- "best_model_checkpoint": "/home/labuser/Documents/phi-3/phi-3.5-new/checkpoint-63",
4
  "epoch": 5.0,
5
  "eval_steps": 500,
6
  "global_step": 105,
@@ -10,87 +10,87 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 0.5774533748626709,
14
- "learning_rate": 0.0002,
15
- "loss": 1.2076,
16
  "step": 21
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_loss": 1.060381531715393,
21
- "eval_runtime": 6.8834,
22
- "eval_samples_per_second": 2.47,
23
- "eval_steps_per_second": 0.726,
24
  "step": 21
25
  },
26
  {
27
  "epoch": 2.0,
28
- "grad_norm": 0.4814026951789856,
29
- "learning_rate": 0.00017071067811865476,
30
- "loss": 0.9571,
31
  "step": 42
32
  },
33
  {
34
  "epoch": 2.0,
35
- "eval_loss": 0.998464047908783,
36
- "eval_runtime": 6.8587,
37
- "eval_samples_per_second": 2.479,
38
- "eval_steps_per_second": 0.729,
39
  "step": 42
40
  },
41
  {
42
  "epoch": 3.0,
43
- "grad_norm": 0.5674614906311035,
44
- "learning_rate": 0.0001,
45
- "loss": 0.901,
46
  "step": 63
47
  },
48
  {
49
  "epoch": 3.0,
50
- "eval_loss": 0.9966219663619995,
51
- "eval_runtime": 6.8664,
52
- "eval_samples_per_second": 2.476,
53
- "eval_steps_per_second": 0.728,
54
  "step": 63
55
  },
56
  {
57
  "epoch": 4.0,
58
- "grad_norm": 0.5171260833740234,
59
- "learning_rate": 2.9289321881345254e-05,
60
- "loss": 0.8659,
61
  "step": 84
62
  },
63
  {
64
  "epoch": 4.0,
65
- "eval_loss": 1.001924753189087,
66
- "eval_runtime": 6.8571,
67
- "eval_samples_per_second": 2.479,
68
- "eval_steps_per_second": 0.729,
69
  "step": 84
70
  },
71
  {
72
  "epoch": 5.0,
73
- "grad_norm": 0.4767768681049347,
74
  "learning_rate": 0.0,
75
- "loss": 0.846,
76
  "step": 105
77
  },
78
  {
79
  "epoch": 5.0,
80
- "eval_loss": 1.0021475553512573,
81
- "eval_runtime": 6.8536,
82
- "eval_samples_per_second": 2.48,
83
- "eval_steps_per_second": 0.73,
84
  "step": 105
85
  },
86
  {
87
  "epoch": 5.0,
88
  "step": 105,
89
  "total_flos": 8689108767160320.0,
90
- "train_loss": 0.9555120922270275,
91
- "train_runtime": 582.4007,
92
- "train_samples_per_second": 0.713,
93
- "train_steps_per_second": 0.18
94
  }
95
  ],
96
  "logging_steps": 500,
@@ -105,7 +105,7 @@
105
  "early_stopping_threshold": 0.0
106
  },
107
  "attributes": {
108
- "early_stopping_patience_counter": 2
109
  }
110
  },
111
  "TrainerControl": {
 
1
  {
2
+ "best_metric": 1.0568883419036865,
3
+ "best_model_checkpoint": "/home/labuser/Documents/phi-3/phi-3.5-new/checkpoint-105",
4
  "epoch": 5.0,
5
  "eval_steps": 500,
6
  "global_step": 105,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 0.9515901803970337,
14
+ "learning_rate": 2e-05,
15
+ "loss": 1.3492,
16
  "step": 21
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_loss": 1.2440305948257446,
21
+ "eval_runtime": 6.9085,
22
+ "eval_samples_per_second": 2.461,
23
+ "eval_steps_per_second": 0.724,
24
  "step": 21
25
  },
26
  {
27
  "epoch": 2.0,
28
+ "grad_norm": 0.7905489802360535,
29
+ "learning_rate": 1.7071067811865477e-05,
30
+ "loss": 1.1695,
31
  "step": 42
32
  },
33
  {
34
  "epoch": 2.0,
35
+ "eval_loss": 1.1394829750061035,
36
+ "eval_runtime": 6.8809,
37
+ "eval_samples_per_second": 2.471,
38
+ "eval_steps_per_second": 0.727,
39
  "step": 42
40
  },
41
  {
42
  "epoch": 3.0,
43
+ "grad_norm": 0.7479790449142456,
44
+ "learning_rate": 1e-05,
45
+ "loss": 1.0622,
46
  "step": 63
47
  },
48
  {
49
  "epoch": 3.0,
50
+ "eval_loss": 1.081365942955017,
51
+ "eval_runtime": 6.9014,
52
+ "eval_samples_per_second": 2.463,
53
+ "eval_steps_per_second": 0.724,
54
  "step": 63
55
  },
56
  {
57
  "epoch": 4.0,
58
+ "grad_norm": 0.8422548174858093,
59
+ "learning_rate": 2.9289321881345257e-06,
60
+ "loss": 1.0051,
61
  "step": 84
62
  },
63
  {
64
  "epoch": 4.0,
65
+ "eval_loss": 1.0581644773483276,
66
+ "eval_runtime": 6.7884,
67
+ "eval_samples_per_second": 2.504,
68
+ "eval_steps_per_second": 0.737,
69
  "step": 84
70
  },
71
  {
72
  "epoch": 5.0,
73
+ "grad_norm": 0.6641820073127747,
74
  "learning_rate": 0.0,
75
+ "loss": 0.9842,
76
  "step": 105
77
  },
78
  {
79
  "epoch": 5.0,
80
+ "eval_loss": 1.0568883419036865,
81
+ "eval_runtime": 6.7736,
82
+ "eval_samples_per_second": 2.51,
83
+ "eval_steps_per_second": 0.738,
84
  "step": 105
85
  },
86
  {
87
  "epoch": 5.0,
88
  "step": 105,
89
  "total_flos": 8689108767160320.0,
90
+ "train_loss": 1.114040283929734,
91
+ "train_runtime": 580.4964,
92
+ "train_samples_per_second": 0.715,
93
+ "train_steps_per_second": 0.181
94
  }
95
  ],
96
  "logging_steps": 500,
 
105
  "early_stopping_threshold": 0.0
106
  },
107
  "attributes": {
108
+ "early_stopping_patience_counter": 0
109
  }
110
  },
111
  "TrainerControl": {
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b13726b299e8575d2863caea80e5c8126effe8fd59991a7918fa2d8a2e87a4b9
3
  size 5624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddbf45ff3036803d9eefb17433db74ae9d1d762a4ec386a585d223f7ba8f5362
3
  size 5624