ncbateman committed
Commit fb5b946 · verified · 1 Parent(s): 6ad8c24

Training in progress, step 9, checkpoint

last-checkpoint/adapter_config.json CHANGED
@@ -21,12 +21,12 @@
   "revision": null,
   "target_modules": [
     "v_proj",
-    "up_proj",
     "k_proj",
-    "gate_proj",
     "o_proj",
+    "gate_proj",
+    "q_proj",
     "down_proj",
-    "q_proj"
+    "up_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:91675e0edb8fc212240b19dbe4c0dfe4bdd9b5aabe5562193896fdc75be5b749
+oid sha256:1456fe05d4a5f61e4112aa90db1cce20673480e548d9f151a4ab5a551cf4e7a7
 size 97307544
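The adapter weights themselves live in Git LFS; the repository only stores this pointer (a SHA-256 oid plus a byte size), so the diff is simply the new hash. A hedged sketch of verifying a locally downloaded copy against the pointer shown above (the local path is hypothetical):

```python
# Sketch: confirm a downloaded file matches the LFS pointer's oid and size.
# The path below is a hypothetical local copy of the checkpoint file.
import hashlib
from pathlib import Path

path = Path("last-checkpoint/adapter_model.safetensors")  # hypothetical location
expected_oid = "1456fe05d4a5f61e4112aa90db1cce20673480e548d9f151a4ab5a551cf4e7a7"
expected_size = 97307544

assert path.stat().st_size == expected_size, "size mismatch"
digest = hashlib.sha256(path.read_bytes()).hexdigest()
assert digest == expected_oid, "sha256 mismatch"
print("file matches the LFS pointer")
```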
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fcd3defc66f8366a1638dad92d39c903c00068597515a3d9f671e77030721a16
+oid sha256:42169ad320d4e6831462a4b4e3ff65fbb1ddee54d9b0ad98a74b29f02985694c
 size 49846260
last-checkpoint/trainer_state.json CHANGED
@@ -10,7 +10,7 @@
   "log_history": [
     {
       "epoch": 0.11428571428571428,
-      "grad_norm": 0.297858864068985,
+      "grad_norm": 0.2845376431941986,
       "learning_rate": 3.3333333333333333e-06,
       "loss": 0.9229,
       "step": 1
@@ -18,73 +18,73 @@
     {
       "epoch": 0.11428571428571428,
       "eval_loss": 0.9428475499153137,
-      "eval_runtime": 1.1726,
-      "eval_samples_per_second": 12.792,
-      "eval_steps_per_second": 1.706,
+      "eval_runtime": 1.1757,
+      "eval_samples_per_second": 12.759,
+      "eval_steps_per_second": 1.701,
       "step": 1
     },
     {
       "epoch": 0.22857142857142856,
-      "grad_norm": 0.28549230098724365,
+      "grad_norm": 0.2736399173736572,
       "learning_rate": 6.666666666666667e-06,
       "loss": 0.934,
       "step": 2
     },
     {
       "epoch": 0.34285714285714286,
-      "grad_norm": 0.29542049765586853,
+      "grad_norm": 0.2849612534046173,
       "learning_rate": 1e-05,
-      "loss": 0.9051,
+      "loss": 0.9062,
       "step": 3
     },
     {
       "epoch": 0.45714285714285713,
-      "grad_norm": 0.31487610936164856,
+      "grad_norm": 0.2999660074710846,
       "learning_rate": 1.3333333333333333e-05,
-      "loss": 0.9611,
+      "loss": 0.961,
       "step": 4
     },
     {
       "epoch": 0.5714285714285714,
-      "grad_norm": 0.28305381536483765,
+      "grad_norm": 0.28077682852745056,
       "learning_rate": 1.6666666666666667e-05,
-      "loss": 0.8858,
+      "loss": 0.8859,
       "step": 5
     },
     {
       "epoch": 0.5714285714285714,
-      "eval_loss": 0.9421737194061279,
-      "eval_runtime": 1.1755,
-      "eval_samples_per_second": 12.761,
-      "eval_steps_per_second": 1.701,
+      "eval_loss": 0.9408887624740601,
+      "eval_runtime": 1.173,
+      "eval_samples_per_second": 12.788,
+      "eval_steps_per_second": 1.705,
       "step": 5
     },
     {
       "epoch": 0.6857142857142857,
-      "grad_norm": 0.27843713760375977,
+      "grad_norm": 0.26969340443611145,
       "learning_rate": 2e-05,
-      "loss": 0.8866,
+      "loss": 0.8855,
       "step": 6
     },
     {
       "epoch": 0.8,
-      "grad_norm": 0.29819053411483765,
+      "grad_norm": 0.29263830184936523,
       "learning_rate": 2.3333333333333336e-05,
-      "loss": 0.946,
+      "loss": 0.947,
       "step": 7
     },
     {
       "epoch": 0.9142857142857143,
-      "grad_norm": 0.28852030634880066,
+      "grad_norm": 0.2760126292705536,
       "learning_rate": 2.6666666666666667e-05,
-      "loss": 0.9269,
+      "loss": 0.9271,
       "step": 8
     },
     {
       "epoch": 1.0285714285714285,
-      "grad_norm": 0.29183101654052734,
+      "grad_norm": 0.28623923659324646,
       "learning_rate": 3e-05,
-      "loss": 0.9571,
+      "loss": 0.9583,
       "step": 9
     }
   ],
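The log_history entries updated above are plain JSON written by the trainer: training steps carry loss, grad_norm, and learning_rate, while evaluation entries carry eval_loss and timing fields. A minimal sketch of reading them back (the local path is hypothetical):

```python
# Sketch: summarize the log_history recorded in trainer_state.json.
# Training-step entries have "loss"/"grad_norm"; eval entries have "eval_loss".
import json

with open("last-checkpoint/trainer_state.json") as f:  # hypothetical local path
    state = json.load(f)

for entry in state["log_history"]:
    if "loss" in entry:
        print(f"step {entry['step']}: loss={entry['loss']}, "
              f"grad_norm={entry['grad_norm']}, lr={entry['learning_rate']}")
    elif "eval_loss" in entry:
        print(f"step {entry['step']}: eval_loss={entry['eval_loss']}")
```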
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e99230b71d0796f311dacff19bbd23ce0c156c91f56dcad6d2641c4a67d801b4
+oid sha256:9ee7c94ea541326b0c9a6f9078d58f0ff70cdf993db3cbf645a326d138dce960
 size 6712
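training_args.bin is the torch-serialized TrainingArguments object the Trainer saves alongside each checkpoint; its hash changes here even though the size stays the same. A hedged sketch of inspecting it, assuming a trusted local copy (the path and printed fields are examples, and weights_only=False is needed because the file is a pickle, not a plain tensor archive):

```python
# Sketch: load the serialized TrainingArguments from a trusted local copy.
# Only do this for files you trust, since unpickling executes arbitrary code.
import torch

args = torch.load("last-checkpoint/training_args.bin",  # hypothetical local path
                  weights_only=False)
print(args.learning_rate, args.num_train_epochs, args.eval_steps)
```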