saroyehun commited on
Commit
21f2c3f
·
verified ·
1 Parent(s): 7b551c0

Upload folder using huggingface_hub

Browse files
adapter_config.json CHANGED
@@ -23,13 +23,13 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "up_proj",
 
27
  "q_proj",
 
28
  "gate_proj",
29
  "down_proj",
30
- "k_proj",
31
- "v_proj",
32
- "o_proj"
33
  ],
34
  "task_type": null,
35
  "use_dora": false,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
+ "k_proj",
27
+ "v_proj",
28
  "q_proj",
29
+ "o_proj",
30
  "gate_proj",
31
  "down_proj",
32
+ "up_proj"
 
 
33
  ],
34
  "task_type": null,
35
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09b6ad08e785ff4a7e6fa0f752e3c1d0a8876cc57b725d97ff4ddbfb97806007
3
  size 167829552
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db460eeecfa91492354ea790b21b8a7e91b5c5009f042d56cc701dc751672b00
3
  size 167829552
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:910fb028a194fc81d6e4c25c5f50f4777625b3074cbaeff4a4d3457f55da9cc0
3
  size 2437269589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c849359fb82e245721a869ac39c84aadf1f709dccf13814a7acdf56baf9d4c31
3
  size 2437269589
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf8e4b940c685c9a29bb9c56e24fc55d3eca8893584937552a982f0a3ded15ed
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d308617a3f0ded8c54da0edb3b08058bd87e51eb8559c9109d22d584d4df93b
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e13e3002f028d33baa0e0c26f51425d8900d6d3234ac815ebaa0bea860e786f0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8a0e278424810899810446e537fd2d6c54cb88b2a6adecce26f9308494a9937
3
  size 1064
tokenizer.json CHANGED
@@ -1,6 +1,11 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 512,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
  "padding": null,
10
  "added_tokens": [
11
  {
trainer_state.json CHANGED
@@ -1,125 +1,101 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.10816657652785289,
5
- "eval_steps": 100,
6
- "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.01081665765278529,
13
- "eval_accuracy": 0.7290303584415322,
14
- "eval_loss": 1.2268004417419434,
15
- "eval_runtime": 456.6189,
16
- "eval_samples_per_second": 34.25,
17
- "eval_steps_per_second": 1.071,
18
- "step": 100
19
- },
20
- {
21
- "epoch": 0.02163331530557058,
22
- "eval_accuracy": 0.7563876376099345,
23
- "eval_loss": 1.078468918800354,
24
- "eval_runtime": 456.7154,
25
- "eval_samples_per_second": 34.242,
26
- "eval_steps_per_second": 1.071,
27
- "step": 200
28
- },
29
- {
30
- "epoch": 0.03244997295835587,
31
- "eval_accuracy": 0.7668799267017361,
32
- "eval_loss": 1.0238652229309082,
33
- "eval_runtime": 456.7597,
34
- "eval_samples_per_second": 34.239,
35
- "eval_steps_per_second": 1.071,
36
- "step": 300
37
  },
38
  {
39
- "epoch": 0.04326663061114116,
40
- "eval_accuracy": 0.7730476325324204,
41
- "eval_loss": 0.9894265532493591,
42
- "eval_runtime": 456.7371,
43
- "eval_samples_per_second": 34.241,
44
- "eval_steps_per_second": 1.071,
45
- "step": 400
46
  },
47
  {
48
- "epoch": 0.05408328826392644,
49
- "grad_norm": 2.1639328002929688,
50
- "learning_rate": 4.909861186226789e-05,
51
- "loss": 1.2276,
52
- "step": 500
53
  },
54
  {
55
- "epoch": 0.05408328826392644,
56
- "eval_accuracy": 0.7784154459204475,
57
- "eval_loss": 0.9612082839012146,
58
- "eval_runtime": 456.6452,
59
- "eval_samples_per_second": 34.248,
60
- "eval_steps_per_second": 1.071,
61
- "step": 500
62
  },
63
  {
64
- "epoch": 0.06489994591671173,
65
- "eval_accuracy": 0.782137070411325,
66
- "eval_loss": 0.9466578960418701,
67
- "eval_runtime": 456.6288,
68
- "eval_samples_per_second": 34.249,
69
- "eval_steps_per_second": 1.071,
70
- "step": 600
71
  },
72
  {
73
- "epoch": 0.07571660356949703,
74
- "eval_accuracy": 0.7840395143262153,
75
- "eval_loss": 0.930482804775238,
76
- "eval_runtime": 456.4891,
77
- "eval_samples_per_second": 34.259,
78
- "eval_steps_per_second": 1.071,
79
- "step": 700
80
  },
81
  {
82
- "epoch": 0.08653326122228232,
83
- "eval_accuracy": 0.7867131143188731,
84
- "eval_loss": 0.9141340851783752,
85
- "eval_runtime": 456.5273,
86
- "eval_samples_per_second": 34.256,
87
- "eval_steps_per_second": 1.071,
88
- "step": 800
89
  },
90
  {
91
- "epoch": 0.09734991887506761,
92
- "eval_accuracy": 0.7895170312687647,
93
- "eval_loss": 0.9005721211433411,
94
- "eval_runtime": 456.6578,
95
- "eval_samples_per_second": 34.247,
96
- "eval_steps_per_second": 1.071,
97
- "step": 900
98
  },
99
  {
100
- "epoch": 0.10816657652785289,
101
- "grad_norm": 1.771813154220581,
102
- "learning_rate": 4.819722372453579e-05,
103
- "loss": 0.9226,
104
- "step": 1000
105
  },
106
  {
107
- "epoch": 0.10816657652785289,
108
- "eval_accuracy": 0.7909631562396202,
109
- "eval_loss": 0.896154522895813,
110
- "eval_runtime": 456.9174,
111
- "eval_samples_per_second": 34.227,
112
- "eval_steps_per_second": 1.07,
113
- "step": 1000
114
  }
115
  ],
116
  "logging_steps": 500,
117
- "max_steps": 27735,
118
  "num_input_tokens_seen": 0,
119
  "num_train_epochs": 3,
120
- "save_steps": 200,
121
- "total_flos": 7.41887283560448e+17,
122
- "train_batch_size": 32,
123
  "trial_name": null,
124
  "trial_params": null
125
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.08815232722143865,
5
+ "eval_steps": 500,
6
+ "global_step": 2500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.01763046544428773,
13
+ "grad_norm": 1.5435048341751099,
14
+ "learning_rate": 4.9706158909261876e-05,
15
+ "loss": 0.8407,
16
+ "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  },
18
  {
19
+ "epoch": 0.01763046544428773,
20
+ "eval_accuracy": 0.8252411503248731,
21
+ "eval_loss": 0.7565935850143433,
22
+ "eval_runtime": 2895.6955,
23
+ "eval_samples_per_second": 32.989,
24
+ "eval_steps_per_second": 0.516,
25
+ "step": 500
26
  },
27
  {
28
+ "epoch": 0.03526093088857546,
29
+ "grad_norm": 1.5116485357284546,
30
+ "learning_rate": 4.9412317818523744e-05,
31
+ "loss": 0.5964,
32
+ "step": 1000
33
  },
34
  {
35
+ "epoch": 0.03526093088857546,
36
+ "eval_accuracy": 0.8373293281429335,
37
+ "eval_loss": 0.6963507533073425,
38
+ "eval_runtime": 2899.1751,
39
+ "eval_samples_per_second": 32.949,
40
+ "eval_steps_per_second": 0.515,
41
+ "step": 1000
42
  },
43
  {
44
+ "epoch": 0.05289139633286319,
45
+ "grad_norm": 1.4373358488082886,
46
+ "learning_rate": 4.911847672778562e-05,
47
+ "loss": 0.5661,
48
+ "step": 1500
 
 
49
  },
50
  {
51
+ "epoch": 0.05289139633286319,
52
+ "eval_accuracy": 0.8443953465863471,
53
+ "eval_loss": 0.6656736731529236,
54
+ "eval_runtime": 2944.9636,
55
+ "eval_samples_per_second": 32.437,
56
+ "eval_steps_per_second": 0.507,
57
+ "step": 1500
58
  },
59
  {
60
+ "epoch": 0.07052186177715092,
61
+ "grad_norm": 1.216012716293335,
62
+ "learning_rate": 4.882463563704749e-05,
63
+ "loss": 0.5402,
64
+ "step": 2000
 
 
65
  },
66
  {
67
+ "epoch": 0.07052186177715092,
68
+ "eval_accuracy": 0.8482718545347777,
69
+ "eval_loss": 0.6440214514732361,
70
+ "eval_runtime": 2944.5243,
71
+ "eval_samples_per_second": 32.442,
72
+ "eval_steps_per_second": 0.507,
73
+ "step": 2000
74
  },
75
  {
76
+ "epoch": 0.08815232722143865,
77
+ "grad_norm": 1.0847452878952026,
78
+ "learning_rate": 4.853079454630936e-05,
79
+ "loss": 0.5237,
80
+ "step": 2500
81
  },
82
  {
83
+ "epoch": 0.08815232722143865,
84
+ "eval_accuracy": 0.8508165457808422,
85
+ "eval_loss": 0.6308088898658752,
86
+ "eval_runtime": 2933.5042,
87
+ "eval_samples_per_second": 32.564,
88
+ "eval_steps_per_second": 0.509,
89
+ "step": 2500
90
  }
91
  ],
92
  "logging_steps": 500,
93
+ "max_steps": 85080,
94
  "num_input_tokens_seen": 0,
95
  "num_train_epochs": 3,
96
+ "save_steps": 500,
97
+ "total_flos": 3.70943641780224e+18,
98
+ "train_batch_size": 64,
99
  "trial_name": null,
100
  "trial_params": null
101
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f5d28438c47cb396b581ed4e21f17a1d4cd12a8b4cc30b0dcbe645d79a66946
3
  size 5048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42bdf919ed7a6250ad8ef6a641eabe72eb284b41d7a86c9a598168438a8b6505
3
  size 5048