fhdz committed on
Commit
f03486f
·
1 Parent(s): 6e380e1

update config and vocab with new special tokens handling

Browse files
Files changed (2) hide show
  1. config.json +40 -37
  2. vocab.json +10 -5
config.json CHANGED
@@ -1,36 +1,39 @@
1
  {
2
- "data": null,
3
- "tgt_vocab_size": 128256,
4
  "transforms": [
5
  "onmt_tokenize",
6
  "filtertoolong"
7
  ],
8
- "src_vocab": null,
9
- "decoder_start_token": "<s>",
10
- "skip_empty_level": "silent",
11
- "share_vocab": true,
12
- "n_sample": 0,
13
- "vocab_size_multiple": 8,
14
  "save_data": null,
 
15
  "tgt_vocab": null,
 
 
 
 
16
  "src_vocab_size": 128256,
 
 
17
  "training": {
18
- "accum_count": [
19
- 32
20
- ],
21
- "batch_size": 896,
22
  "accum_steps": [
23
  0
24
  ],
25
- "group_size": 0,
26
- "valid_batch_size": 256,
27
  "batch_size_multiple": 1,
28
- "normalization": "tokens",
 
29
  "quant_type": "",
 
 
 
30
  "w_bit": 0,
31
  "compute_dtype": "torch.bfloat16",
32
- "batch_type": "tokens",
33
- "quant_layers": []
 
 
34
  },
35
  "transforms_configs": {
36
  "filtertoolong": {
@@ -1070,42 +1073,42 @@
1070
  }
1071
  },
1072
  "model": {
1073
- "num_experts_per_tok": 0,
1074
- "add_ffnbias": false,
1075
- "left_pad": true,
1076
- "layer_norm": "rms",
1077
  "num_experts": 0,
1078
  "heads": 32,
1079
- "parallel_residual": false,
1080
- "add_qkvbias": false,
1081
- "head_dim": null,
1082
- "transformer_ff": 14336,
1083
- "heads_kv": 8,
1084
  "norm_eps": 1e-05,
1085
- "architecture": "transformer_lm",
1086
  "mlp_activation_fn": "gated-silu",
1087
- "hidden_size": 4096,
1088
- "shared_layer_norm": false,
1089
- "sliding_window": 0,
1090
  "layers": 32,
 
 
 
 
 
 
 
1091
  "embeddings": {
1092
  "n_positions": 0,
 
1093
  "position_encoding_type": "Rotary",
1094
- "src_word_vec_size": 4096,
1095
- "tgt_word_vec_size": 4096
1096
  },
1097
  "rope_config": {
1098
- "rotary_theta": 500000,
1099
- "rotary_interleave": false
1100
  },
1101
  "decoder": {
1102
  "position_encoding_type": "Rotary",
1103
- "decoder_type": "transformer_lm",
1104
  "n_positions": 0,
 
1105
  "tgt_word_vec_size": 4096,
1106
  "rope_config": {
1107
- "rotary_theta": 500000,
1108
- "rotary_interleave": false
1109
  }
1110
  }
1111
  },
 
1
  {
2
+ "eos_token": "<|end_of_text|>",
3
+ "share_vocab": true,
4
  "transforms": [
5
  "onmt_tokenize",
6
  "filtertoolong"
7
  ],
8
+ "data": null,
9
+ "tgt_vocab_size": 128256,
 
 
 
 
10
  "save_data": null,
11
+ "vocab_size_multiple": 8,
12
  "tgt_vocab": null,
13
+ "n_sample": 0,
14
+ "src_vocab": null,
15
+ "skip_empty_level": "silent",
16
+ "bos_token": "<|begin_of_text|>",
17
  "src_vocab_size": 128256,
18
+ "decoder_start_token": "<|begin_of_text|>",
19
+ "pad_token": "<blank>",
20
  "training": {
 
 
 
 
21
  "accum_steps": [
22
  0
23
  ],
 
 
24
  "batch_size_multiple": 1,
25
+ "valid_batch_size": 256,
26
+ "batch_type": "tokens",
27
  "quant_type": "",
28
+ "accum_count": [
29
+ 32
30
+ ],
31
  "w_bit": 0,
32
  "compute_dtype": "torch.bfloat16",
33
+ "normalization": "tokens",
34
+ "quant_layers": [],
35
+ "group_size": 0,
36
+ "batch_size": 896
37
  },
38
  "transforms_configs": {
39
  "filtertoolong": {
 
1073
  }
1074
  },
1075
  "model": {
1076
+ "head_dim": null,
1077
+ "sliding_window": 0,
1078
+ "architecture": "transformer_lm",
 
1079
  "num_experts": 0,
1080
  "heads": 32,
1081
+ "hidden_size": 4096,
 
 
 
 
1082
  "norm_eps": 1e-05,
1083
+ "num_experts_per_tok": 0,
1084
  "mlp_activation_fn": "gated-silu",
1085
+ "left_pad": true,
 
 
1086
  "layers": 32,
1087
+ "layer_norm": "rms",
1088
+ "shared_layer_norm": false,
1089
+ "add_ffnbias": false,
1090
+ "heads_kv": 8,
1091
+ "add_qkvbias": false,
1092
+ "parallel_residual": false,
1093
+ "transformer_ff": 14336,
1094
  "embeddings": {
1095
  "n_positions": 0,
1096
+ "tgt_word_vec_size": 4096,
1097
  "position_encoding_type": "Rotary",
1098
+ "src_word_vec_size": 4096
 
1099
  },
1100
  "rope_config": {
1101
+ "rotary_interleave": false,
1102
+ "rotary_theta": 500000
1103
  },
1104
  "decoder": {
1105
  "position_encoding_type": "Rotary",
 
1106
  "n_positions": 0,
1107
+ "decoder_type": "transformer_lm",
1108
  "tgt_word_vec_size": 4096,
1109
  "rope_config": {
1110
+ "rotary_interleave": false,
1111
+ "rotary_theta": 500000
1112
  }
1113
  }
1114
  },
vocab.json CHANGED
@@ -128000,7 +128000,7 @@
128000
  "ĠвÑĭÑģокой",
128001
  "ãĥ¼ãĥ¼",
128002
  "éĶ¦",
128003
- "<s>",
128004
  "<|end_of_text|>",
128005
  "<|reserved_special_token_0|>",
128006
  "<|reserved_special_token_1|>",
@@ -128009,7 +128009,7 @@
128009
  "<|start_header_id|>",
128010
  "<|end_header_id|>",
128011
  "<|reserved_special_token_4|>",
128012
- "</s>",
128013
  "<|reserved_special_token_5|>",
128014
  "<|reserved_special_token_6|>",
128015
  "<|reserved_special_token_7|>",
@@ -256258,7 +256258,7 @@
256258
  "ĠвÑĭÑģокой",
256259
  "ãĥ¼ãĥ¼",
256260
  "éĶ¦",
256261
- "<s>",
256262
  "<|end_of_text|>",
256263
  "<|reserved_special_token_0|>",
256264
  "<|reserved_special_token_1|>",
@@ -256267,7 +256267,7 @@
256267
  "<|start_header_id|>",
256268
  "<|end_header_id|>",
256269
  "<|reserved_special_token_4|>",
256270
- "</s>",
256271
  "<|reserved_special_token_5|>",
256272
  "<|reserved_special_token_6|>",
256273
  "<|reserved_special_token_7|>",
@@ -256515,5 +256515,10 @@
256515
  "<|reserved_special_token_249|>",
256516
  "<|reserved_special_token_250|>"
256517
  ],
256518
- "decoder_start_token": "<s>"
 
 
 
 
 
256519
  }
 
128000
  "ĠвÑĭÑģокой",
128001
  "ãĥ¼ãĥ¼",
128002
  "éĶ¦",
128003
+ "<|begin_of_text|>",
128004
  "<|end_of_text|>",
128005
  "<|reserved_special_token_0|>",
128006
  "<|reserved_special_token_1|>",
 
128009
  "<|start_header_id|>",
128010
  "<|end_header_id|>",
128011
  "<|reserved_special_token_4|>",
128012
+ "<|eot_id|>",
128013
  "<|reserved_special_token_5|>",
128014
  "<|reserved_special_token_6|>",
128015
  "<|reserved_special_token_7|>",
 
256258
  "ĠвÑĭÑģокой",
256259
  "ãĥ¼ãĥ¼",
256260
  "éĶ¦",
256261
+ "<|begin_of_text|>",
256262
  "<|end_of_text|>",
256263
  "<|reserved_special_token_0|>",
256264
  "<|reserved_special_token_1|>",
 
256267
  "<|start_header_id|>",
256268
  "<|end_header_id|>",
256269
  "<|reserved_special_token_4|>",
256270
+ "<|eot_id|>",
256271
  "<|reserved_special_token_5|>",
256272
  "<|reserved_special_token_6|>",
256273
  "<|reserved_special_token_7|>",
 
256515
  "<|reserved_special_token_249|>",
256516
  "<|reserved_special_token_250|>"
256517
  ],
256518
+ "decoder_start_token": "<|begin_of_text|>",
256519
+ "specials": {
256520
+ "bos_token": "<|begin_of_text|>",
256521
+ "eos_token": "<|end_of_text|>",
256522
+ "pad_token": "<blank>"
256523
+ }
256524
  }