llama3-8b-instruct / config.json
fhdz's picture
update config and vocab with new special tokens handling
f03486f
{
"eos_token": "<|end_of_text|>",
"share_vocab": true,
"transforms": [
"onmt_tokenize",
"filtertoolong"
],
"data": null,
"tgt_vocab_size": 128256,
"save_data": null,
"vocab_size_multiple": 8,
"tgt_vocab": null,
"n_sample": 0,
"src_vocab": null,
"skip_empty_level": "silent",
"bos_token": "<|begin_of_text|>",
"src_vocab_size": 128256,
"decoder_start_token": "<|begin_of_text|>",
"pad_token": "<blank>",
"training": {
"accum_steps": [
0
],
"batch_size_multiple": 1,
"valid_batch_size": 256,
"batch_type": "tokens",
"quant_type": "",
"accum_count": [
32
],
"w_bit": 0,
"compute_dtype": "torch.bfloat16",
"normalization": "tokens",
"quant_layers": [],
"group_size": 0,
"batch_size": 896
},
"transforms_configs": {
"filtertoolong": {
"tgt_seq_length": 512,
"src_seq_length": 512
},
"onmt_tokenize": {
"gpt2_pretok": true,
"src_subword_model": "${MODEL_PATH}/bpe.model",
"mapped_tokens": [
[
"<|begin_of_text|>",
"⦅begin_of_text⦆"
],
[
"<|end_of_text|>",
"⦅end_of_text⦆"
],
[
"<|reserved_special_token_0|>",
"⦅reserved_special_token_0⦆"
],
[
"<|reserved_special_token_1|>",
"⦅reserved_special_token_1⦆"
],
[
"<|reserved_special_token_2|>",
"⦅reserved_special_token_2⦆"
],
[
"<|reserved_special_token_3|>",
"⦅reserved_special_token_3⦆"
],
[
"<|start_header_id|>",
"⦅start_header_id⦆"
],
[
"<|end_header_id|>",
"⦅end_header_id⦆"
],
[
"<|reserved_special_token_4|>",
"⦅reserved_special_token_4⦆"
],
[
"<|eot_id|>",
"⦅eot_id⦆"
],
[
"<|reserved_special_token_5|>",
"⦅reserved_special_token_5⦆"
],
[
"<|reserved_special_token_6|>",
"⦅reserved_special_token_6⦆"
],
[
"<|reserved_special_token_7|>",
"⦅reserved_special_token_7⦆"
],
[
"<|reserved_special_token_8|>",
"⦅reserved_special_token_8⦆"
],
[
"<|reserved_special_token_9|>",
"⦅reserved_special_token_9⦆"
],
[
"<|reserved_special_token_10|>",
"⦅reserved_special_token_10⦆"
],
[
"<|reserved_special_token_11|>",
"⦅reserved_special_token_11⦆"
],
[
"<|reserved_special_token_12|>",
"⦅reserved_special_token_12⦆"
],
[
"<|reserved_special_token_13|>",
"⦅reserved_special_token_13⦆"
],
[
"<|reserved_special_token_14|>",
"⦅reserved_special_token_14⦆"
],
[
"<|reserved_special_token_15|>",
"⦅reserved_special_token_15⦆"
],
[
"<|reserved_special_token_16|>",
"⦅reserved_special_token_16⦆"
],
[
"<|reserved_special_token_17|>",
"⦅reserved_special_token_17⦆"
],
[
"<|reserved_special_token_18|>",
"⦅reserved_special_token_18⦆"
],
[
"<|reserved_special_token_19|>",
"⦅reserved_special_token_19⦆"
],
[
"<|reserved_special_token_20|>",
"⦅reserved_special_token_20⦆"
],
[
"<|reserved_special_token_21|>",
"⦅reserved_special_token_21⦆"
],
[
"<|reserved_special_token_22|>",
"⦅reserved_special_token_22⦆"
],
[
"<|reserved_special_token_23|>",
"⦅reserved_special_token_23⦆"
],
[
"<|reserved_special_token_24|>",
"⦅reserved_special_token_24⦆"
],
[
"<|reserved_special_token_25|>",
"⦅reserved_special_token_25⦆"
],
[
"<|reserved_special_token_26|>",
"⦅reserved_special_token_26⦆"
],
[
"<|reserved_special_token_27|>",
"⦅reserved_special_token_27⦆"
],
[
"<|reserved_special_token_28|>",
"⦅reserved_special_token_28⦆"
],
[
"<|reserved_special_token_29|>",
"⦅reserved_special_token_29⦆"
],
[
"<|reserved_special_token_30|>",
"⦅reserved_special_token_30⦆"
],
[
"<|reserved_special_token_31|>",
"⦅reserved_special_token_31⦆"
],
[
"<|reserved_special_token_32|>",
"⦅reserved_special_token_32⦆"
],
[
"<|reserved_special_token_33|>",
"⦅reserved_special_token_33⦆"
],
[
"<|reserved_special_token_34|>",
"⦅reserved_special_token_34⦆"
],
[
"<|reserved_special_token_35|>",
"⦅reserved_special_token_35⦆"
],
[
"<|reserved_special_token_36|>",
"⦅reserved_special_token_36⦆"
],
[
"<|reserved_special_token_37|>",
"⦅reserved_special_token_37⦆"
],
[
"<|reserved_special_token_38|>",
"⦅reserved_special_token_38⦆"
],
[
"<|reserved_special_token_39|>",
"⦅reserved_special_token_39⦆"
],
[
"<|reserved_special_token_40|>",
"⦅reserved_special_token_40⦆"
],
[
"<|reserved_special_token_41|>",
"⦅reserved_special_token_41⦆"
],
[
"<|reserved_special_token_42|>",
"⦅reserved_special_token_42⦆"
],
[
"<|reserved_special_token_43|>",
"⦅reserved_special_token_43⦆"
],
[
"<|reserved_special_token_44|>",
"⦅reserved_special_token_44⦆"
],
[
"<|reserved_special_token_45|>",
"⦅reserved_special_token_45⦆"
],
[
"<|reserved_special_token_46|>",
"⦅reserved_special_token_46⦆"
],
[
"<|reserved_special_token_47|>",
"⦅reserved_special_token_47⦆"
],
[
"<|reserved_special_token_48|>",
"⦅reserved_special_token_48⦆"
],
[
"<|reserved_special_token_49|>",
"⦅reserved_special_token_49⦆"
],
[
"<|reserved_special_token_50|>",
"⦅reserved_special_token_50⦆"
],
[
"<|reserved_special_token_51|>",
"⦅reserved_special_token_51⦆"
],
[
"<|reserved_special_token_52|>",
"⦅reserved_special_token_52⦆"
],
[
"<|reserved_special_token_53|>",
"⦅reserved_special_token_53⦆"
],
[
"<|reserved_special_token_54|>",
"⦅reserved_special_token_54⦆"
],
[
"<|reserved_special_token_55|>",
"⦅reserved_special_token_55⦆"
],
[
"<|reserved_special_token_56|>",
"⦅reserved_special_token_56⦆"
],
[
"<|reserved_special_token_57|>",
"⦅reserved_special_token_57⦆"
],
[
"<|reserved_special_token_58|>",
"⦅reserved_special_token_58⦆"
],
[
"<|reserved_special_token_59|>",
"⦅reserved_special_token_59⦆"
],
[
"<|reserved_special_token_60|>",
"⦅reserved_special_token_60⦆"
],
[
"<|reserved_special_token_61|>",
"⦅reserved_special_token_61⦆"
],
[
"<|reserved_special_token_62|>",
"⦅reserved_special_token_62⦆"
],
[
"<|reserved_special_token_63|>",
"⦅reserved_special_token_63⦆"
],
[
"<|reserved_special_token_64|>",
"⦅reserved_special_token_64⦆"
],
[
"<|reserved_special_token_65|>",
"⦅reserved_special_token_65⦆"
],
[
"<|reserved_special_token_66|>",
"⦅reserved_special_token_66⦆"
],
[
"<|reserved_special_token_67|>",
"⦅reserved_special_token_67⦆"
],
[
"<|reserved_special_token_68|>",
"⦅reserved_special_token_68⦆"
],
[
"<|reserved_special_token_69|>",
"⦅reserved_special_token_69⦆"
],
[
"<|reserved_special_token_70|>",
"⦅reserved_special_token_70⦆"
],
[
"<|reserved_special_token_71|>",
"⦅reserved_special_token_71⦆"
],
[
"<|reserved_special_token_72|>",
"⦅reserved_special_token_72⦆"
],
[
"<|reserved_special_token_73|>",
"⦅reserved_special_token_73⦆"
],
[
"<|reserved_special_token_74|>",
"⦅reserved_special_token_74⦆"
],
[
"<|reserved_special_token_75|>",
"⦅reserved_special_token_75⦆"
],
[
"<|reserved_special_token_76|>",
"⦅reserved_special_token_76⦆"
],
[
"<|reserved_special_token_77|>",
"⦅reserved_special_token_77⦆"
],
[
"<|reserved_special_token_78|>",
"⦅reserved_special_token_78⦆"
],
[
"<|reserved_special_token_79|>",
"⦅reserved_special_token_79⦆"
],
[
"<|reserved_special_token_80|>",
"⦅reserved_special_token_80⦆"
],
[
"<|reserved_special_token_81|>",
"⦅reserved_special_token_81⦆"
],
[
"<|reserved_special_token_82|>",
"⦅reserved_special_token_82⦆"
],
[
"<|reserved_special_token_83|>",
"⦅reserved_special_token_83⦆"
],
[
"<|reserved_special_token_84|>",
"⦅reserved_special_token_84⦆"
],
[
"<|reserved_special_token_85|>",
"⦅reserved_special_token_85⦆"
],
[
"<|reserved_special_token_86|>",
"⦅reserved_special_token_86⦆"
],
[
"<|reserved_special_token_87|>",
"⦅reserved_special_token_87⦆"
],
[
"<|reserved_special_token_88|>",
"⦅reserved_special_token_88⦆"
],
[
"<|reserved_special_token_89|>",
"⦅reserved_special_token_89⦆"
],
[
"<|reserved_special_token_90|>",
"⦅reserved_special_token_90⦆"
],
[
"<|reserved_special_token_91|>",
"⦅reserved_special_token_91⦆"
],
[
"<|reserved_special_token_92|>",
"⦅reserved_special_token_92⦆"
],
[
"<|reserved_special_token_93|>",
"⦅reserved_special_token_93⦆"
],
[
"<|reserved_special_token_94|>",
"⦅reserved_special_token_94⦆"
],
[
"<|reserved_special_token_95|>",
"⦅reserved_special_token_95⦆"
],
[
"<|reserved_special_token_96|>",
"⦅reserved_special_token_96⦆"
],
[
"<|reserved_special_token_97|>",
"⦅reserved_special_token_97⦆"
],
[
"<|reserved_special_token_98|>",
"⦅reserved_special_token_98⦆"
],
[
"<|reserved_special_token_99|>",
"⦅reserved_special_token_99⦆"
],
[
"<|reserved_special_token_100|>",
"⦅reserved_special_token_100⦆"
],
[
"<|reserved_special_token_101|>",
"⦅reserved_special_token_101⦆"
],
[
"<|reserved_special_token_102|>",
"⦅reserved_special_token_102⦆"
],
[
"<|reserved_special_token_103|>",
"⦅reserved_special_token_103⦆"
],
[
"<|reserved_special_token_104|>",
"⦅reserved_special_token_104⦆"
],
[
"<|reserved_special_token_105|>",
"⦅reserved_special_token_105⦆"
],
[
"<|reserved_special_token_106|>",
"⦅reserved_special_token_106⦆"
],
[
"<|reserved_special_token_107|>",
"⦅reserved_special_token_107⦆"
],
[
"<|reserved_special_token_108|>",
"⦅reserved_special_token_108⦆"
],
[
"<|reserved_special_token_109|>",
"⦅reserved_special_token_109⦆"
],
[
"<|reserved_special_token_110|>",
"⦅reserved_special_token_110⦆"
],
[
"<|reserved_special_token_111|>",
"⦅reserved_special_token_111⦆"
],
[
"<|reserved_special_token_112|>",
"⦅reserved_special_token_112⦆"
],
[
"<|reserved_special_token_113|>",
"⦅reserved_special_token_113⦆"
],
[
"<|reserved_special_token_114|>",
"⦅reserved_special_token_114⦆"
],
[
"<|reserved_special_token_115|>",
"⦅reserved_special_token_115⦆"
],
[
"<|reserved_special_token_116|>",
"⦅reserved_special_token_116⦆"
],
[
"<|reserved_special_token_117|>",
"⦅reserved_special_token_117⦆"
],
[
"<|reserved_special_token_118|>",
"⦅reserved_special_token_118⦆"
],
[
"<|reserved_special_token_119|>",
"⦅reserved_special_token_119⦆"
],
[
"<|reserved_special_token_120|>",
"⦅reserved_special_token_120⦆"
],
[
"<|reserved_special_token_121|>",
"⦅reserved_special_token_121⦆"
],
[
"<|reserved_special_token_122|>",
"⦅reserved_special_token_122⦆"
],
[
"<|reserved_special_token_123|>",
"⦅reserved_special_token_123⦆"
],
[
"<|reserved_special_token_124|>",
"⦅reserved_special_token_124⦆"
],
[
"<|reserved_special_token_125|>",
"⦅reserved_special_token_125⦆"
],
[
"<|reserved_special_token_126|>",
"⦅reserved_special_token_126⦆"
],
[
"<|reserved_special_token_127|>",
"⦅reserved_special_token_127⦆"
],
[
"<|reserved_special_token_128|>",
"⦅reserved_special_token_128⦆"
],
[
"<|reserved_special_token_129|>",
"⦅reserved_special_token_129⦆"
],
[
"<|reserved_special_token_130|>",
"⦅reserved_special_token_130⦆"
],
[
"<|reserved_special_token_131|>",
"⦅reserved_special_token_131⦆"
],
[
"<|reserved_special_token_132|>",
"⦅reserved_special_token_132⦆"
],
[
"<|reserved_special_token_133|>",
"⦅reserved_special_token_133⦆"
],
[
"<|reserved_special_token_134|>",
"⦅reserved_special_token_134⦆"
],
[
"<|reserved_special_token_135|>",
"⦅reserved_special_token_135⦆"
],
[
"<|reserved_special_token_136|>",
"⦅reserved_special_token_136⦆"
],
[
"<|reserved_special_token_137|>",
"⦅reserved_special_token_137⦆"
],
[
"<|reserved_special_token_138|>",
"⦅reserved_special_token_138⦆"
],
[
"<|reserved_special_token_139|>",
"⦅reserved_special_token_139⦆"
],
[
"<|reserved_special_token_140|>",
"⦅reserved_special_token_140⦆"
],
[
"<|reserved_special_token_141|>",
"⦅reserved_special_token_141⦆"
],
[
"<|reserved_special_token_142|>",
"⦅reserved_special_token_142⦆"
],
[
"<|reserved_special_token_143|>",
"⦅reserved_special_token_143⦆"
],
[
"<|reserved_special_token_144|>",
"⦅reserved_special_token_144⦆"
],
[
"<|reserved_special_token_145|>",
"⦅reserved_special_token_145⦆"
],
[
"<|reserved_special_token_146|>",
"⦅reserved_special_token_146⦆"
],
[
"<|reserved_special_token_147|>",
"⦅reserved_special_token_147⦆"
],
[
"<|reserved_special_token_148|>",
"⦅reserved_special_token_148⦆"
],
[
"<|reserved_special_token_149|>",
"⦅reserved_special_token_149⦆"
],
[
"<|reserved_special_token_150|>",
"⦅reserved_special_token_150⦆"
],
[
"<|reserved_special_token_151|>",
"⦅reserved_special_token_151⦆"
],
[
"<|reserved_special_token_152|>",
"⦅reserved_special_token_152⦆"
],
[
"<|reserved_special_token_153|>",
"⦅reserved_special_token_153⦆"
],
[
"<|reserved_special_token_154|>",
"⦅reserved_special_token_154⦆"
],
[
"<|reserved_special_token_155|>",
"⦅reserved_special_token_155⦆"
],
[
"<|reserved_special_token_156|>",
"⦅reserved_special_token_156⦆"
],
[
"<|reserved_special_token_157|>",
"⦅reserved_special_token_157⦆"
],
[
"<|reserved_special_token_158|>",
"⦅reserved_special_token_158⦆"
],
[
"<|reserved_special_token_159|>",
"⦅reserved_special_token_159⦆"
],
[
"<|reserved_special_token_160|>",
"⦅reserved_special_token_160⦆"
],
[
"<|reserved_special_token_161|>",
"⦅reserved_special_token_161⦆"
],
[
"<|reserved_special_token_162|>",
"⦅reserved_special_token_162⦆"
],
[
"<|reserved_special_token_163|>",
"⦅reserved_special_token_163⦆"
],
[
"<|reserved_special_token_164|>",
"⦅reserved_special_token_164⦆"
],
[
"<|reserved_special_token_165|>",
"⦅reserved_special_token_165⦆"
],
[
"<|reserved_special_token_166|>",
"⦅reserved_special_token_166⦆"
],
[
"<|reserved_special_token_167|>",
"⦅reserved_special_token_167⦆"
],
[
"<|reserved_special_token_168|>",
"⦅reserved_special_token_168⦆"
],
[
"<|reserved_special_token_169|>",
"⦅reserved_special_token_169⦆"
],
[
"<|reserved_special_token_170|>",
"⦅reserved_special_token_170⦆"
],
[
"<|reserved_special_token_171|>",
"⦅reserved_special_token_171⦆"
],
[
"<|reserved_special_token_172|>",
"⦅reserved_special_token_172⦆"
],
[
"<|reserved_special_token_173|>",
"⦅reserved_special_token_173⦆"
],
[
"<|reserved_special_token_174|>",
"⦅reserved_special_token_174⦆"
],
[
"<|reserved_special_token_175|>",
"⦅reserved_special_token_175⦆"
],
[
"<|reserved_special_token_176|>",
"⦅reserved_special_token_176⦆"
],
[
"<|reserved_special_token_177|>",
"⦅reserved_special_token_177⦆"
],
[
"<|reserved_special_token_178|>",
"⦅reserved_special_token_178⦆"
],
[
"<|reserved_special_token_179|>",
"⦅reserved_special_token_179⦆"
],
[
"<|reserved_special_token_180|>",
"⦅reserved_special_token_180⦆"
],
[
"<|reserved_special_token_181|>",
"⦅reserved_special_token_181⦆"
],
[
"<|reserved_special_token_182|>",
"⦅reserved_special_token_182⦆"
],
[
"<|reserved_special_token_183|>",
"⦅reserved_special_token_183⦆"
],
[
"<|reserved_special_token_184|>",
"⦅reserved_special_token_184⦆"
],
[
"<|reserved_special_token_185|>",
"⦅reserved_special_token_185⦆"
],
[
"<|reserved_special_token_186|>",
"⦅reserved_special_token_186⦆"
],
[
"<|reserved_special_token_187|>",
"⦅reserved_special_token_187⦆"
],
[
"<|reserved_special_token_188|>",
"⦅reserved_special_token_188⦆"
],
[
"<|reserved_special_token_189|>",
"⦅reserved_special_token_189⦆"
],
[
"<|reserved_special_token_190|>",
"⦅reserved_special_token_190⦆"
],
[
"<|reserved_special_token_191|>",
"⦅reserved_special_token_191⦆"
],
[
"<|reserved_special_token_192|>",
"⦅reserved_special_token_192⦆"
],
[
"<|reserved_special_token_193|>",
"⦅reserved_special_token_193⦆"
],
[
"<|reserved_special_token_194|>",
"⦅reserved_special_token_194⦆"
],
[
"<|reserved_special_token_195|>",
"⦅reserved_special_token_195⦆"
],
[
"<|reserved_special_token_196|>",
"⦅reserved_special_token_196⦆"
],
[
"<|reserved_special_token_197|>",
"⦅reserved_special_token_197⦆"
],
[
"<|reserved_special_token_198|>",
"⦅reserved_special_token_198⦆"
],
[
"<|reserved_special_token_199|>",
"⦅reserved_special_token_199⦆"
],
[
"<|reserved_special_token_200|>",
"⦅reserved_special_token_200⦆"
],
[
"<|reserved_special_token_201|>",
"⦅reserved_special_token_201⦆"
],
[
"<|reserved_special_token_202|>",
"⦅reserved_special_token_202⦆"
],
[
"<|reserved_special_token_203|>",
"⦅reserved_special_token_203⦆"
],
[
"<|reserved_special_token_204|>",
"⦅reserved_special_token_204⦆"
],
[
"<|reserved_special_token_205|>",
"⦅reserved_special_token_205⦆"
],
[
"<|reserved_special_token_206|>",
"⦅reserved_special_token_206⦆"
],
[
"<|reserved_special_token_207|>",
"⦅reserved_special_token_207⦆"
],
[
"<|reserved_special_token_208|>",
"⦅reserved_special_token_208⦆"
],
[
"<|reserved_special_token_209|>",
"⦅reserved_special_token_209⦆"
],
[
"<|reserved_special_token_210|>",
"⦅reserved_special_token_210⦆"
],
[
"<|reserved_special_token_211|>",
"⦅reserved_special_token_211⦆"
],
[
"<|reserved_special_token_212|>",
"⦅reserved_special_token_212⦆"
],
[
"<|reserved_special_token_213|>",
"⦅reserved_special_token_213⦆"
],
[
"<|reserved_special_token_214|>",
"⦅reserved_special_token_214⦆"
],
[
"<|reserved_special_token_215|>",
"⦅reserved_special_token_215⦆"
],
[
"<|reserved_special_token_216|>",
"⦅reserved_special_token_216⦆"
],
[
"<|reserved_special_token_217|>",
"⦅reserved_special_token_217⦆"
],
[
"<|reserved_special_token_218|>",
"⦅reserved_special_token_218⦆"
],
[
"<|reserved_special_token_219|>",
"⦅reserved_special_token_219⦆"
],
[
"<|reserved_special_token_220|>",
"⦅reserved_special_token_220⦆"
],
[
"<|reserved_special_token_221|>",
"⦅reserved_special_token_221⦆"
],
[
"<|reserved_special_token_222|>",
"⦅reserved_special_token_222⦆"
],
[
"<|reserved_special_token_223|>",
"⦅reserved_special_token_223⦆"
],
[
"<|reserved_special_token_224|>",
"⦅reserved_special_token_224⦆"
],
[
"<|reserved_special_token_225|>",
"⦅reserved_special_token_225⦆"
],
[
"<|reserved_special_token_226|>",
"⦅reserved_special_token_226⦆"
],
[
"<|reserved_special_token_227|>",
"⦅reserved_special_token_227⦆"
],
[
"<|reserved_special_token_228|>",
"⦅reserved_special_token_228⦆"
],
[
"<|reserved_special_token_229|>",
"⦅reserved_special_token_229⦆"
],
[
"<|reserved_special_token_230|>",
"⦅reserved_special_token_230⦆"
],
[
"<|reserved_special_token_231|>",
"⦅reserved_special_token_231⦆"
],
[
"<|reserved_special_token_232|>",
"⦅reserved_special_token_232⦆"
],
[
"<|reserved_special_token_233|>",
"⦅reserved_special_token_233⦆"
],
[
"<|reserved_special_token_234|>",
"⦅reserved_special_token_234⦆"
],
[
"<|reserved_special_token_235|>",
"⦅reserved_special_token_235⦆"
],
[
"<|reserved_special_token_236|>",
"⦅reserved_special_token_236⦆"
],
[
"<|reserved_special_token_237|>",
"⦅reserved_special_token_237⦆"
],
[
"<|reserved_special_token_238|>",
"⦅reserved_special_token_238⦆"
],
[
"<|reserved_special_token_239|>",
"⦅reserved_special_token_239⦆"
],
[
"<|reserved_special_token_240|>",
"⦅reserved_special_token_240⦆"
],
[
"<|reserved_special_token_241|>",
"⦅reserved_special_token_241⦆"
],
[
"<|reserved_special_token_242|>",
"⦅reserved_special_token_242⦆"
],
[
"<|reserved_special_token_243|>",
"⦅reserved_special_token_243⦆"
],
[
"<|reserved_special_token_244|>",
"⦅reserved_special_token_244⦆"
],
[
"<|reserved_special_token_245|>",
"⦅reserved_special_token_245⦆"
],
[
"<|reserved_special_token_246|>",
"⦅reserved_special_token_246⦆"
],
[
"<|reserved_special_token_247|>",
"⦅reserved_special_token_247⦆"
],
[
"<|reserved_special_token_248|>",
"⦅reserved_special_token_248⦆"
],
[
"<|reserved_special_token_249|>",
"⦅reserved_special_token_249⦆"
],
[
"<|reserved_special_token_250|>",
"⦅reserved_special_token_250⦆"
]
],
"src_subword_type": "bpe"
}
},
"model": {
"head_dim": null,
"sliding_window": 0,
"architecture": "transformer_lm",
"num_experts": 0,
"heads": 32,
"hidden_size": 4096,
"norm_eps": 1e-05,
"num_experts_per_tok": 0,
"mlp_activation_fn": "gated-silu",
"left_pad": true,
"layers": 32,
"layer_norm": "rms",
"shared_layer_norm": false,
"add_ffnbias": false,
"heads_kv": 8,
"add_qkvbias": false,
"parallel_residual": false,
"transformer_ff": 14336,
"embeddings": {
"n_positions": 0,
"tgt_word_vec_size": 4096,
"position_encoding_type": "Rotary",
"src_word_vec_size": 4096
},
"rope_config": {
"rotary_interleave": false,
"rotary_theta": 500000
},
"decoder": {
"position_encoding_type": "Rotary",
"n_positions": 0,
"decoder_type": "transformer_lm",
"tgt_word_vec_size": 4096,
"rope_config": {
"rotary_interleave": false,
"rotary_theta": 500000
}
}
},
"inference": {
"optional_eos": [],
"top_p": 0.9,
"temperature": 0.6,
"max_length": 4096,
"chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}"
}
}