|
{ |
|
"version": "1.0", |
|
"truncation": null, |
|
"padding": null, |
|
"added_tokens": [ |
|
{ |
|
"id": 0, |
|
"content": "<cls>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 1, |
|
"content": "<pad>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 2, |
|
"content": "<eos>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 3, |
|
"content": "<unk>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 4, |
|
"content": "<mask>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 5, |
|
"content": "<sep>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
} |
|
], |
|
"normalizer": null, |
|
"pre_tokenizer": { |
|
"type": "Split", |
|
"pattern": { |
|
"Regex": "(\\[[^\\]]+]|Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|\\(|\\)|\\.|=|#|-|\\+|\\\\|\\/|:|~|@|\\?|>>?|\\*|\\$|\\%[0-9]{2}|[0-9])" |
|
}, |
|
"behavior": "Isolated", |
|
"invert": false |
|
}, |
|
"post_processor": { |
|
"type": "TemplateProcessing", |
|
"single": [ |
|
{ |
|
"SpecialToken": { |
|
"id": "<cls>", |
|
"type_id": 0 |
|
} |
|
}, |
|
{ |
|
"Sequence": { |
|
"id": "A", |
|
"type_id": 0 |
|
} |
|
}, |
|
{ |
|
"SpecialToken": { |
|
"id": "<eos>", |
|
"type_id": 0 |
|
} |
|
} |
|
], |
|
"pair": [ |
|
{ |
|
"SpecialToken": { |
|
"id": "<cls>", |
|
"type_id": 0 |
|
} |
|
}, |
|
{ |
|
"Sequence": { |
|
"id": "A", |
|
"type_id": 0 |
|
} |
|
}, |
|
{ |
|
"SpecialToken": { |
|
"id": "<eos>", |
|
"type_id": 0 |
|
} |
|
}, |
|
{ |
|
"Sequence": { |
|
"id": "B", |
|
"type_id": 1 |
|
} |
|
}, |
|
{ |
|
"SpecialToken": { |
|
"id": "<eos>", |
|
"type_id": 1 |
|
} |
|
} |
|
], |
|
"special_tokens": { |
|
"<cls>": { |
|
"id": "<cls>", |
|
"ids": [ |
|
0 |
|
], |
|
"tokens": [ |
|
"<cls>" |
|
] |
|
}, |
|
"<eos>": { |
|
"id": "<eos>", |
|
"ids": [ |
|
2 |
|
], |
|
"tokens": [ |
|
"<eos>" |
|
] |
|
} |
|
} |
|
}, |
|
"decoder": null, |
|
"model": { |
|
"type": "BPE", |
|
"dropout": null, |
|
"unk_token": "<unk>", |
|
"continuing_subword_prefix": null, |
|
"end_of_word_suffix": null, |
|
"fuse_unk": false, |
|
"byte_fallback": false, |
|
"ignore_merges": true, |
|
"vocab": { |
|
"<cls>": 0, |
|
"<pad>": 1, |
|
"<eos>": 2, |
|
"<unk>": 3, |
|
"<mask>": 4, |
|
"<sep>": 5, |
|
"c": 6, |
|
"C": 7, |
|
"(": 8, |
|
")": 9, |
|
"O": 10, |
|
"1": 11, |
|
"2": 12, |
|
"=": 13, |
|
"N": 14, |
|
".": 15, |
|
"n": 16, |
|
"3": 17, |
|
"F": 18, |
|
"Cl": 19, |
|
">>": 20, |
|
"~": 21, |
|
"-": 22, |
|
"4": 23, |
|
"[C@H]": 24, |
|
"S": 25, |
|
"[C@@H]": 26, |
|
"[O-]": 27, |
|
"Br": 28, |
|
"#": 29, |
|
"/": 30, |
|
"[nH]": 31, |
|
"[N+]": 32, |
|
"s": 33, |
|
"5": 34, |
|
"o": 35, |
|
"P": 36, |
|
"[Na+]": 37, |
|
"[Si]": 38, |
|
"I": 39, |
|
"[Na]": 40, |
|
"[Pd]": 41, |
|
"[K+]": 42, |
|
"[K]": 43, |
|
"[P]": 44, |
|
"B": 45, |
|
"[C@]": 46, |
|
"[C@@]": 47, |
|
"[Cl-]": 48, |
|
"6": 49, |
|
"[OH-]": 50, |
|
"\\": 51, |
|
"[N-]": 52, |
|
"[Li]": 53, |
|
"[H]": 54, |
|
"[2H]": 55, |
|
"[NH4+]": 56, |
|
"[c-]": 57, |
|
"[P-]": 58, |
|
"[Cs+]": 59, |
|
"[Li+]": 60, |
|
"[Cs]": 61, |
|
"[NaH]": 62, |
|
"[H-]": 63, |
|
"[O+]": 64, |
|
"[BH4-]": 65, |
|
"[Cu]": 66, |
|
"7": 67, |
|
"[Mg]": 68, |
|
"[Fe+2]": 69, |
|
"[n+]": 70, |
|
"[Sn]": 71, |
|
"[BH-]": 72, |
|
"[Pd+2]": 73, |
|
"[CH]": 74, |
|
"[I-]": 75, |
|
"[Br-]": 76, |
|
"[C-]": 77, |
|
"[Zn]": 78, |
|
"[B-]": 79, |
|
"[F-]": 80, |
|
"[Al]": 81, |
|
"[P+]": 82, |
|
"[BH3-]": 83, |
|
"[Fe]": 84, |
|
"[C]": 85, |
|
"[AlH4]": 86, |
|
"[Ni]": 87, |
|
"[SiH]": 88, |
|
"8": 89, |
|
"[Cu+2]": 90, |
|
"[Mn]": 91, |
|
"[AlH]": 92, |
|
"[nH+]": 93, |
|
"[AlH4-]": 94, |
|
"[O-2]": 95, |
|
"[Cr]": 96, |
|
"[Mg+2]": 97, |
|
"[NH3+]": 98, |
|
"[S@]": 99, |
|
"[Pt]": 100, |
|
"[Al+3]": 101, |
|
"[S@@]": 102, |
|
"[S-]": 103, |
|
"[Ti]": 104, |
|
"[Zn+2]": 105, |
|
"[PH]": 106, |
|
"[NH2+]": 107, |
|
"[Ru]": 108, |
|
"[Ag+]": 109, |
|
"[S+]": 110, |
|
"[I+3]": 111, |
|
"[NH+]": 112, |
|
"[Ca+2]": 113, |
|
"[Ag]": 114, |
|
"9": 115, |
|
"[Os]": 116, |
|
"[Se]": 117, |
|
"[SiH2]": 118, |
|
"[Ca]": 119, |
|
"[Ti+4]": 120, |
|
"[Ac]": 121, |
|
"[Cu+]": 122, |
|
"[S]": 123, |
|
"[Rh]": 124, |
|
"[Cl+3]": 125, |
|
"[cH-]": 126, |
|
"[Zn+]": 127, |
|
"[O]": 128, |
|
"[Cl+]": 129, |
|
"[SH]": 130, |
|
"[H+]": 131, |
|
"[Pd+]": 132, |
|
"[se]": 133, |
|
"[PH+]": 134, |
|
"[I]": 135, |
|
"[Pt+2]": 136, |
|
"[C+]": 137, |
|
"[Mg+]": 138, |
|
"[Hg]": 139, |
|
"[W]": 140, |
|
"[SnH]": 141, |
|
"[SiH3]": 142, |
|
"[Fe+3]": 143, |
|
"[NH]": 144, |
|
"[Mo]": 145, |
|
"[CH2+]": 146, |
|
"%10": 147, |
|
"[CH2-]": 148, |
|
"[CH2]": 149, |
|
"[n-]": 150, |
|
"[Ce+4]": 151, |
|
"[NH-]": 152, |
|
"[Co]": 153, |
|
"[I+]": 154, |
|
"[PH2]": 155, |
|
"[Pt+4]": 156, |
|
"[Ce]": 157, |
|
"[B]": 158, |
|
"[Sn+2]": 159, |
|
"[Ba+2]": 160, |
|
"%11": 161, |
|
"[Fe-3]": 162, |
|
"[18F]": 163, |
|
"[SH-]": 164, |
|
"[Pb+2]": 165, |
|
"[Os-2]": 166, |
|
"[Zr+4]": 167, |
|
"[N]": 168, |
|
"[Ir]": 169, |
|
"[Bi]": 170, |
|
"[Ni+2]": 171, |
|
"[P@]": 172, |
|
"[Co+2]": 173, |
|
"[s+]": 174, |
|
"[As]": 175, |
|
"[P+3]": 176, |
|
"[Hg+2]": 177, |
|
"[Yb+3]": 178, |
|
"[CH-]": 179, |
|
"[Zr+2]": 180, |
|
"[Mn+2]": 181, |
|
"[CH+]": 182, |
|
"[In]": 183, |
|
"[KH]": 184, |
|
"[Ce+3]": 185, |
|
"[Zr]": 186, |
|
"[AlH2-]": 187, |
|
"[OH2+]": 188, |
|
"[Ti+3]": 189, |
|
"[Rh+2]": 190, |
|
"[Sb]": 191, |
|
"[S-2]": 192, |
|
"%12": 193, |
|
"[P@@]": 194, |
|
"[Si@H]": 195, |
|
"[Mn+4]": 196, |
|
"p": 197, |
|
"[Ba]": 198, |
|
"[NH2-]": 199, |
|
"[Ge]": 200, |
|
"[Pb+4]": 201, |
|
"[Cr+3]": 202, |
|
"[Au]": 203, |
|
"[LiH]": 204, |
|
"[Sc+3]": 205, |
|
"[o+]": 206, |
|
"[Rh-3]": 207, |
|
"%13": 208, |
|
"[Br]": 209, |
|
"[Sb-]": 210, |
|
"[S@+]": 211, |
|
"[I+2]": 212, |
|
"[Ar]": 213, |
|
"[V]": 214, |
|
"[Cu-]": 215, |
|
"[Al-]": 216, |
|
"[Te]": 217, |
|
"[13c]": 218, |
|
"[13C]": 219, |
|
"[Cl]": 220, |
|
"[PH4+]": 221, |
|
"[SiH4]": 222, |
|
"[te]": 223, |
|
"[CH3-]": 224, |
|
"[S@@+]": 225, |
|
"[Rh+3]": 226, |
|
"[SH+]": 227, |
|
"[Bi+3]": 228, |
|
"[Br+2]": 229, |
|
"[La]": 230, |
|
"[La+3]": 231, |
|
"[Pt-2]": 232, |
|
"[N@@]": 233, |
|
"[PH3+]": 234, |
|
"[N@]": 235, |
|
"[Si+4]": 236, |
|
"[Sr+2]": 237, |
|
"[Al+]": 238, |
|
"[Pb]": 239, |
|
"[SeH]": 240, |
|
"[Si-]": 241, |
|
"[V+5]": 242, |
|
"[Y+3]": 243, |
|
"[Re]": 244, |
|
"[Ru+]": 245, |
|
"[Sm]": 246, |
|
"*": 247, |
|
"[3H]": 248, |
|
"[NH2]": 249, |
|
"[Ag-]": 250, |
|
"[13CH3]": 251, |
|
"[OH+]": 252, |
|
"[Ru+3]": 253, |
|
"[OH]": 254, |
|
"[Gd+3]": 255, |
|
"[13CH2]": 256, |
|
"[In+3]": 257, |
|
"[Si@@]": 258, |
|
"[Si@]": 259, |
|
"[Ti+2]": 260, |
|
"[Sn+]": 261, |
|
"[Cl+2]": 262, |
|
"[AlH-]": 263, |
|
"[Pd-2]": 264, |
|
"[SnH3]": 265, |
|
"[B+3]": 266, |
|
"[Cu-2]": 267, |
|
"[Nd+3]": 268, |
|
"[Pb+3]": 269, |
|
"[13cH]": 270, |
|
"[Fe-4]": 271, |
|
"[Ga]": 272, |
|
"[Sn+4]": 273, |
|
"[Hg+]": 274, |
|
"[11CH3]": 275, |
|
"[Hf]": 276, |
|
"[Pr]": 277, |
|
"[Y]": 278, |
|
"[S+2]": 279, |
|
"[Cd]": 280, |
|
"[Cr+6]": 281, |
|
"[Zr+3]": 282, |
|
"[Rh+]": 283, |
|
"[CH3]": 284, |
|
"[N-3]": 285, |
|
"[Hf+2]": 286, |
|
"[Th]": 287, |
|
"[Sb+3]": 288, |
|
"%14": 289, |
|
"[Cr+2]": 290, |
|
"[Ru+2]": 291, |
|
"[Hf+4]": 292, |
|
"[14C]": 293, |
|
"[Ta]": 294, |
|
"[Tl+]": 295, |
|
"[B+]": 296, |
|
"[Os+4]": 297, |
|
"[PdH2]": 298, |
|
"[Pd-]": 299, |
|
"[Cd+2]": 300, |
|
"[Co+3]": 301, |
|
"[S+4]": 302, |
|
"[Nb+5]": 303, |
|
"[123I]": 304, |
|
"[c+]": 305, |
|
"[Rb+]": 306, |
|
"[V+2]": 307, |
|
"[CH3+]": 308, |
|
"[Ag+2]": 309, |
|
"[cH+]": 310, |
|
"[Mn+3]": 311, |
|
"[Se-]": 312, |
|
"[As-]": 313, |
|
"[Eu+3]": 314, |
|
"[SH2]": 315, |
|
"[Sm+3]": 316, |
|
"[IH+]": 317, |
|
"%15": 318, |
|
"[OH3+]": 319, |
|
"[PH3]": 320, |
|
"[IH2+]": 321, |
|
"[SH2+]": 322, |
|
"[Ir+3]": 323, |
|
"[AlH3]": 324, |
|
"[Sc]": 325, |
|
"[Yb]": 326, |
|
"[15NH2]": 327, |
|
"[Lu]": 328, |
|
"[sH+]": 329, |
|
"[Gd]": 330, |
|
"[18F-]": 331, |
|
"[SH3+]": 332, |
|
"[SnH4]": 333, |
|
"[TeH]": 334, |
|
"[Si@@H]": 335, |
|
"[Ga+3]": 336, |
|
"[CaH2]": 337, |
|
"[Tl]": 338, |
|
"[Ta+5]": 339, |
|
"[GeH]": 340, |
|
"[Br+]": 341, |
|
"[Sr]": 342, |
|
"[Tl+3]": 343, |
|
"[Sm+2]": 344, |
|
"[PH5]": 345, |
|
"%16": 346, |
|
"[N@@+]": 347, |
|
"[Au+3]": 348, |
|
"[C-4]": 349, |
|
"[Nd]": 350, |
|
"[Ti+]": 351, |
|
"[IH]": 352, |
|
"[N@+]": 353, |
|
"[125I]": 354, |
|
"[Eu]": 355, |
|
"[Sn+3]": 356, |
|
"[Nb]": 357, |
|
"[Er+3]": 358, |
|
"[123I-]": 359, |
|
"[14c]": 360, |
|
"%17": 361, |
|
"[SnH2]": 362, |
|
"[YH]": 363, |
|
"[Sb+5]": 364, |
|
"[Pr+3]": 365, |
|
"[Ir+]": 366, |
|
"[N+3]": 367, |
|
"[AlH2]": 368, |
|
"[19F]": 369, |
|
"%18": 370, |
|
"[Tb]": 371, |
|
"[14CH]": 372, |
|
"[Mo+4]": 373, |
|
"[Si+]": 374, |
|
"[BH]": 375, |
|
"[Be]": 376, |
|
"[Rb]": 377, |
|
"[pH]": 378, |
|
"%19": 379, |
|
"%20": 380, |
|
"[Xe]": 381, |
|
"[Ir-]": 382, |
|
"[Be+2]": 383, |
|
"[C+4]": 384, |
|
"[RuH2]": 385, |
|
"[15NH]": 386, |
|
"[U+2]": 387, |
|
"[Au-]": 388, |
|
"%21": 389, |
|
"%22": 390, |
|
"[Au+]": 391, |
|
"[15n]": 392, |
|
"[Al+2]": 393, |
|
"[Tb+3]": 394, |
|
"[15N]": 395, |
|
"[V+3]": 396, |
|
"[W+6]": 397, |
|
"[14CH3]": 398, |
|
"[Cr+4]": 399, |
|
"[ClH+]": 400, |
|
"b": 401, |
|
"[Ti+6]": 402, |
|
"[Nd+]": 403, |
|
"[Zr+]": 404, |
|
"[PH2+]": 405, |
|
"[Fm]": 406, |
|
"[N@H+]": 407, |
|
"[RuH]": 408, |
|
"[Dy+3]": 409, |
|
"%23": 410, |
|
"[Hf+3]": 411, |
|
"[W+4]": 412, |
|
"[11C]": 413, |
|
"[13CH]": 414, |
|
"[Er]": 415, |
|
"[124I]": 416, |
|
"[LaH]": 417, |
|
"[F]": 418, |
|
"[siH]": 419, |
|
"[Ga+]": 420, |
|
"[Cm]": 421, |
|
"[GeH3]": 422, |
|
"[IH-]": 423, |
|
"[U+6]": 424, |
|
"[SeH+]": 425, |
|
"[32P]": 426, |
|
"[SeH-]": 427, |
|
"[Pt-]": 428, |
|
"[Ir+2]": 429, |
|
"[se+]": 430, |
|
"[U]": 431, |
|
"[F+]": 432, |
|
"[BH2]": 433, |
|
"[As+]": 434, |
|
"[Cf]": 435, |
|
"[ClH2+]": 436, |
|
"[Ni+]": 437, |
|
"[TeH3]": 438, |
|
"[SbH2]": 439, |
|
"[Ag+3]": 440, |
|
"%24": 441, |
|
"[18O]": 442, |
|
"[PH4]": 443, |
|
"[Os+2]": 444, |
|
"[Na-]": 445, |
|
"[Sb+2]": 446, |
|
"[V+4]": 447, |
|
"[Ho+3]": 448, |
|
"[68Ga]": 449, |
|
"[PH-]": 450, |
|
"[Bi+2]": 451, |
|
"[Ce+2]": 452, |
|
"[Pd+3]": 453, |
|
"[99Tc]": 454, |
|
"[13C@@H]": 455, |
|
"[Fe+6]": 456, |
|
"[c]": 457, |
|
"[GeH2]": 458, |
|
"[10B]": 459, |
|
"[Cu+3]": 460, |
|
"[Mo+2]": 461, |
|
"[Cr+]": 462, |
|
"[Pd+4]": 463, |
|
"[Dy]": 464, |
|
"[AsH]": 465, |
|
"[Ba+]": 466, |
|
"[SeH2]": 467, |
|
"[In+]": 468, |
|
"[TeH2]": 469, |
|
"[BrH+]": 470, |
|
"[14cH]": 471, |
|
"[W+]": 472, |
|
"[13C@H]": 473, |
|
"[AsH2]": 474, |
|
"[In+2]": 475, |
|
"[N+2]": 476, |
|
"[N@@H+]": 477, |
|
"[SbH]": 478, |
|
"[60Co]": 479, |
|
"[AsH4+]": 480, |
|
"[AsH3]": 481, |
|
"[18OH]": 482, |
|
"[Ru-2]": 483, |
|
"[Na-2]": 484, |
|
"[CuH2]": 485, |
|
"[31P]": 486, |
|
"[Ti+5]": 487, |
|
"[35S]": 488, |
|
"[P@@H]": 489, |
|
"[ArH]": 490, |
|
"[Co+]": 491, |
|
"[Zr-2]": 492, |
|
"[BH2-]": 493, |
|
"[131I]": 494, |
|
"[SH5]": 495, |
|
"[VH]": 496, |
|
"[B+2]": 497, |
|
"[Yb+2]": 498, |
|
"[14C@H]": 499, |
|
"[211At]": 500, |
|
"[NH3+2]": 501, |
|
"[IrH]": 502, |
|
"[IrH2]": 503, |
|
"[Rh-]": 504, |
|
"[Cr-]": 505, |
|
"[Sb+]": 506, |
|
"[Ni+3]": 507, |
|
"[TaH3]": 508, |
|
"[Tl+2]": 509, |
|
"[64Cu]": 510, |
|
"[Tc]": 511, |
|
"[Cd+]": 512, |
|
"[1H]": 513, |
|
"[15nH]": 514, |
|
"[AlH2+]": 515, |
|
"[FH+2]": 516, |
|
"[BiH3]": 517, |
|
"[Ru-]": 518, |
|
"[Mo+6]": 519, |
|
"[AsH+]": 520, |
|
"[BaH2]": 521, |
|
"[BaH]": 522, |
|
"[Fe+4]": 523, |
|
"[229Th]": 524, |
|
"[Th+4]": 525, |
|
"[As+3]": 526, |
|
"[NH+3]": 527, |
|
"[P@H]": 528, |
|
"[Li-]": 529, |
|
"[7NaH]": 530, |
|
"[Bi+]": 531, |
|
"[PtH+2]": 532, |
|
"[p-]": 533, |
|
"[Re+5]": 534, |
|
"[NiH]": 535, |
|
"[Ni-]": 536, |
|
"[Xe+]": 537, |
|
"[Ca+]": 538, |
|
"[11c]": 539, |
|
"[Rh+4]": 540, |
|
"[AcH]": 541, |
|
"[HeH]": 542, |
|
"[Sc+2]": 543, |
|
"[Mn+]": 544, |
|
"[UH]": 545, |
|
"[14CH2]": 546, |
|
"[SiH4+]": 547, |
|
"[18OH2]": 548, |
|
"[Ac-]": 549, |
|
"[Re+4]": 550, |
|
"[118Sn]": 551, |
|
"[153Sm]": 552, |
|
"[P+2]": 553, |
|
"[9CH]": 554, |
|
"[9CH3]": 555, |
|
"[Y-]": 556, |
|
"[NiH2]": 557, |
|
"[Si+2]": 558, |
|
"[Mn+6]": 559, |
|
"[ZrH2]": 560, |
|
"[C-2]": 561, |
|
"[Bi+5]": 562, |
|
"[24NaH]": 563, |
|
"[Fr]": 564, |
|
"[15CH]": 565, |
|
"[Se+]": 566, |
|
"[At]": 567, |
|
"[P-3]": 568, |
|
"[124I-]": 569, |
|
"[CuH2-]": 570, |
|
"[Nb+4]": 571, |
|
"[Nb+3]": 572, |
|
"[MgH]": 573, |
|
"[Ir+4]": 574, |
|
"[67Ga+3]": 575, |
|
"[67Ga]": 576, |
|
"[13N]": 577, |
|
"[15OH2]": 578, |
|
"[2NH]": 579, |
|
"[Ho]": 580, |
|
"[Cn]": 581 |
|
}, |
|
"merges": [] |
|
} |
|
} |