{ "version": "1.0", "truncation": { "direction": "Right", "max_length": 512, "strategy": "LongestFirst", "stride": 0 }, "padding": null, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "Whitespace" }, "post_processor": { "type": "RobertaProcessing", "sep": [ "", 1 ], "cls": [ "", 3 ], "trim_offsets": true, "add_prefix_space": true }, "decoder": null, "model": { "type": "BPE", "dropout": null, "unk_token": "", "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "vocab": { "": 0, "": 1, "": 2, "": 3, "": 4, "A": 5, "B": 6, "C": 7, "D": 8, "G": 9, "H": 10, "K": 11, "M": 12, "N": 13, "R": 14, "S": 15, "T": 16, "V": 17, "W": 18, "Y": 19, "AA": 20, "TT": 21, "GC": 22, "GA": 23, "TC": 24, "TA": 25, "TG": 26, "CC": 27, "CA": 28, "GG": 29, "CG": 30, "AC": 31, "AG": 32, "GT": 33, "AT": 34, "CT": 35, "ATT": 36, "GAA": 37, "CAA": 38, "TAA": 39, "GAT": 40, "ATC": 41, "GTT": 42, "CTT": 43, "GCA": 44, "GCG": 45, "ACC": 46, "GCT": 47, "GAC": 48, "GCC": 49, "GAG": 50, "GTC": 51, "TAT": 52, "TGG": 53, "CTC": 54, "ACA": 55, "TGT": 56, "TAC": 57, "AAAA": 58, "TTTT": 59, "AAAT": 60, "TAG": 61, "GGT": 62, "ATTT": 63, "AAGA": 64, "TAAA": 65, "AATA": 66, "TATT": 67, "TTTA": 68, "AGAA": 69, "AATT": 70, "ATAT": 71, "TTAT": 72, "TCAA": 73, "ATAA": 74, "TGAA": 75, "ATCA": 76, "GAAG": 77, "TGAT": 78, "GAAA": 79, "ATTA": 80, "TTAA": 81, "TTCA": 82, "TAAT": 83, "AACA": 84, "AAAG": 85, "TTGA": 86, "CAAA": 87, "ATGA": 88, "ACAA": 89, "TTCT": 90, "GGG": 91, "TCTT": 92, "GATG": 93, "TGTT": 94, "TTTG": 95, "TTTC": 96, "AATG": 97, "TCAT": 98, "CTTT": 99, "CATC": 100, "CGGC": 101, "TGGT": 102, "AAAC": 103, "CTTC": 104, "AGAT": 105, "GCTG": 106, "CAAC": 107, "GGCG": 108, "CAAG": 109, "CAAT": 110, "ATTG": 111, "GCCG": 112, "TGGA": 113, "ACCA": 114, "CATT": 115, "GATT": 116, "GTTT": 117, "TTGT": 118, "TATA": 119, "AACT": 120, "CTGG": 121, "ATGG": 122, "CGCC": 123, "AATC": 124, "CGAC": 125, "GAAC": 126, "AGCA": 127, "GAAT": 128, "GCAA": 129, "GTTC": 130, "ATCT": 131, "AGTT": 132, "GGTG": 133, "ATTC": 134, "CAGC": 135, "TGCT": 136, "GATA": 137, "GCGC": 138, "GACG": 139, "TATC": 140, "CAGA": 141, "GCAG": 142, "GGAA": 143, "AAGT": 144, "TCGA": 145, "GTTG": 146, "AAGG": 147, "CTGA": 148, "AAGC": 149, "ACAT": 150, "CACC": 151, "ACGA": 152, "TGGC": 153, "ACTT": 154, "CCAA": 155, "CCGC": 156, "ATGT": 157, "CGCG": 158, "TTGG": 159, "TACA": 160, "TCCA": 161, "CGAA": 162, "TCTG": 163, "AGGA": 164, "GCGG": 165, "CTGC": 166, "CCAG": 167, "ATCG": 168, "TCAG": 169, "TGAC": 170, "GTCG": 171, "GATC": 172, "TTCC": 173, "CGAT": 174, "TATG": 175, "GTGG": 176, "TGCA": 177, "AACG": 178, "GGCA": 179, "CGTC": 180, "TTGC": 181, "GAGA": 182, "TTCG": 183, "CTTG": 184, "AACC": 185, "CCAT": 186, "TTAC": 187, "GCGA": 188, "GCTT": 189, "GTAT": 190, "AGCT": 191, "CCGA": 192, "TCGT": 193, "GTAA": 194, "GCCA": 195, "GACA": 196, "GGTT": 197, "GACC": 198, "ACCG": 199, "CAGG": 200, "ATGC": 201, "CCTG": 202, "CGAG": 203, "GTCA": 204, "TGTA": 205, "ACTG": 206, "ATAC": 207, "CATG": 208, "CCGG": 209, "GGAT": 210, "ACAG": 211, "TACT": 212, "TCGC": 213, "GTGA": 214, "GCAT": 215, "AGTA": 216, "AGGT": 217, "ACCT": 218, "CGGT": 219, "GTTA": 220, "TCAC": 221, "TAAC": 222, "CGTT": 223, "ATCC": 224, "TCCT": 225, "CGCA": 226, "GGCC": 227, "CTCG": 228, "TCGG": 229, "GAGG": 230, "CGCT": 231, "ACGC": 232, "CTGT": 233, "CAGT": 234, "GAGC": 235, "CCTT": 236, "GGTC": 237, "GGAG": 238, "AGAG": 239, "GCTC": 240, "GGTA": 241, "CTAT": 242, "AGAC": 243, "ACGG": 244, "CATA": 245, "CGTG": 246, "TCTA": 247, "ACAC": 248, "TGAG": 249, "TGCC": 250, "TCTC": 251, "GCAC": 252, "CCAC": 253, "TAGA": 254, "GGCT": 255, "AGCG": 256, "TGTC": 257, "GCGT": 258, "CTAC": 259, "CTCA": 260, "ACTA": 261, "CACA": 262, "CTAA": 263, "AGGC": 264, "ACG": 265, "ACGT": 266, "TGGG": 267, "TACC": 268, "CCCG": 269, "GTGC": 270, "CACG": 271, "TGCG": 272, "GGGC": 273, "CCTC": 274, "TGTG": 275, "GTGT": 276, "CTCT": 277, "TAAG": 278, "CCGT": 279, "GGAC": 280, "GAGT": 281, "TTAG": 282, "GTAC": 283, "GTCT": 284, "GACT": 285, "CGGA": 286, "ATAG": 287, "CTTA": 288, "ACTC": 289, "TCCG": 290, "AGCC": 291, "ACCC": 292, "CTCC": 293, "CGGG": 294, "AGTG": 295, "GCCT": 296, "GCCC": 297, "CACT": 298, "GCTA": 299, "TACG": 300, "GGGT": 301, "TAGT": 302, "CCCA": 303, "AGTC": 304, "GGGA": 305, "GTAG": 306, "GTCC": 307, "CGTA": 308, "AGGG": 309, "TCCC": 310, "TAGC": 311, "CCCT": 312, "CCCC": 313, "GGGG": 314, "CCTA": 315, "TAGG": 316, "CTAG": 317, "NN": 318, "NNNN": 319, "TY": 320, "RA": 321, "YA": 322, "TR": 323, "AR": 324, "AY": 325, "YT": 326, "CY": 327, "CR": 328, "YG": 329, "NNN": 330, "RT": 331, "RG": 332, "GY": 333, "GR": 334, "YC": 335, "AAA": 336, "TN": 337, "RC": 338, "AN": 339, "TW": 340, "NA": 341, "TK": 342, "WA": 343, "KG": 344, "CS": 345, "MA": 346, "TTT": 347, "SG": 348, "MC": 349, "AW": 350, "GN": 351, "GK": 352, "CM": 353, "AAT": 354, "CN": 355, "AM": 356, "NG": 357, "CW": 358, "WT": 359, "GS": 360, "KA": 361, "SC": 362, "NT": 363, "TM": 364, "NC": 365, "CK": 366, "WC": 367, "KT": 368, "AAG": 369, "MT": 370, "WG": 371, "TTG": 372, "TTA": 373, "TS": 374, "GAY": 375, "MG": 376, "AAC": 377, "ATA": 378, "CCA": 379, "KC": 380, "AK": 381, "GM": 382, "GAR": 383, "AS": 384, "ST": 385, "YTC": 386, "SA": 387, "GW": 388, "TTC": 389, "GCY": 390, "AGT": 391, "CAT": 392, "RTC": 393, "RAAA": 394, "TTTY": 395, "CCC": 396, "ACT": 397, "CTG": 398, "AGG": 399, "TTYA": 400, "CGG": 401, "TYAA": 402, "GTA": 403, "CCT": 404, "TRAA": 405, "ATG": 406, "GCR": 407, "TTYT": 408, "RR": 409, "YAAA": 410, "AARA": 411, "CCG": 412, "ARAA": 413, "RAAT": 414, "CAG": 415, "ANNN": 416, "WM": 417, "ATTY": 418, "TNNN": 419, "NNNA": 420, "WW": 421, "YTTT": 422, "TTYG": 423, "NNNG": 424, "GNNN": 425, "AAAN": 426, "TTRA": 427, "GTG": 428, "YY": 429, "AAAR": 430, "CAC": 431, "TYTT": 432, "CGT": 433, "NNNC": 434, "CTA": 435, "TYAT": 436, "YAAT": 437, "TRAT": 438, "CRAA": 439, "TTTN": 440, "ATYA": 441, "WY": 442, "GTTY": 443, "CNNN": 444, "RTTT": 445, "AATY": 446, "YATT": 447, "ATTR": 448, "CTTY": 449, "AYTT": 450, "TAYT": 451, "ATRA": 452, "AAYA": 453, "GAN": 454, "TCRA": 455, "RATA": 456, "AART": 457, "CAAR": 458, "TYGA": 459, "ARTT": 460, "AGC": 461, "RAAC": 462, "WR": 463, "AAAY": 464, "ATYT": 465, "AYAT": 466, "NNNT": 467, "RATT": 468, "RGAA": 469, "YTGT": 470, "AARG": 471, "ACRA": 472, "GAAR": 473, "NAAA": 474, "TTTR": 475, "TCA": 476, "GRAA": 477, "CCSC": 478, "RAAG": 479, "YTAT": 480, "AAYT": 481, "ARAT": 482, "ATAY": 483, "YAAC": 484, "TTCR": 485, "GGSG": 486, "ARGA": 487, "AYAA": 488, "YGAA": 489, "YCAT": 490, "GAK": 491, "TCTY": 492, "TATY": 493, "TYTA": 494, "TYGT": 495, "GSGG": 496, "GAM": 497, "CTRA": 498, "TYCA": 499 }, "merges": [ "A A", "T T", "G C", "G A", "T C", "T A", "T G", "C C", "C A", "G G", "C G", "A C", "A G", "G T", "A T", "C T", "A TT", "G AA", "C AA", "T AA", "GA T", "A TC", "G TT", "C TT", "GC A", "GC G", "A CC", "GC T", "GA C", "GC C", "GA G", "G TC", "TA T", "TG G", "C TC", "A CA", "TG T", "TA C", "AA AA", "TT TT", "AA AT", "TA G", "GG T", "ATT T", "AA GA", "TAA A", "AA TA", "TA TT", "TT TA", "AG AA", "AA TT", "A TAT", "TT AT", "TC AA", "AT AA", "TG AA", "ATC A", "GAA G", "T GAT", "GAA A", "ATT A", "TT AA", "TT CA", "TAA T", "AA CA", "AA AG", "TT GA", "CAA A", "AT GA", "AC AA", "TT CT", "GG G", "TC TT", "GA TG", "TG TT", "TT TG", "TT TC", "AA TG", "TC AT", "CTT T", "CA TC", "CG GC", "TG GT", "AA AC", "CTT C", "A GAT", "GC TG", "CAA C", "G GCG", "CAA G", "CAA T", "ATT G", "GC CG", "TG GA", "ACC A", "CA TT", "GA TT", "GTT T", "TT GT", "TA TA", "AA CT", "C TGG", "A TGG", "C GCC", "AA TC", "C GAC", "GAA C", "A GCA", "GAA T", "GC AA", "GTT C", "ATC T", "AG TT", "GG TG", "ATT C", "CA GC", "T GCT", "GA TA", "GC GC", "GA CG", "TA TC", "CA GA", "GC AG", "GG AA", "AA GT", "TC GA", "GTT G", "AA GG", "CT GA", "AA GC", "ACA T", "CA CC", "AC GA", "TG GC", "AC TT", "CC AA", "CC GC", "A TGT", "C GCG", "TT GG", "TA CA", "TC CA", "CG AA", "TC TG", "AG GA", "GC GG", "CT GC", "CC AG", "ATC G", "TC AG", "T GAC", "GTC G", "GA TC", "TT CC", "C GAT", "TA TG", "G TGG", "T GCA", "AA CG", "G GCA", "CG TC", "TT GC", "GA GA", "TT CG", "CTT G", "AA CC", "CC AT", "TT AC", "GC GA", "GC TT", "G TAT", "A GCT", "CC GA", "TC GT", "GT AA", "GC CA", "GA CA", "GG TT", "GA CC", "ACC G", "CA GG", "AT GC", "CC TG", "C GAG", "GTC A", "TG TA", "AC TG", "A TAC", "CA TG", "CC GG", "G GAT", "ACA G", "TA CT", "TC GC", "GT GA", "GC AT", "AG TA", "A GGT", "ACC T", "C GGT", "GTT A", "TC AC", "TAA C", "CG TT", "ATC C", "TC CT", "C GCA", "G GCC", "CTC G", "TC GG", "GA GG", "C GCT", "AC GC", "C TGT", "CA GT", "GA GC", "CC TT", "GG TC", "G GAG", "A GAG", "GC TC", "GG TA", "C TAT", "A GAC", "AC GG", "CA TA", "CG TG", "TC TA", "ACA C", "T GAG", "T GCC", "TC TC", "GC AC", "CC AC", "TA GA", "G GCT", "A GCG", "TG TC", "GC GT", "C TAC", "CTC A", "AC TA", "CA CA", "CT AA", "AG GC", "A CG", "ACG T", "TG GG", "TA CC", "CC CG", "GT GC", "CA CG", "T GCG", "GG GC", "CC TC", "TG TG", "G TGT", "CTC T", "TAA G", "CC GT", "G GAC", "GA GT", "TT AG", "G TAC", "GTC T", "GA CT", "CG GA", "A TAG", "CTT A", "AC TC", "TC CG", "A GCC", "ACC C", "CTC C", "C GGG", "AG TG", "GC CT", "GC CC", "CA CT", "GC TA", "TA CG", "GG GT", "TA GT", "CC CA", "AG TC", "GG GA", "G TAG", "GTC C", "CG TA", "A GGG", "TC CC", "TA GC", "CC CT", "CC CC", "GG GG", "CC TA", "TA GG", "C TAG", "N N", "NN NN", "T Y", "R A", "Y A", "T R", "A R", "A Y", "Y T", "C Y", "C R", "Y G", "NN N", "R T", "R G", "G Y", "G R", "Y C", "AA A", "T N", "R C", "A N", "T W", "N A", "T K", "W A", "K G", "C S", "M A", "TT T", "S G", "M C", "A W", "G N", "G K", "C M", "AA T", "C N", "A M", "N G", "C W", "W T", "G S", "K A", "S C", "N T", "T M", "N C", "C K", "W C", "K T", "AA G", "M T", "W G", "TT G", "TT A", "T S", "GA Y", "M G", "AA C", "A TA", "CC A", "K C", "A K", "G M", "GA R", "A S", "S T", "Y TC", "S A", "G W", "TT C", "GC Y", "AG T", "CA T", "R TC", "R AAA", "TT TY", "CC C", "AC T", "C TG", "A GG", "TT YA", "C GG", "TY AA", "G TA", "CC T", "TR AA", "A TG", "GC R", "TT YT", "R R", "Y AAA", "AA RA", "CC G", "AR AA", "R AAT", "CA G", "A NNN", "W M", "ATT Y", "T NNN", "NNN A", "W W", "Y TTT", "TT YG", "NNN G", "G NNN", "AAA N", "TT RA", "G TG", "Y Y", "AA AR", "CA C", "TY TT", "CG T", "NNN C", "C TA", "TY AT", "Y AAT", "TR AT", "CR AA", "TT TN", "AT YA", "W Y", "GTT Y", "C NNN", "R TTT", "AA TY", "Y ATT", "ATT R", "CTT Y", "AY TT", "TA YT", "AT RA", "AA YA", "GA N", "TC RA", "RA TA", "AA RT", "CAA R", "TY GA", "AR TT", "A GC", "R AAC", "W R", "AA AY", "AT YT", "AY AT", "NNN T", "R ATT", "R GAA", "Y TGT", "AA RG", "AC RA", "GAA R", "N AAA", "TT TR", "TC A", "GR AA", "CC SC", "R AAG", "Y TAT", "AA YT", "AR AT", "ATA Y", "Y AAC", "TT CR", "GG SG", "AR GA", "AY AA", "Y GAA", "Y CAT", "GA K", "TC TY", "TAT Y", "TY TA", "TY GT", "GS GG", "GA M", "CT RA", "TY CA" ] } }