{ "version": "1.0", "added_tokens": [ { "id": 0, "content": "[MASK]", "special": true }, { "id": 1, "content": "[UNK]", "special": true } ], "pre_tokenizer": { "type": "KmerSplitter", "k": 4, "stride": 4, "max_length": 660 }, "model": { "type": "KmerTokenizer", "unk_token": "[UNK]", "vocab": { "[MASK]": 0, "[UNK]": 1, "AAAA": 2, "AAAC": 3, "AAAG": 4, "AAAT": 5, "AACA": 6, "AACC": 7, "AACG": 8, "AACT": 9, "AAGA": 10, "AAGC": 11, "AAGG": 12, "AAGT": 13, "AATA": 14, "AATC": 15, "AATG": 16, "AATT": 17, "ACAA": 18, "ACAC": 19, "ACAG": 20, "ACAT": 21, "ACCA": 22, "ACCC": 23, "ACCG": 24, "ACCT": 25, "ACGA": 26, "ACGC": 27, "ACGG": 28, "ACGT": 29, "ACTA": 30, "ACTC": 31, "ACTG": 32, "ACTT": 33, "AGAA": 34, "AGAC": 35, "AGAG": 36, "AGAT": 37, "AGCA": 38, "AGCC": 39, "AGCG": 40, "AGCT": 41, "AGGA": 42, "AGGC": 43, "AGGG": 44, "AGGT": 45, "AGTA": 46, "AGTC": 47, "AGTG": 48, "AGTT": 49, "ATAA": 50, "ATAC": 51, "ATAG": 52, "ATAT": 53, "ATCA": 54, "ATCC": 55, "ATCG": 56, "ATCT": 57, "ATGA": 58, "ATGC": 59, "ATGG": 60, "ATGT": 61, "ATTA": 62, "ATTC": 63, "ATTG": 64, "ATTT": 65, "CAAA": 66, "CAAC": 67, "CAAG": 68, "CAAT": 69, "CACA": 70, "CACC": 71, "CACG": 72, "CACT": 73, "CAGA": 74, "CAGC": 75, "CAGG": 76, "CAGT": 77, "CATA": 78, "CATC": 79, "CATG": 80, "CATT": 81, "CCAA": 82, "CCAC": 83, "CCAG": 84, "CCAT": 85, "CCCA": 86, "CCCC": 87, "CCCG": 88, "CCCT": 89, "CCGA": 90, "CCGC": 91, "CCGG": 92, "CCGT": 93, "CCTA": 94, "CCTC": 95, "CCTG": 96, "CCTT": 97, "CGAA": 98, "CGAC": 99, "CGAG": 100, "CGAT": 101, "CGCA": 102, "CGCC": 103, "CGCG": 104, "CGCT": 105, "CGGA": 106, "CGGC": 107, "CGGG": 108, "CGGT": 109, "CGTA": 110, "CGTC": 111, "CGTG": 112, "CGTT": 113, "CTAA": 114, "CTAC": 115, "CTAG": 116, "CTAT": 117, "CTCA": 118, "CTCC": 119, "CTCG": 120, "CTCT": 121, "CTGA": 122, "CTGC": 123, "CTGG": 124, "CTGT": 125, "CTTA": 126, "CTTC": 127, "CTTG": 128, "CTTT": 129, "GAAA": 130, "GAAC": 131, "GAAG": 132, "GAAT": 133, "GACA": 134, "GACC": 135, "GACG": 136, "GACT": 137, "GAGA": 138, "GAGC": 139, "GAGG": 140, "GAGT": 141, "GATA": 142, "GATC": 143, "GATG": 144, "GATT": 145, "GCAA": 146, "GCAC": 147, "GCAG": 148, "GCAT": 149, "GCCA": 150, "GCCC": 151, "GCCG": 152, "GCCT": 153, "GCGA": 154, "GCGC": 155, "GCGG": 156, "GCGT": 157, "GCTA": 158, "GCTC": 159, "GCTG": 160, "GCTT": 161, "GGAA": 162, "GGAC": 163, "GGAG": 164, "GGAT": 165, "GGCA": 166, "GGCC": 167, "GGCG": 168, "GGCT": 169, "GGGA": 170, "GGGC": 171, "GGGG": 172, "GGGT": 173, "GGTA": 174, "GGTC": 175, "GGTG": 176, "GGTT": 177, "GTAA": 178, "GTAC": 179, "GTAG": 180, "GTAT": 181, "GTCA": 182, "GTCC": 183, "GTCG": 184, "GTCT": 185, "GTGA": 186, "GTGC": 187, "GTGG": 188, "GTGT": 189, "GTTA": 190, "GTTC": 191, "GTTG": 192, "GTTT": 193, "TAAA": 194, "TAAC": 195, "TAAG": 196, "TAAT": 197, "TACA": 198, "TACC": 199, "TACG": 200, "TACT": 201, "TAGA": 202, "TAGC": 203, "TAGG": 204, "TAGT": 205, "TATA": 206, "TATC": 207, "TATG": 208, "TATT": 209, "TCAA": 210, "TCAC": 211, "TCAG": 212, "TCAT": 213, "TCCA": 214, "TCCC": 215, "TCCG": 216, "TCCT": 217, "TCGA": 218, "TCGC": 219, "TCGG": 220, "TCGT": 221, "TCTA": 222, "TCTC": 223, "TCTG": 224, "TCTT": 225, "TGAA": 226, "TGAC": 227, "TGAG": 228, "TGAT": 229, "TGCA": 230, "TGCC": 231, "TGCG": 232, "TGCT": 233, "TGGA": 234, "TGGC": 235, "TGGG": 236, "TGGT": 237, "TGTA": 238, "TGTC": 239, "TGTG": 240, "TGTT": 241, "TTAA": 242, "TTAC": 243, "TTAG": 244, "TTAT": 245, "TTCA": 246, "TTCC": 247, "TTCG": 248, "TTCT": 249, "TTGA": 250, "TTGC": 251, "TTGG": 252, "TTGT": 253, "TTTA": 254, "TTTC": 255, "TTTG": 256, "TTTT": 257 } } }