{ | |
"version": "1.0", | |
"added_tokens": [ | |
{ | |
"id": 0, | |
"content": "[MASK]", | |
"special": true | |
}, | |
{ | |
"id": 1, | |
"content": "[UNK]", | |
"special": true | |
} | |
], | |
"pre_tokenizer": { | |
"type": "KmerSplitter", | |
"k": 4, | |
"stride": 4, | |
"max_length": 660 | |
}, | |
"model": { | |
"type": "KmerTokenizer", | |
"unk_token": "[UNK]", | |
"vocab": { | |
"[MASK]": 0, | |
"[UNK]": 1, | |
"AAAA": 2, | |
"AAAC": 3, | |
"AAAG": 4, | |
"AAAT": 5, | |
"AACA": 6, | |
"AACC": 7, | |
"AACG": 8, | |
"AACT": 9, | |
"AAGA": 10, | |
"AAGC": 11, | |
"AAGG": 12, | |
"AAGT": 13, | |
"AATA": 14, | |
"AATC": 15, | |
"AATG": 16, | |
"AATT": 17, | |
"ACAA": 18, | |
"ACAC": 19, | |
"ACAG": 20, | |
"ACAT": 21, | |
"ACCA": 22, | |
"ACCC": 23, | |
"ACCG": 24, | |
"ACCT": 25, | |
"ACGA": 26, | |
"ACGC": 27, | |
"ACGG": 28, | |
"ACGT": 29, | |
"ACTA": 30, | |
"ACTC": 31, | |
"ACTG": 32, | |
"ACTT": 33, | |
"AGAA": 34, | |
"AGAC": 35, | |
"AGAG": 36, | |
"AGAT": 37, | |
"AGCA": 38, | |
"AGCC": 39, | |
"AGCG": 40, | |
"AGCT": 41, | |
"AGGA": 42, | |
"AGGC": 43, | |
"AGGG": 44, | |
"AGGT": 45, | |
"AGTA": 46, | |
"AGTC": 47, | |
"AGTG": 48, | |
"AGTT": 49, | |
"ATAA": 50, | |
"ATAC": 51, | |
"ATAG": 52, | |
"ATAT": 53, | |
"ATCA": 54, | |
"ATCC": 55, | |
"ATCG": 56, | |
"ATCT": 57, | |
"ATGA": 58, | |
"ATGC": 59, | |
"ATGG": 60, | |
"ATGT": 61, | |
"ATTA": 62, | |
"ATTC": 63, | |
"ATTG": 64, | |
"ATTT": 65, | |
"CAAA": 66, | |
"CAAC": 67, | |
"CAAG": 68, | |
"CAAT": 69, | |
"CACA": 70, | |
"CACC": 71, | |
"CACG": 72, | |
"CACT": 73, | |
"CAGA": 74, | |
"CAGC": 75, | |
"CAGG": 76, | |
"CAGT": 77, | |
"CATA": 78, | |
"CATC": 79, | |
"CATG": 80, | |
"CATT": 81, | |
"CCAA": 82, | |
"CCAC": 83, | |
"CCAG": 84, | |
"CCAT": 85, | |
"CCCA": 86, | |
"CCCC": 87, | |
"CCCG": 88, | |
"CCCT": 89, | |
"CCGA": 90, | |
"CCGC": 91, | |
"CCGG": 92, | |
"CCGT": 93, | |
"CCTA": 94, | |
"CCTC": 95, | |
"CCTG": 96, | |
"CCTT": 97, | |
"CGAA": 98, | |
"CGAC": 99, | |
"CGAG": 100, | |
"CGAT": 101, | |
"CGCA": 102, | |
"CGCC": 103, | |
"CGCG": 104, | |
"CGCT": 105, | |
"CGGA": 106, | |
"CGGC": 107, | |
"CGGG": 108, | |
"CGGT": 109, | |
"CGTA": 110, | |
"CGTC": 111, | |
"CGTG": 112, | |
"CGTT": 113, | |
"CTAA": 114, | |
"CTAC": 115, | |
"CTAG": 116, | |
"CTAT": 117, | |
"CTCA": 118, | |
"CTCC": 119, | |
"CTCG": 120, | |
"CTCT": 121, | |
"CTGA": 122, | |
"CTGC": 123, | |
"CTGG": 124, | |
"CTGT": 125, | |
"CTTA": 126, | |
"CTTC": 127, | |
"CTTG": 128, | |
"CTTT": 129, | |
"GAAA": 130, | |
"GAAC": 131, | |
"GAAG": 132, | |
"GAAT": 133, | |
"GACA": 134, | |
"GACC": 135, | |
"GACG": 136, | |
"GACT": 137, | |
"GAGA": 138, | |
"GAGC": 139, | |
"GAGG": 140, | |
"GAGT": 141, | |
"GATA": 142, | |
"GATC": 143, | |
"GATG": 144, | |
"GATT": 145, | |
"GCAA": 146, | |
"GCAC": 147, | |
"GCAG": 148, | |
"GCAT": 149, | |
"GCCA": 150, | |
"GCCC": 151, | |
"GCCG": 152, | |
"GCCT": 153, | |
"GCGA": 154, | |
"GCGC": 155, | |
"GCGG": 156, | |
"GCGT": 157, | |
"GCTA": 158, | |
"GCTC": 159, | |
"GCTG": 160, | |
"GCTT": 161, | |
"GGAA": 162, | |
"GGAC": 163, | |
"GGAG": 164, | |
"GGAT": 165, | |
"GGCA": 166, | |
"GGCC": 167, | |
"GGCG": 168, | |
"GGCT": 169, | |
"GGGA": 170, | |
"GGGC": 171, | |
"GGGG": 172, | |
"GGGT": 173, | |
"GGTA": 174, | |
"GGTC": 175, | |
"GGTG": 176, | |
"GGTT": 177, | |
"GTAA": 178, | |
"GTAC": 179, | |
"GTAG": 180, | |
"GTAT": 181, | |
"GTCA": 182, | |
"GTCC": 183, | |
"GTCG": 184, | |
"GTCT": 185, | |
"GTGA": 186, | |
"GTGC": 187, | |
"GTGG": 188, | |
"GTGT": 189, | |
"GTTA": 190, | |
"GTTC": 191, | |
"GTTG": 192, | |
"GTTT": 193, | |
"TAAA": 194, | |
"TAAC": 195, | |
"TAAG": 196, | |
"TAAT": 197, | |
"TACA": 198, | |
"TACC": 199, | |
"TACG": 200, | |
"TACT": 201, | |
"TAGA": 202, | |
"TAGC": 203, | |
"TAGG": 204, | |
"TAGT": 205, | |
"TATA": 206, | |
"TATC": 207, | |
"TATG": 208, | |
"TATT": 209, | |
"TCAA": 210, | |
"TCAC": 211, | |
"TCAG": 212, | |
"TCAT": 213, | |
"TCCA": 214, | |
"TCCC": 215, | |
"TCCG": 216, | |
"TCCT": 217, | |
"TCGA": 218, | |
"TCGC": 219, | |
"TCGG": 220, | |
"TCGT": 221, | |
"TCTA": 222, | |
"TCTC": 223, | |
"TCTG": 224, | |
"TCTT": 225, | |
"TGAA": 226, | |
"TGAC": 227, | |
"TGAG": 228, | |
"TGAT": 229, | |
"TGCA": 230, | |
"TGCC": 231, | |
"TGCG": 232, | |
"TGCT": 233, | |
"TGGA": 234, | |
"TGGC": 235, | |
"TGGG": 236, | |
"TGGT": 237, | |
"TGTA": 238, | |
"TGTC": 239, | |
"TGTG": 240, | |
"TGTT": 241, | |
"TTAA": 242, | |
"TTAC": 243, | |
"TTAG": 244, | |
"TTAT": 245, | |
"TTCA": 246, | |
"TTCC": 247, | |
"TTCG": 248, | |
"TTCT": 249, | |
"TTGA": 250, | |
"TTGC": 251, | |
"TTGG": 252, | |
"TTGT": 253, | |
"TTTA": 254, | |
"TTTC": 255, | |
"TTTG": 256, | |
"TTTT": 257 | |
} | |
} | |
} |