connor-henderson
committed on
Commit
·
efef055
1
Parent(s):
b0dd235
Upload tokenizer
Browse files
- special_tokens_map.json +6 -0
- tokenizer_config.json +9 -0
- vocab.json +1 -0
special_tokens_map.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "<sos/eos>",
|
3 |
+
"eos_token": "<sos/eos>",
|
4 |
+
"pad_token": "<blank>",
|
5 |
+
"unk_token": "<unk>"
|
6 |
+
}
|
tokenizer_config.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "<sos/eos>",
|
3 |
+
"clean_up_tokenization_spaces": true,
|
4 |
+
"eos_token": "<sos/eos>",
|
5 |
+
"model_max_length": 1000000000000000019884624838656,
|
6 |
+
"pad_token": "<blank>",
|
7 |
+
"tokenizer_class": "FastSpeech2ConformerTokenizer",
|
8 |
+
"unk_token": "<unk>"
|
9 |
+
}
|
vocab.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"<blank>": 0, "<unk>": 1, "AH0": 2, "N": 3, "T": 4, "D": 5, "S": 6, "R": 7, "L": 8, "DH": 9, "K": 10, "Z": 11, "IH1": 12, "IH0": 13, "M": 14, "EH1": 15, "W": 16, "P": 17, "AE1": 18, "AH1": 19, "V": 20, "ER0": 21, "F": 22, ",": 23, "AA1": 24, "B": 25, "HH": 26, "IY1": 27, "UW1": 28, "IY0": 29, "AO1": 30, "EY1": 31, "AY1": 32, ".": 33, "OW1": 34, "SH": 35, "NG": 36, "G": 37, "ER1": 38, "CH": 39, "JH": 40, "Y": 41, "AW1": 42, "TH": 43, "UH1": 44, "EH2": 45, "OW0": 46, "EY2": 47, "AO0": 48, "IH2": 49, "AE2": 50, "AY2": 51, "AA2": 52, "UW0": 53, "EH0": 54, "OY1": 55, "EY0": 56, "AO2": 57, "ZH": 58, "OW2": 59, "AE0": 60, "UW2": 61, "AH2": 62, "AY0": 63, "IY2": 64, "AW2": 65, "AA0": 66, "'": 67, "ER2": 68, "UH2": 69, "?": 70, "OY2": 71, "!": 72, "AW0": 73, "UH0": 74, "OY0": 75, "..": 76, "<sos/eos>": 77}
|