BART-Lagrangian / tokenizer.json
JoseEliel's picture
Upload tokenizer
351d7bd verified
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "[SOS]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "[EOS]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "[PAD]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "[UNK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "Whitespace"
},
"post_processor": null,
"decoder": null,
"model": {
"type": "WordLevel",
"vocab": {
"[SOS]": 0,
"[EOS]": 1,
"[PAD]": 2,
"[UNK]": 3,
"/": 4,
"1": 5,
"2": 6,
"-": 7,
"+": 8,
"0": 9,
"3": 10,
"4": 11,
"5": 12,
"6": 13,
"7": 14,
"8": 15,
"9": 16,
"COMMUTATOR_A": 17,
"COMMUTATOR_B": 18,
"CONTRACTIONS": 19,
"DAGGER": 20,
"DERIVATIVE": 21,
"FIELD": 22,
"HEL": 23,
"ID0": 24,
"ID1": 25,
"ID2": 26,
"ID3": 27,
"ID4": 28,
"ID5": 29,
"ID6": 30,
"ID7": 31,
"ID8": 32,
"ID9": 33,
"LORENTZ": 34,
"SIGMA_BAR": 35,
"SPIN": 36,
"SU2": 37,
"SU3": 38,
"U1": 39,
"i": 40
},
"unk_token": "[UNK]"
}
}