can.turan committed on
Commit
7833aec
·
1 Parent(s): 04250db

Tokenizer fix

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +8 -4
tokenizer_config.json CHANGED
@@ -47,9 +47,9 @@
47
  "do_lower_case": true,
48
  "extra_special_tokens": {},
49
  "mask_token": "[MASK]",
50
- "max_len": 8096,
51
- "max_length": 8096,
52
- "model_max_length": 8096,
53
  "never_split": null,
54
  "pad_to_multiple_of": null,
55
  "pad_token": "[PAD]",
@@ -59,8 +59,12 @@
59
  "stride": 0,
60
  "strip_accents": false,
61
  "tokenize_chinese_chars": true,
62
- "tokenizer_class": "ElectraTokenizer",
63
  "truncation_side": "right",
64
  "truncation_strategy": "longest_first",
 
 
 
 
65
  "unk_token": "[UNK]"
66
  }
 
47
  "do_lower_case": true,
48
  "extra_special_tokens": {},
49
  "mask_token": "[MASK]",
50
+ "max_len": 8192,
51
+ "max_length": 8192,
52
+ "model_max_length": 8192,
53
  "never_split": null,
54
  "pad_to_multiple_of": null,
55
  "pad_token": "[PAD]",
 
59
  "stride": 0,
60
  "strip_accents": false,
61
  "tokenize_chinese_chars": true,
62
+ "tokenizer_class": "PreTrainedTokenizerFast",
63
  "truncation_side": "right",
64
  "truncation_strategy": "longest_first",
65
+ "model_input_names": [
66
+ "input_ids",
67
+ "attention_mask"
68
+ ],
69
  "unk_token": "[UNK]"
70
  }