from typing import Dict

from transformers import PreTrainedTokenizer, AddedToken


class CustomTokenizer(PreTrainedTokenizer):
    """Minimal tokenizer that splits text on whitespace and has an empty vocab.

    Each of ``__init__`` and ``tokenize`` prints a trace message to stdout
    when it is called.
    """

    def __init__(self, **kwargs):
        """Forward all keyword arguments to the base class, then print a trace.

        Args:
            **kwargs: Passed unchanged to ``PreTrainedTokenizer.__init__``.
        """
        super().__init__(**kwargs)
        print("Initializing CustomTokenizer")

    def tokenize(self, text: str, **kwargs):
        """Print the incoming text and return its whitespace-separated pieces.

        Args:
            text: The string to split.
            **kwargs: Accepted and ignored. The base-class ``tokenize`` takes
                extra keyword arguments, so this override accepts them too
                instead of raising ``TypeError`` when a caller forwards
                options.

        Returns:
            The result of ``text.split()``: runs of whitespace act as a single
            separator, and an empty or all-whitespace string gives ``[]``.
        """
        print("Tokenizing text", text)
        return text.split()

    def get_vocab(self) -> Dict[str, int]:
        """Return the token-to-id mapping, which is always an empty dict here."""
        return {}