""" Tokenization class for model Reformer."""import osfrom shutil import copyfilefrom typing import Optional, Tuplefrom ...tokenization_utils_fast import PreTrainedTokenizerFastfrom ...utils import is_sentencepiece_available, loggingif is_sentencepiece_available():    from .tokenization_reformer import ReformerTokenizerelse:    ReformerTokenizer = Nonelogger = logging.get_logger(__name__)SPIECE_UNDERLINE = "▁"VOCAB_FILES_NAMES = {"vocab_file": "spiece.model", "tokenizer_file": "tokenizer.json"}PRETRAINED_VOCAB_FILES_MAP = {    "vocab_file": {        "google/reformer-crime-and-punishment": (            "https://huggingface.co/google/reformer-crime-and-punishment/resolve/main/spiece.model"        )    },    "tokenizer_file": {        "google/reformer-crime-and-punishment": (            "https://huggingface.co/google/reformer-crime-and-punishment/resolve/main/tokenizer.json"        )    },}PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {    "google/reformer-crime-and-punishment": 524288,}class ReformerTokenizerFast(PreTrainedTokenizerFast):    """    Construct a "fast" Reformer tokenizer (backed by HuggingFace's *tokenizers* library). Based on    [Unigram](https://huggingface.co/docs/tokenizers/python/latest/components.html?highlight=unigram#models).    This tokenizer inherits from [`PreTrainedTokenizerFast`] which contains most of the main methods. Users should    refer to this superclass for more information regarding those methods.    Args:        vocab_file (`str`):            [SentencePiece](https://github.com/google/sentencepiece) file (generally has a *.spm* extension) that            contains the vocabulary necessary to instantiate a tokenizer.        eos_token (`str`, *optional*, defaults to `"</s>"`):            The end of sequence token.            <Tip>            When building a sequence using special tokens, this is not the token that is used for the end of sequence.            The token used is the `sep_token`.            </Tip>        unk_token (`str`, *optional*, defaults to `"<unk>"`):            The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this            token instead.        pad_token (`str`, *optional*, defaults to `"<pad>"`):            The token used for padding, for example when batching sequences of different lengths.        additional_special_tokens (`List[str]`, *optional*):            Additional special tokens used by the tokenizer.    """    vocab_files_names = VOCAB_FILES_NAMES    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP    max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES    model_input_names = ["input_ids", "attention_mask"]    slow_tokenizer_class = ReformerTokenizer    def __init__(        self,        vocab_file=None,        tokenizer_file=None,        eos_token="</s>",        unk_token="<unk>",        additional_special_tokens=[],        **kwargs,    ):        super().__init__(            vocab_file,            tokenizer_file=tokenizer_file,            eos_token=eos_token,            unk_token=unk_token,            additional_special_tokens=additional_special_tokens,            **kwargs,        )        self.vocab_file = vocab_file    @property    def can_save_slow_tokenizer(self) -> bool:        return os.path.isfile(self.vocab_file) if self.vocab_file else False    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:        if not self.can_save_slow_tokenizer:            raise ValueError(                "Your fast tokenizer does not have the necessary information to save the vocabulary for a slow "                "tokenizer."            )        if not os.path.isdir(save_directory):            logger.error(f"Vocabulary path ({save_directory}) should be a directory")            return        out_vocab_file = os.path.join(            save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]        )        if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file):            copyfile(self.vocab_file, out_vocab_file)        return (out_vocab_file,)