from __future__ import annotations

import unittest

from transformers import is_tf_available
from transformers.testing_utils import require_sentencepiece, require_tf, require_tokenizers, slow

from ...test_configuration_common import ConfigTester
from ...test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
from ...test_pipeline_mixin import PipelineTesterMixin


# TensorFlow-dependent names are only imported when TensorFlow is installed, so
# that this module can still be imported (and its tests skipped) without it.
if is_tf_available():
    import numpy as np
    import tensorflow as tf

    from transformers import (
        TF_FLAUBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
        FlaubertConfig,
        TFFlaubertForMultipleChoice,
        TFFlaubertForQuestionAnsweringSimple,
        TFFlaubertForSequenceClassification,
        TFFlaubertForTokenClassification,
        TFFlaubertModel,
        TFFlaubertWithLMHeadModel,
    )


class TFFlaubertModelTester:
    """Build a small `FlaubertConfig` plus matching inputs and check model output shapes.

    The `create_and_check_*` methods all take the tuple returned by
    `prepare_config_and_inputs`, unpacked positionally, and report failures through
    the `parent` test case's `assertEqual`.
    """

    def __init__(
        self,
        parent,
    ):
        """Store the owning test case and the fixed hyper-parameters of the tiny test model.

        Args:
            parent: The test case whose `assertEqual` is used by the `create_and_check_*` methods.
        """
        self.parent = parent
        self.batch_size = 13
        self.seq_length = 7
        self.is_training = True
        self.use_input_lengths = True
        self.use_token_type_ids = True
        self.use_labels = True
        self.gelu_activation = True
        self.sinusoidal_embeddings = False
        self.causal = False
        self.asm = False
        self.n_langs = 2
        self.vocab_size = 99
        self.n_special = 0
        self.hidden_size = 32
        self.num_hidden_layers = 2
        self.num_attention_heads = 4
        self.hidden_dropout_prob = 0.1
        self.attention_probs_dropout_prob = 0.1
        self.max_position_embeddings = 512
        self.type_vocab_size = 16
        self.type_sequence_label_size = 2
        self.initializer_range = 0.02
        self.num_labels = 3
        self.num_choices = 4
        self.summary_type = "last"
        self.use_proj = True
        self.scope = None
        self.bos_token_id = 0

    def prepare_config_and_inputs(self):
        """Create a `FlaubertConfig` and the input/label tensors used by the checks.

        Returns:
            A 9-tuple `(config, input_ids, token_type_ids, input_lengths, sequence_labels,
            token_labels, is_impossible_labels, choice_labels, input_mask)`. `input_lengths`,
            `token_type_ids` and the four label entries are `None` when the corresponding
            `use_input_lengths` / `use_token_type_ids` / `use_labels` flag is disabled.
        """
        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
        input_mask = random_attention_mask([self.batch_size, self.seq_length], dtype=tf.float32)

        input_lengths = None
        if self.use_input_lengths:
            input_lengths = (
                ids_tensor([self.batch_size], vocab_size=2) + self.seq_length - 2
            )  # small variation of seq_length

        token_type_ids = None
        if self.use_token_type_ids:
            token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.n_langs)

        # Every label defaults to None so that the return statement below never
        # references an unbound local when `use_labels` is disabled.
        sequence_labels = None
        token_labels = None
        is_impossible_labels = None
        choice_labels = None
        if self.use_labels:
            sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
            token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
            is_impossible_labels = ids_tensor([self.batch_size], 2, dtype=tf.float32)
            choice_labels = ids_tensor([self.batch_size], self.num_choices)

        config = FlaubertConfig(
            vocab_size=self.vocab_size,
            n_special=self.n_special,
            emb_dim=self.hidden_size,
            n_layers=self.num_hidden_layers,
            n_heads=self.num_attention_heads,
            dropout=self.hidden_dropout_prob,
            attention_dropout=self.attention_probs_dropout_prob,
            gelu_activation=self.gelu_activation,
            sinusoidal_embeddings=self.sinusoidal_embeddings,
            asm=self.asm,
            causal=self.causal,
            n_langs=self.n_langs,
            max_position_embeddings=self.max_position_embeddings,
            initializer_range=self.initializer_range,
            summary_type=self.summary_type,
            use_proj=self.use_proj,
            bos_token_id=self.bos_token_id,
        )

        return (
            config,
            input_ids,
            token_type_ids,
            input_lengths,
            sequence_labels,
            token_labels,
            is_impossible_labels,
            choice_labels,
            input_mask,
        )

    def create_and_check_flaubert_model(
        self,
        config,
        input_ids,
        token_type_ids,
        input_lengths,
        sequence_labels,
        token_labels,
        is_impossible_labels,
        choice_labels,
        input_mask,
    ):
        """Run `TFFlaubertModel` with dict and list inputs and check the hidden-state shape."""
        model = TFFlaubertModel(config=config)

        # First call only exercises the dict calling convention; its result is
        # intentionally overwritten by the list-style call checked below.
        inputs = {"input_ids": input_ids, "lengths": input_lengths, "langs": token_type_ids}
        result = model(inputs)

        inputs = [input_ids, input_mask]
        result = model(inputs)

        self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))

    def create_and_check_flaubert_lm_head(
        self,
        config,
        input_ids,
        token_type_ids,
        input_lengths,
        sequence_labels,
        token_labels,
        is_impossible_labels,
        choice_labels,
        input_mask,
    ):
        """Run `TFFlaubertWithLMHeadModel` and check that logits are `(batch, seq, vocab)`."""
        model = TFFlaubertWithLMHeadModel(config)

        inputs = {"input_ids": input_ids, "lengths": input_lengths, "langs": token_type_ids}
        result = model(inputs)

        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))

    def create_and_check_flaubert_qa(
        self,
        config,
        input_ids,
        token_type_ids,
        input_lengths,
        sequence_labels,
        token_labels,
        is_impossible_labels,
        choice_labels,
        input_mask,
    ):
        """Run `TFFlaubertForQuestionAnsweringSimple` and check start/end logits are `(batch, seq)`."""
        model = TFFlaubertForQuestionAnsweringSimple(config)

        inputs = {"input_ids": input_ids, "lengths": input_lengths}
        result = model(inputs)

        self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length))
        self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length))

    def create_and_check_flaubert_sequence_classif(
        self,
        config,
        input_ids,
        token_type_ids,
        input_lengths,
        sequence_labels,
        token_labels,
        is_impossible_labels,
        choice_labels,
        input_mask,
    ):
        """Run `TFFlaubertForSequenceClassification` and check logits are `(batch, type_sequence_label_size)`."""
        model = TFFlaubertForSequenceClassification(config)

        inputs = {"input_ids": input_ids, "lengths": input_lengths}
        result = model(inputs)

        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.type_sequence_label_size))

    def create_and_check_flaubert_for_token_classification(
        self,
        config,
        input_ids,
        token_type_ids,
        input_lengths,
        sequence_labels,
        token_labels,
        is_impossible_labels,
        choice_labels,
        input_mask,
    ):
        """Run `TFFlaubertForTokenClassification` and check logits are `(batch, seq, num_labels)`.

        Sets `config.num_labels` on the passed-in config before building the model.
        """
        config.num_labels = self.num_labels
        model = TFFlaubertForTokenClassification(config=config)

        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        result = model(inputs)

        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels))

    def create_and_check_flaubert_for_multiple_choice(
        self,
        config,
        input_ids,
        token_type_ids,
        input_lengths,
        sequence_labels,
        token_labels,
        is_impossible_labels,
        choice_labels,
        input_mask,
    ):
        """Run `TFFlaubertForMultipleChoice` and check logits are `(batch, num_choices)`.

        Sets `config.num_choices` on the passed-in config before building the model.
        """
        config.num_choices = self.num_choices
        model = TFFlaubertForMultipleChoice(config=config)

        # Insert a new axis at position 1 and repeat each tensor `num_choices`
        # times along it, giving one copy of every input per choice.
        multiple_choice_inputs_ids = tf.tile(tf.expand_dims(input_ids, 1), (1, self.num_choices, 1))
        multiple_choice_input_mask = tf.tile(tf.expand_dims(input_mask, 1), (1, self.num_choices, 1))
        multiple_choice_token_type_ids = tf.tile(tf.expand_dims(token_type_ids, 1), (1, self.num_choices, 1))

        inputs = {
            "input_ids": multiple_choice_inputs_ids,
            "attention_mask": multiple_choice_input_mask,
            "token_type_ids": multiple_choice_token_type_ids,
        }
        result = model(inputs)

        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_choices))

    def prepare_config_and_inputs_for_common(self):
        """Return `(config, inputs_dict)` built from `prepare_config_and_inputs`.

        `inputs_dict` holds `input_ids`, `token_type_ids`, `langs` (the same tensor as
        `token_type_ids`) and `lengths`; the labels and attention mask are dropped.
        """
        config_and_inputs = self.prepare_config_and_inputs()
        (
            config,
            input_ids,
            token_type_ids,
            input_lengths,
            sequence_labels,
            token_labels,
            is_impossible_labels,
            choice_labels,
            input_mask,
        ) = config_and_inputs

        inputs_dict = {
            "input_ids": input_ids,
            "token_type_ids": token_type_ids,
            "langs": token_type_ids,
            "lengths": input_lengths,
        }
        return config, inputs_dict


@require_tf
class TFFlaubertModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    """Test case for the TF Flaubert model classes, combining the shared mixins with shape checks."""

    # Each class-level collection falls back to an empty container when
    # TensorFlow is unavailable, because the model classes are not imported then.
    all_model_classes = (
        (
            TFFlaubertModel,
            TFFlaubertWithLMHeadModel,
            TFFlaubertForSequenceClassification,
            TFFlaubertForQuestionAnsweringSimple,
            TFFlaubertForTokenClassification,
            TFFlaubertForMultipleChoice,
        )
        if is_tf_available()
        else ()
    )
    all_generative_model_classes = (
        (TFFlaubertWithLMHeadModel,) if is_tf_available() else ()
    )  # TODO (PVP): Check other models whether language generation is also applicable
    pipeline_model_mapping = (
        {
            "feature-extraction": TFFlaubertModel,
            "fill-mask": TFFlaubertWithLMHeadModel,
            "question-answering": TFFlaubertForQuestionAnsweringSimple,
            "text-classification": TFFlaubertForSequenceClassification,
            "token-classification": TFFlaubertForTokenClassification,
            "zero-shot": TFFlaubertForSequenceClassification,
        }
        if is_tf_available()
        else {}
    )
    test_head_masking = False
    test_onnx = False

    # TODO: Fix the failed tests
    def is_pipeline_test_to_skip(
        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
    ):
        """Return True when the given pipeline test should be skipped for this model.

        Only `QAPipelineTests` with a non-`None` tokenizer whose name does not end in
        `"Fast"` is skipped; every other combination returns False.
        """
        if (
            pipeline_test_casse_name == "QAPipelineTests"
            and tokenizer_name is not None
            and not tokenizer_name.endswith("Fast")
        ):
            # `QAPipelineTests` fails for a few models when the slower tokenizers are used.
            # (The slower tokenizers were never used for pipeline tests before the pipeline testing rework)
            # TODO: check (and possibly fix) the `QAPipelineTests` with slower tokenizer
            return True

        return False

    def setUp(self):
        """Create the model tester and the config tester used by the tests below."""
        self.model_tester = TFFlaubertModelTester(self)
        self.config_tester = ConfigTester(self, config_class=FlaubertConfig, emb_dim=37)

    def test_config(self):
        """Run the shared configuration tests for `FlaubertConfig`."""
        self.config_tester.run_common_tests()

    def test_flaubert_model(self):
        """Check the output shape of the base model."""
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_flaubert_model(*config_and_inputs)

    def test_flaubert_lm_head(self):
        """Check the logits shape of the LM-head model."""
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_flaubert_lm_head(*config_and_inputs)

    def test_flaubert_qa(self):
        """Check the start/end logits shapes of the question-answering model."""
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_flaubert_qa(*config_and_inputs)

    def test_flaubert_sequence_classif(self):
        """Check the logits shape of the sequence-classification model."""
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_flaubert_sequence_classif(*config_and_inputs)

    def test_for_token_classification(self):
        """Check the logits shape of the token-classification model."""
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_flaubert_for_token_classification(*config_and_inputs)

    def test_for_multiple_choice(self):
        """Check the logits shape of the multiple-choice model."""
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_flaubert_for_multiple_choice(*config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        """Load the first checkpoint of the pretrained archive list and check it is not None."""
        for model_name in TF_FLAUBERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = TFFlaubertModel.from_pretrained(model_name)
            self.assertIsNotNone(model)


@require_tf
@require_sentencepiece
@require_tokenizers
class TFFlaubertModelIntegrationTest(unittest.TestCase):
    """Integration test comparing a pretrained checkpoint's output against stored reference values."""

    @slow
    def test_output_embeds_base_model(self):
        """Check the output shape and a 3x3 slice for the `jplu/tf-flaubert-small-cased` checkpoint."""
        model = TFFlaubertModel.from_pretrained("jplu/tf-flaubert-small-cased")

        input_ids = tf.convert_to_tensor(
            [[0, 158, 735, 2592, 1424, 6727, 82, 1]],
            dtype=tf.int32,
        )  # "J'aime flaubert !"

        output = model(input_ids)[0]
        expected_shape = tf.TensorShape((1, 8, 512))
        self.assertEqual(output.shape, expected_shape)
        # compare the actual values for a slice.
        expected_slice = tf.convert_to_tensor(
            [
                [
                    [-1.8768773, -1.566555, 0.27072418],
                    [-1.6920038, -0.5873505, 1.9329599],
                    [-2.9563985, -1.6993835, 1.7972052],
                ]
            ],
            dtype=tf.float32,
        )

        self.assertTrue(np.allclose(output[:, :3, :3].numpy(), expected_slice.numpy(), atol=1e-4))