""" ViT MSN model configuration"""

from ...configuration_utils import PretrainedConfig
from ...utils import logging


logger = logging.get_logger(__name__)

# Maps a pretrained checkpoint name to the URL of its hosted `config.json`.
VIT_MSN_PRETRAINED_CONFIG_ARCHIVE_MAP = {
    "sayakpaul/vit-msn-base": "https://huggingface.co/sayakpaul/vit-msn-base/resolve/main/config.json",
    # See all ViT MSN models at https://huggingface.co/models?filter=vit_msn
}


class ViTMSNConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`ViTMSNModel`]. It is used to instantiate a ViT
    MSN model according to the specified arguments, defining the model architecture. Instantiating a configuration with
    the defaults will yield a similar configuration to that of the ViT
    [facebook/vit_msn_base](https://huggingface.co/facebook/vit_msn_base) architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.


    Args:
        hidden_size (`int`, *optional*, defaults to 768):
            Dimensionality of the encoder layers and the pooler layer.
        num_hidden_layers (`int`, *optional*, defaults to 12):
            Number of hidden layers in the Transformer encoder.
        num_attention_heads (`int`, *optional*, defaults to 12):
            Number of attention heads for each attention layer in the Transformer encoder.
        intermediate_size (`int`, *optional*, defaults to 3072):
            Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
        hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`):
            The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
            `"relu"`, `"selu"` and `"gelu_new"` are supported.
        hidden_dropout_prob (`float`, *optional*, defaults to 0.0):
            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
        attention_probs_dropout_prob (`float`, *optional*, defaults to 0.0):
            The dropout ratio for the attention probabilities.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        layer_norm_eps (`float`, *optional*, defaults to 1e-06):
            The epsilon used by the layer normalization layers.
        image_size (`int`, *optional*, defaults to 224):
            The size (resolution) of each image.
        patch_size (`int`, *optional*, defaults to 16):
            The size (resolution) of each patch.
        num_channels (`int`, *optional*, defaults to 3):
            The number of input channels.
        qkv_bias (`bool`, *optional*, defaults to `True`):
            Whether to add a bias to the queries, keys and values.

    Example:

    ```python
    >>> from transformers import ViTMSNModel, ViTMSNConfig

    >>> # Initializing a ViT MSN vit-msn-base style configuration
    >>> configuration = ViTMSNConfig()

    >>> # Initializing a model from the vit-msn-base style configuration
    >>> model = ViTMSNModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```"""

    # Class-level model type string for this configuration.
    model_type = "vit_msn"

    def __init__(
        self,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        hidden_dropout_prob=0.0,
        attention_probs_dropout_prob=0.0,
        initializer_range=0.02,
        layer_norm_eps=1e-06,
        image_size=224,
        patch_size=16,
        num_channels=3,
        qkv_bias=True,
        **kwargs,
    ):
        """Store the given hyper-parameters as attributes of the same name.

        Any additional keyword arguments are forwarded unchanged to the parent
        class constructor. See the class docstring for the meaning and default
        of each named argument.
        """
        # Let the parent class consume the remaining keyword arguments first.
        super().__init__(**kwargs)

        # Transformer encoder dimensions.
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.intermediate_size = intermediate_size

        # Activation, regularisation and initialisation settings.
        self.hidden_act = hidden_act
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.initializer_range = initializer_range
        self.layer_norm_eps = layer_norm_eps

        # Input image / patch settings.
        self.image_size = image_size
        self.patch_size = patch_size
        self.num_channels = num_channels
        self.qkv_bias = qkv_bias