Commit c4b0d7f1
Parent(s): ea715ce

Update model configuration and mappings

- config.json +1 -1
- configuration_aragpt2.py +5 -5
- modeling_aragpt2.py +22 -37
config.json CHANGED
@@ -5,7 +5,7 @@
   ],
   "auto_map": {
     "AutoConfig": "configuration_aragpt2.AraGPT2Config",
-    "
+    "AutoModelForCausalLM": "modeling_aragpt2.AraGPT2LMHeadModel",
     "AutoModel": "modeling_aragpt2.AraGPT2Model"
   },
   "attention_probs_dropout_prob": 0.1,
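Note: with "AutoModelForCausalLM" mapped in auto_map, the checkpoint can be loaded straight through the Auto classes. A minimal usage sketch (trust_remote_code=True is required because the model class ships inside the repo; the repo id is the one referenced throughout this commit):

    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_id = "aubmindlab/aragpt2-mega"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # auto_map resolves this to modeling_aragpt2.AraGPT2LMHeadModel
    model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)

    inputs = tokenizer("يحكى أن", return_tensors="pt")  # Arabic prompt
    outputs = model.generate(**inputs, max_new_tokens=20)
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))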
configuration_aragpt2.py CHANGED
@@ -1,5 +1,5 @@
 # coding=utf-8
-"""
+""" AraGPT2 configuration"""
 from collections import OrderedDict
 from typing import Any, List, Mapping, Optional
 
@@ -18,7 +18,7 @@ AraGPT2_PRETRAINED_CONFIG_ARCHIVE_MAP = {
 
 class AraGPT2Config(PretrainedConfig):
     """
-    This is the configuration class to store the configuration of a [`
+    This is the configuration class to store the configuration of a [`AraGPT2Model`] or a [`TFAraGPT2Model`]. It is used to
     instantiate a AraGPT2 model according to the specified arguments, defining the model architecture. Instantiating a
     configuration with the defaults will yield a similar configuration to that of the AraGPT2
     [aubmindlab/aragpt2-mega](https://huggingface.co/aubmindlab/aragpt2-mega) architecture.
@@ -131,7 +131,7 @@ class AraGPT2Config(PretrainedConfig):
         n_layer=12,
         n_head=12,
         n_inner=None,
-        activation_function="
+        activation_function="gelu",
         resid_pdrop=0.1,
         embd_pdrop=0.1,
         attn_pdrop=0.1,
@@ -144,8 +144,8 @@ class AraGPT2Config(PretrainedConfig):
         summary_first_dropout=0.1,
         scale_attn_weights=True,
         use_cache=True,
-        bos_token_id=
-        eos_token_id=
+        bos_token_id=0,
+        eos_token_id=0,
         scale_attn_by_inverse_layer_idx=False,
         reorder_and_upcast_attn=False,
         **kwargs,
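Note: the changed defaults above (GELU activation, BOS/EOS token id 0) can be sanity-checked by instantiating the config directly. A small sketch, assuming the module is importable from a local checkout of the repo:

    from configuration_aragpt2 import AraGPT2Config

    config = AraGPT2Config()  # defaults as set in this commit
    assert config.activation_function == "gelu"
    assert config.bos_token_id == 0 and config.eos_token_id == 0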
modeling_aragpt2.py CHANGED
@@ -1,19 +1,5 @@
 # coding=utf-8
-
-# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""PyTorch OpenAI GPT-2 model."""
+"""PyTorch AraGPT2 model."""
 
 import math
 import os
@@ -59,16 +45,12 @@ _CHECKPOINT_FOR_DOC = "aubmindlab/aragpt2-mega"
 _CONFIG_FOR_DOC = "AraGPT2Config"
 _TOKENIZER_FOR_DOC = "GPT2Tokenizer"
 
-
+ARAGPT2_PRETRAINED_MODEL_ARCHIVE_LIST = [
     "aubmindlab/aragpt2-mega",
-
-    "aubmindlab/aragpt2-mega",
-    "aubmindlab/aragpt2-mega",
-    "distilgpt2",
-    # See all GPT-2 models at https://huggingface.co/models?filter=gpt2
+    # See all AraGPT2 models at https://huggingface.co/models?filter=aragpt2
 ]
 
-_GPT2_ML_TF_TO_TORCH = {
+_ARAGPT2_ML_TF_TO_TORCH = {
     "LayerNorm_embed_norm": "emb_norm",
     "pos_embed": "wpe.weight",
     "word_embed": "wte.weight",
@@ -89,19 +71,22 @@ _GPT2_ML_TF_TO_TORCH = {
     "bias": "bias",
 }
 
+WEIGHTS_NAME = "pytorch_model.bin"
+CONFIG_NAME = "config.json"
+
 
 def convert_gpt2_checkpoint_to_pytorch(
-
+    aragpt2_checkpoint_path, aragpt2_config_file, pytorch_dump_folder_path
 ):
     # Construct model
-    if
+    if aragpt2_config_file == "":
         config = AraGPT2Config()
     else:
-        config = AraGPT2Config.from_json_file(
+        config = AraGPT2Config.from_json_file(aragpt2_config_file)
     model = AraGPT2Model(config)
 
     # Load weights from numpy
-
+    load_tf_weights_in_aragpt2(model, config, aragpt2_checkpoint_path)
 
     # Save pytorch-model
     pytorch_weights_dump_path = pytorch_dump_folder_path + "/" + WEIGHTS_NAME
@@ -115,7 +100,7 @@ def convert_gpt2_checkpoint_to_pytorch(
 
 # XXX: MUST do like: convert_gpt2_checkpoint_to_pytorch('./model.ckpt-100000', './mega.json', './')
 # https://github.com/tensorflow/models/issues/2675#issuecomment-516595597
-def load_tf_weights_in_gpt2(model, config, gpt2_checkpoint_path):
+def load_tf_weights_in_aragpt2(model, config, aragpt2_checkpoint_path):
    """Load tf checkpoints in a pytorch model"""
    try:
        import re
@@ -126,7 +111,7 @@ def load_tf_weights_in_gpt2(model, config, gpt2_checkpoint_path):
            "https://www.tensorflow.org/install/ for installation instructions."
        )
        raise
-    tf_path = os.path.abspath(gpt2_checkpoint_path)
+    tf_path = os.path.abspath(aragpt2_checkpoint_path)
    logger.info("Converting TensorFlow checkpoint from {}".format(tf_path))
    # Load weights from TF model
    init_vars = tf.train.list_variables(tf_path)
@@ -157,13 +142,13 @@ def load_tf_weights_in_gpt2(model, config, gpt2_checkpoint_path):
 
        if sname == "" or sname == "embeddings":
            continue
-        elif sname not in _GPT2_ML_TF_TO_TORCH:
+        elif sname not in _ARAGPT2_ML_TF_TO_TORCH:
            print("=========================================================")
            logger.info("Skip var name {}".format(scope_names))
            pointer = None
            break
        else:
-            tname = _GPT2_ML_TF_TO_TORCH[sname]
+            tname = _ARAGPT2_ML_TF_TO_TORCH[sname]
            if "." in tname:
                parent, child = tname.split(".")
                pointer = getattr(pointer, parent)
@@ -602,7 +587,7 @@ class AraGPT2PreTrainedModel(PreTrainedModel):
    """
 
    config_class = AraGPT2Config
-    load_tf_weights = load_tf_weights_in_gpt2
+    load_tf_weights = load_tf_weights_in_aragpt2
    base_model_prefix = "transformer"
    is_parallelizable = True
    supports_gradient_checkpointing = True
@@ -828,7 +813,7 @@ class AraGPT2Model(AraGPT2PreTrainedModel):
    _keys_to_ignore_on_load_unexpected = ["attn.masked_bias"]
    _keys_to_ignore_on_load_missing = ["attn.masked_bias"]
 
-    def __init__(self, config):
+    def __init__(self, config: AraGPT2Config):
        super().__init__(config)
 
        self.embed_dim = config.hidden_size
@@ -1177,7 +1162,7 @@ class AraGPT2LMHeadModel(AraGPT2PreTrainedModel):
    ]
    _tied_weights_keys = ["lm_head.weight"]
 
-    def __init__(self, config):
+    def __init__(self, config: AraGPT2Config):
        super().__init__(config)
        self.transformer = AraGPT2Model(config)
        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
@@ -1399,7 +1384,7 @@ class AraGPT2DoubleHeadsModel(AraGPT2PreTrainedModel):
    ]
    _tied_weights_keys = ["lm_head.weight"]
 
-    def __init__(self, config):
+    def __init__(self, config: AraGPT2Config):
        super().__init__(config)
        config.num_labels = 1
        self.transformer = AraGPT2Model(config)
@@ -1653,7 +1638,7 @@ class AraGPT2ForSequenceClassification(AraGPT2PreTrainedModel):
    ]
    _keys_to_ignore_on_load_missing = [r"h\.\d+\.attn\.masked_bias", r"lm_head.weight"]
 
-    def __init__(self, config):
+    def __init__(self, config: AraGPT2Config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.transformer = AraGPT2Model(config)
@@ -1789,7 +1774,7 @@ class AraGPT2ForSequenceClassification(AraGPT2PreTrainedModel):
    AraGPT2_START_DOCSTRING,
 )
 class AraGPT2ForTokenClassification(AraGPT2PreTrainedModel):
-    def __init__(self, config):
+    def __init__(self, config: AraGPT2Config):
        super().__init__(config)
        self.num_labels = config.num_labels
 
@@ -1890,7 +1875,7 @@ class AraGPT2ForTokenClassification(AraGPT2PreTrainedModel):
    AraGPT2_START_DOCSTRING,
 )
 class AraGPT2ForQuestionAnswering(AraGPT2PreTrainedModel):
-    def __init__(self, config):
+    def __init__(self, config: AraGPT2Config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.transformer = AraGPT2Model(config)
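Note: with the loader renamed to load_tf_weights_in_aragpt2 and wired into AraGPT2PreTrainedModel.load_tf_weights, a TensorFlow checkpoint converts as the in-file XXX comment suggests. A sketch using the placeholder paths from that comment:

    from modeling_aragpt2 import convert_gpt2_checkpoint_to_pytorch

    # TF checkpoint prefix, AraGPT2Config as JSON, and the output folder;
    # the result is written as pytorch_model.bin (WEIGHTS_NAME above).
    convert_gpt2_checkpoint_to_pytorch(
        "./model.ckpt-100000",
        "./mega.json",
        "./",
    )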