Commit c4b0d7f1
Parent(s): ea715ce

Update model configuration and mappings

- config.json +1 -1
- configuration_aragpt2.py +5 -5
- modeling_aragpt2.py +22 -37
config.json CHANGED
@@ -5,7 +5,7 @@
   ],
   "auto_map": {
     "AutoConfig": "configuration_aragpt2.AraGPT2Config",
-    "
+    "AutoModelForCausalLM": "modeling_aragpt2.AraGPT2LMHeadModel",
     "AutoModel": "modeling_aragpt2.AraGPT2Model"
   },
   "attention_probs_dropout_prob": 0.1,
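Note: with "AutoModelForCausalLM" mapped in auto_map, the checkpoint can be loaded straight through the Auto classes. A minimal usage sketch (trust_remote_code=True is required because the model class ships inside the repo; the repo id is the one referenced throughout this commit):

    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_id = "aubmindlab/aragpt2-mega"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # auto_map resolves this to modeling_aragpt2.AraGPT2LMHeadModel
    model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)

    inputs = tokenizer("يحكى أن", return_tensors="pt")  # Arabic prompt
    outputs = model.generate(**inputs, max_new_tokens=20)
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))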
configuration_aragpt2.py CHANGED
@@ -1,5 +1,5 @@
 # coding=utf-8
-"""
+""" AraGPT2 configuration"""
 from collections import OrderedDict
 from typing import Any, List, Mapping, Optional
 
@@ -18,7 +18,7 @@ AraGPT2_PRETRAINED_CONFIG_ARCHIVE_MAP = {
 
 class AraGPT2Config(PretrainedConfig):
     """
-    This is the configuration class to store the configuration of a [`
+    This is the configuration class to store the configuration of a [`AraGPT2Model`] or a [`TFAraGPT2Model`]. It is used to
     instantiate a AraGPT2 model according to the specified arguments, defining the model architecture. Instantiating a
     configuration with the defaults will yield a similar configuration to that of the AraGPT2
     [aubmindlab/aragpt2-mega](https://huggingface.co/aubmindlab/aragpt2-mega) architecture.
@@ -131,7 +131,7 @@ class AraGPT2Config(PretrainedConfig):
         n_layer=12,
         n_head=12,
         n_inner=None,
-        activation_function="
+        activation_function="gelu",
         resid_pdrop=0.1,
         embd_pdrop=0.1,
         attn_pdrop=0.1,
@@ -144,8 +144,8 @@ class AraGPT2Config(PretrainedConfig):
         summary_first_dropout=0.1,
         scale_attn_weights=True,
         use_cache=True,
-        bos_token_id=
-        eos_token_id=
+        bos_token_id=0,
+        eos_token_id=0,
         scale_attn_by_inverse_layer_idx=False,
         reorder_and_upcast_attn=False,
         **kwargs,
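Note: the changed defaults above (GELU activation, BOS/EOS token id 0) can be sanity-checked by instantiating the config directly. A small sketch, assuming the module is importable from a local checkout of the repo:

    from configuration_aragpt2 import AraGPT2Config

    config = AraGPT2Config()  # defaults as set in this commit
    assert config.activation_function == "gelu"
    assert config.bos_token_id == 0 and config.eos_token_id == 0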
modeling_aragpt2.py CHANGED
@@ -1,19 +1,5 @@
 # coding=utf-8
-
-# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""PyTorch OpenAI GPT-2 model."""
+"""PyTorch AraGPT2 model."""
 
 import math
 import os
@@ -59,16 +45,12 @@ _CHECKPOINT_FOR_DOC = "aubmindlab/aragpt2-mega"
 _CONFIG_FOR_DOC = "AraGPT2Config"
 _TOKENIZER_FOR_DOC = "GPT2Tokenizer"
 
-
+ARAGPT2_PRETRAINED_MODEL_ARCHIVE_LIST = [
     "aubmindlab/aragpt2-mega",
-
-    "aubmindlab/aragpt2-mega",
-    "aubmindlab/aragpt2-mega",
-    "distilgpt2",
-    # See all GPT-2 models at https://huggingface.co/models?filter=gpt2
+    # See all AraGPT2 models at https://huggingface.co/models?filter=aragpt2
 ]
 
-_GPT2_ML_TF_TO_TORCH = {
+_ARAGPT2_ML_TF_TO_TORCH = {
     "LayerNorm_embed_norm": "emb_norm",
     "pos_embed": "wpe.weight",
     "word_embed": "wte.weight",
@@ -89,19 +71,22 @@ _GPT2_ML_TF_TO_TORCH = {
     "bias": "bias",
 }
 
+WEIGHTS_NAME = "pytorch_model.bin"
+CONFIG_NAME = "config.json"
+
 
 def convert_gpt2_checkpoint_to_pytorch(
-
+    aragpt2_checkpoint_path, aragpt2_config_file, pytorch_dump_folder_path
 ):
     # Construct model
-    if
+    if aragpt2_config_file == "":
         config = AraGPT2Config()
     else:
-        config = AraGPT2Config.from_json_file(
+        config = AraGPT2Config.from_json_file(aragpt2_config_file)
     model = AraGPT2Model(config)
 
     # Load weights from numpy
-
+    load_tf_weights_in_aragpt2(model, config, aragpt2_checkpoint_path)
 
     # Save pytorch-model
     pytorch_weights_dump_path = pytorch_dump_folder_path + "/" + WEIGHTS_NAME
@@ -115,7 +100,7 @@ def convert_gpt2_checkpoint_to_pytorch(
 
 # XXX: MUST do like: convert_gpt2_checkpoint_to_pytorch('./model.ckpt-100000', './mega.json', './')
 # https://github.com/tensorflow/models/issues/2675#issuecomment-516595597
-def load_tf_weights_in_gpt2(model, config, gpt2_checkpoint_path):
+def load_tf_weights_in_aragpt2(model, config, aragpt2_checkpoint_path):
    """Load tf checkpoints in a pytorch model"""
    try:
        import re
@@ -126,7 +111,7 @@ def load_tf_weights_in_gpt2(model, config, gpt2_checkpoint_path):
            "https://www.tensorflow.org/install/ for installation instructions."
        )
        raise
-    tf_path = os.path.abspath(gpt2_checkpoint_path)
+    tf_path = os.path.abspath(aragpt2_checkpoint_path)
    logger.info("Converting TensorFlow checkpoint from {}".format(tf_path))
    # Load weights from TF model
    init_vars = tf.train.list_variables(tf_path)
@@ -157,13 +142,13 @@ def load_tf_weights_in_gpt2(model, config, gpt2_checkpoint_path):
 
        if sname == "" or sname == "embeddings":
            continue
-        elif sname not in _GPT2_ML_TF_TO_TORCH:
+        elif sname not in _ARAGPT2_ML_TF_TO_TORCH:
            print("=========================================================")
            logger.info("Skip var name {}".format(scope_names))
            pointer = None
            break
        else:
-            tname = _GPT2_ML_TF_TO_TORCH[sname]
+            tname = _ARAGPT2_ML_TF_TO_TORCH[sname]
            if "." in tname:
                parent, child = tname.split(".")
                pointer = getattr(pointer, parent)
@@ -602,7 +587,7 @@ class AraGPT2PreTrainedModel(PreTrainedModel):
    """
 
    config_class = AraGPT2Config
-    load_tf_weights = load_tf_weights_in_gpt2
+    load_tf_weights = load_tf_weights_in_aragpt2
    base_model_prefix = "transformer"
    is_parallelizable = True
    supports_gradient_checkpointing = True
@@ -828,7 +813,7 @@ class AraGPT2Model(AraGPT2PreTrainedModel):
    _keys_to_ignore_on_load_unexpected = ["attn.masked_bias"]
    _keys_to_ignore_on_load_missing = ["attn.masked_bias"]
 
-    def __init__(self, config):
+    def __init__(self, config: AraGPT2Config):
        super().__init__(config)
 
        self.embed_dim = config.hidden_size
@@ -1177,7 +1162,7 @@ class AraGPT2LMHeadModel(AraGPT2PreTrainedModel):
    ]
    _tied_weights_keys = ["lm_head.weight"]
 
-    def __init__(self, config):
+    def __init__(self, config: AraGPT2Config):
        super().__init__(config)
        self.transformer = AraGPT2Model(config)
        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
@@ -1399,7 +1384,7 @@ class AraGPT2DoubleHeadsModel(AraGPT2PreTrainedModel):
    ]
    _tied_weights_keys = ["lm_head.weight"]
 
-    def __init__(self, config):
+    def __init__(self, config: AraGPT2Config):
        super().__init__(config)
        config.num_labels = 1
        self.transformer = AraGPT2Model(config)
@@ -1653,7 +1638,7 @@ class AraGPT2ForSequenceClassification(AraGPT2PreTrainedModel):
    ]
    _keys_to_ignore_on_load_missing = [r"h\.\d+\.attn\.masked_bias", r"lm_head.weight"]
 
-    def __init__(self, config):
+    def __init__(self, config: AraGPT2Config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.transformer = AraGPT2Model(config)
@@ -1789,7 +1774,7 @@ class AraGPT2ForSequenceClassification(AraGPT2PreTrainedModel):
    AraGPT2_START_DOCSTRING,
 )
 class AraGPT2ForTokenClassification(AraGPT2PreTrainedModel):
-    def __init__(self, config):
+    def __init__(self, config: AraGPT2Config):
        super().__init__(config)
        self.num_labels = config.num_labels
 
@@ -1890,7 +1875,7 @@ class AraGPT2ForTokenClassification(AraGPT2PreTrainedModel):
    AraGPT2_START_DOCSTRING,
 )
 class AraGPT2ForQuestionAnswering(AraGPT2PreTrainedModel):
-    def __init__(self, config):
+    def __init__(self, config: AraGPT2Config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.transformer = AraGPT2Model(config)
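Note: with the loader renamed to load_tf_weights_in_aragpt2 and wired into AraGPT2PreTrainedModel.load_tf_weights, a TensorFlow checkpoint converts as the in-file XXX comment suggests. A sketch using the placeholder paths from that comment:

    from modeling_aragpt2 import convert_gpt2_checkpoint_to_pytorch

    # TF checkpoint prefix, AraGPT2Config as JSON, and the output folder;
    # the result is written as pytorch_model.bin (WEIGHTS_NAME above).
    convert_gpt2_checkpoint_to_pytorch(
        "./model.ckpt-100000",
        "./mega.json",
        "./",
    )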