Upload folder using huggingface_hub
Browse files- LICENSE +1 -0
- README.md +1 -0
- amplify_te.py +30 -13
- config.json +2 -2
LICENSE
CHANGED
|
@@ -5,6 +5,7 @@
|
|
| 5 |
MIT License
|
| 6 |
|
| 7 |
Copyright (c) 2024 chandar-lab
|
|
|
|
| 8 |
|
| 9 |
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 10 |
of this software and associated documentation files (the "Software"), to deal
|
|
|
|
| 5 |
MIT License
|
| 6 |
|
| 7 |
Copyright (c) 2024 chandar-lab
|
| 8 |
+
Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
| 9 |
|
| 10 |
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 11 |
of this software and associated documentation files (the "Software"), to deal
|
README.md
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
---
|
|
|
|
| 2 |
license: mit
|
| 3 |
datasets:
|
| 4 |
- chandar-lab/UR100P
|
|
|
|
| 1 |
---
|
| 2 |
+
library_name: transformers
|
| 3 |
license: mit
|
| 4 |
datasets:
|
| 5 |
- chandar-lab/UR100P
|
amplify_te.py
CHANGED
|
@@ -1,6 +1,27 @@
|
|
|
|
|
| 1 |
# SPDX-FileCopyrightText: Copyright (c) 2024 chandar-lab
|
| 2 |
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
| 3 |
# SPDX-License-Identifier: MIT
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
#
|
| 5 |
# Adapted from https://huggingface.co/chandar-lab/AMPLIFY_120M/blob/main/amplify.py
|
| 6 |
|
|
@@ -126,17 +147,15 @@ class AMPLIFY(AMPLIFYPreTrainedModel):
|
|
| 126 |
config.padded_vocab_size,
|
| 127 |
config.hidden_size,
|
| 128 |
padding_idx=config.pad_token_id,
|
| 129 |
-
dtype=config.torch_dtype,
|
| 130 |
)
|
| 131 |
|
| 132 |
if config.layer_norm_after_embedding:
|
| 133 |
self.layer_norm_1 = (
|
| 134 |
-
transformer_engine.pytorch.RMSNorm(
|
| 135 |
-
config.hidden_size, config.norm_eps, params_dtype=config.torch_dtype
|
| 136 |
-
)
|
| 137 |
if config.rms_norm
|
| 138 |
else transformer_engine.pytorch.LayerNorm(
|
| 139 |
-
config.hidden_size, config.norm_eps, params_dtype=config.torch_dtype
|
| 140 |
)
|
| 141 |
)
|
| 142 |
|
|
@@ -148,6 +167,9 @@ class AMPLIFY(AMPLIFYPreTrainedModel):
|
|
| 148 |
intermediate_size = int(2 * config.intermediate_size / 3)
|
| 149 |
intermediate_size = multiple_of * ((intermediate_size + multiple_of - 1) // multiple_of)
|
| 150 |
|
|
|
|
|
|
|
|
|
|
| 151 |
self.transformer_encoder = nn.ModuleList()
|
| 152 |
for layer_num in range(config.num_hidden_layers):
|
| 153 |
self.transformer_encoder.append(
|
|
@@ -173,7 +195,7 @@ class AMPLIFY(AMPLIFYPreTrainedModel):
|
|
| 173 |
window_size=(-1, -1),
|
| 174 |
rotary_pos_interleaved=True,
|
| 175 |
seq_length=config.max_length,
|
| 176 |
-
params_dtype=config.torch_dtype,
|
| 177 |
)
|
| 178 |
)
|
| 179 |
|
|
@@ -191,7 +213,6 @@ class AMPLIFY(AMPLIFYPreTrainedModel):
|
|
| 191 |
output_hidden_states=False,
|
| 192 |
output_attentions=False,
|
| 193 |
labels=None,
|
| 194 |
-
**kwargs,
|
| 195 |
) -> BaseModelOutput:
|
| 196 |
"""Forward pass of the AMPLIFY model.
|
| 197 |
|
|
@@ -201,7 +222,6 @@ class AMPLIFY(AMPLIFYPreTrainedModel):
|
|
| 201 |
output_hidden_states (bool): Whether to output the hidden states.
|
| 202 |
output_attentions (bool): Whether to output the attention weights.
|
| 203 |
labels (torch.Tensor): The labels.
|
| 204 |
-
**kwargs: Additional arguments.
|
| 205 |
|
| 206 |
Returns:
|
| 207 |
BaseModelOutput: The output of the model.
|
|
@@ -256,7 +276,7 @@ class AMPLIFYForMaskedLM(AMPLIFYPreTrainedModel):
|
|
| 256 |
config.hidden_size,
|
| 257 |
config.padded_vocab_size,
|
| 258 |
config.norm_eps,
|
| 259 |
-
params_dtype=config.torch_dtype,
|
| 260 |
normalization="RMSNorm" if config.rms_norm else "LayerNorm",
|
| 261 |
init_method=lambda x: torch.nn.init.uniform_(
|
| 262 |
x, -self.config.decoder_init_range, self.config.decoder_init_range
|
|
@@ -265,7 +285,7 @@ class AMPLIFYForMaskedLM(AMPLIFYPreTrainedModel):
|
|
| 265 |
|
| 266 |
else:
|
| 267 |
self.decoder = transformer_engine.pytorch.Linear(
|
| 268 |
-
config.hidden_size, config.vocab_size, params_dtype=config.torch_dtype
|
| 269 |
)
|
| 270 |
|
| 271 |
def forward(
|
|
@@ -275,7 +295,6 @@ class AMPLIFYForMaskedLM(AMPLIFYPreTrainedModel):
|
|
| 275 |
output_hidden_states=False,
|
| 276 |
output_attentions=False,
|
| 277 |
labels=None,
|
| 278 |
-
**kwargs,
|
| 279 |
) -> MaskedLMOutput:
|
| 280 |
"""Forward pass of the AMPLIFYForMaskedLM model.
|
| 281 |
|
|
@@ -285,7 +304,6 @@ class AMPLIFYForMaskedLM(AMPLIFYPreTrainedModel):
|
|
| 285 |
output_hidden_states (bool): Whether to output the hidden states.
|
| 286 |
output_attentions (bool): Whether to output the attention weights.
|
| 287 |
labels (torch.Tensor): The labels.
|
| 288 |
-
**kwargs: Additional arguments.
|
| 289 |
|
| 290 |
Returns:
|
| 291 |
MaskedLMOutput: The output of the model.
|
|
@@ -296,7 +314,6 @@ class AMPLIFYForMaskedLM(AMPLIFYPreTrainedModel):
|
|
| 296 |
output_hidden_states,
|
| 297 |
output_attentions,
|
| 298 |
labels,
|
| 299 |
-
**kwargs,
|
| 300 |
)
|
| 301 |
|
| 302 |
# Classification head with layer norm
|
|
|
|
| 1 |
+
# noqa: license-check
|
| 2 |
# SPDX-FileCopyrightText: Copyright (c) 2024 chandar-lab
|
| 3 |
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
| 4 |
# SPDX-License-Identifier: MIT
|
| 5 |
+
# Copyright (c) 2024 chandar-lab
|
| 6 |
+
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
| 7 |
+
#
|
| 8 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 9 |
+
# of this software and associated documentation files (the "Software"), to deal
|
| 10 |
+
# in the Software without restriction, including without limitation the rights
|
| 11 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 12 |
+
# copies of the Software, and to permit persons to whom the Software is
|
| 13 |
+
# furnished to do so, subject to the following conditions:
|
| 14 |
+
#
|
| 15 |
+
# The above copyright notice and this permission notice shall be included in all
|
| 16 |
+
# copies or substantial portions of the Software.
|
| 17 |
+
#
|
| 18 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 19 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 20 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 21 |
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 22 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 23 |
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 24 |
+
# SOFTWARE.
|
| 25 |
#
|
| 26 |
# Adapted from https://huggingface.co/chandar-lab/AMPLIFY_120M/blob/main/amplify.py
|
| 27 |
|
|
|
|
| 147 |
config.padded_vocab_size,
|
| 148 |
config.hidden_size,
|
| 149 |
padding_idx=config.pad_token_id,
|
| 150 |
+
dtype=config.dtype,
|
| 151 |
)
|
| 152 |
|
| 153 |
if config.layer_norm_after_embedding:
|
| 154 |
self.layer_norm_1 = (
|
| 155 |
+
transformer_engine.pytorch.RMSNorm(config.hidden_size, config.norm_eps, params_dtype=config.dtype)
|
|
|
|
|
|
|
| 156 |
if config.rms_norm
|
| 157 |
else transformer_engine.pytorch.LayerNorm(
|
| 158 |
+
config.hidden_size, config.norm_eps, params_dtype=config.dtype
|
| 159 |
)
|
| 160 |
)
|
| 161 |
|
|
|
|
| 167 |
intermediate_size = int(2 * config.intermediate_size / 3)
|
| 168 |
intermediate_size = multiple_of * ((intermediate_size + multiple_of - 1) // multiple_of)
|
| 169 |
|
| 170 |
+
else:
|
| 171 |
+
intermediate_size = config.intermediate_size
|
| 172 |
+
|
| 173 |
self.transformer_encoder = nn.ModuleList()
|
| 174 |
for layer_num in range(config.num_hidden_layers):
|
| 175 |
self.transformer_encoder.append(
|
|
|
|
| 195 |
window_size=(-1, -1),
|
| 196 |
rotary_pos_interleaved=True,
|
| 197 |
seq_length=config.max_length,
|
| 198 |
+
params_dtype=config.dtype,
|
| 199 |
)
|
| 200 |
)
|
| 201 |
|
|
|
|
| 213 |
output_hidden_states=False,
|
| 214 |
output_attentions=False,
|
| 215 |
labels=None,
|
|
|
|
| 216 |
) -> BaseModelOutput:
|
| 217 |
"""Forward pass of the AMPLIFY model.
|
| 218 |
|
|
|
|
| 222 |
output_hidden_states (bool): Whether to output the hidden states.
|
| 223 |
output_attentions (bool): Whether to output the attention weights.
|
| 224 |
labels (torch.Tensor): The labels.
|
|
|
|
| 225 |
|
| 226 |
Returns:
|
| 227 |
BaseModelOutput: The output of the model.
|
|
|
|
| 276 |
config.hidden_size,
|
| 277 |
config.padded_vocab_size,
|
| 278 |
config.norm_eps,
|
| 279 |
+
params_dtype=config.dtype,
|
| 280 |
normalization="RMSNorm" if config.rms_norm else "LayerNorm",
|
| 281 |
init_method=lambda x: torch.nn.init.uniform_(
|
| 282 |
x, -self.config.decoder_init_range, self.config.decoder_init_range
|
|
|
|
| 285 |
|
| 286 |
else:
|
| 287 |
self.decoder = transformer_engine.pytorch.Linear(
|
| 288 |
+
config.hidden_size, config.vocab_size, params_dtype=config.dtype
|
| 289 |
)
|
| 290 |
|
| 291 |
def forward(
|
|
|
|
| 295 |
output_hidden_states=False,
|
| 296 |
output_attentions=False,
|
| 297 |
labels=None,
|
|
|
|
| 298 |
) -> MaskedLMOutput:
|
| 299 |
"""Forward pass of the AMPLIFYForMaskedLM model.
|
| 300 |
|
|
|
|
| 304 |
output_hidden_states (bool): Whether to output the hidden states.
|
| 305 |
output_attentions (bool): Whether to output the attention weights.
|
| 306 |
labels (torch.Tensor): The labels.
|
|
|
|
| 307 |
|
| 308 |
Returns:
|
| 309 |
MaskedLMOutput: The output of the model.
|
|
|
|
| 314 |
output_hidden_states,
|
| 315 |
output_attentions,
|
| 316 |
labels,
|
|
|
|
| 317 |
)
|
| 318 |
|
| 319 |
# Classification head with layer norm
|
config.json
CHANGED
|
@@ -12,6 +12,7 @@
|
|
| 12 |
"bos_token_id": 3,
|
| 13 |
"decoder_init_range": 0.02,
|
| 14 |
"dropout_prob": 0,
|
|
|
|
| 15 |
"embedding_init_range": 0.02,
|
| 16 |
"eos_token_id": 4,
|
| 17 |
"ffn_bias": false,
|
|
@@ -31,8 +32,7 @@
|
|
| 31 |
"padded_vocab_size": 32,
|
| 32 |
"pre_activation_layer_norm": true,
|
| 33 |
"rms_norm": true,
|
| 34 |
-
"
|
| 35 |
-
"transformers_version": "4.53.2",
|
| 36 |
"unk_token_id": 1,
|
| 37 |
"vocab_path": "conf/tokenizer/amplify_vocab.txt",
|
| 38 |
"vocab_size": 27
|
|
|
|
| 12 |
"bos_token_id": 3,
|
| 13 |
"decoder_init_range": 0.02,
|
| 14 |
"dropout_prob": 0,
|
| 15 |
+
"dtype": "float32",
|
| 16 |
"embedding_init_range": 0.02,
|
| 17 |
"eos_token_id": 4,
|
| 18 |
"ffn_bias": false,
|
|
|
|
| 32 |
"padded_vocab_size": 32,
|
| 33 |
"pre_activation_layer_norm": true,
|
| 34 |
"rms_norm": true,
|
| 35 |
+
"transformers_version": "4.56.1",
|
|
|
|
| 36 |
"unk_token_id": 1,
|
| 37 |
"vocab_path": "conf/tokenizer/amplify_vocab.txt",
|
| 38 |
"vocab_size": 27
|