Upload DogeForCausalLM
Browse files
- modeling_doge.py +0 -10
modeling_doge.py
CHANGED
@@ -875,16 +875,6 @@ class DogeModel(DogePreTrainedModel):
|
|
875 |
past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
|
876 |
using_static_cache = isinstance(past_key_values, StaticCache)
|
877 |
|
878 |
-
# When output attentions is True, sdpa implementation's forward method calls the eager implementation's forward
|
879 |
-
if self.config._attn_implementation == "sdpa" and not using_static_cache and not output_attentions:
|
880 |
-
if AttentionMaskConverter._ignore_causal_mask_sdpa(
|
881 |
-
attention_mask,
|
882 |
-
inputs_embeds=input_tensor,
|
883 |
-
past_key_values_length=past_seen_tokens,
|
884 |
-
is_training=self.training,
|
885 |
-
):
|
886 |
-
return None
|
887 |
-
|
888 |
dtype, device = input_tensor.dtype, input_tensor.device
|
889 |
sequence_length = input_tensor.shape[1]
|
890 |
if using_static_cache:
|
|
|
875 |
past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
|
876 |
using_static_cache = isinstance(past_key_values, StaticCache)
|
877 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
878 |
dtype, device = input_tensor.dtype, input_tensor.device
|
879 |
sequence_length = input_tensor.shape[1]
|
880 |
if using_static_cache:
|