Text Generation
Transformers
Safetensors
English
doge
conversational
custom_code
JingzeShi committed on
Commit
09899c7
·
verified ·
1 Parent(s): b900d3c

Upload DogeForCausalLM

Browse files
Files changed (1) hide show
  1. modeling_doge.py +0 -10
modeling_doge.py CHANGED
@@ -875,16 +875,6 @@ class DogeModel(DogePreTrainedModel):
875
  past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
876
  using_static_cache = isinstance(past_key_values, StaticCache)
877
 
878
- # When output attentions is True, sdpa implementation's forward method calls the eager implementation's forward
879
- if self.config._attn_implementation == "sdpa" and not using_static_cache and not output_attentions:
880
- if AttentionMaskConverter._ignore_causal_mask_sdpa(
881
- attention_mask,
882
- inputs_embeds=input_tensor,
883
- past_key_values_length=past_seen_tokens,
884
- is_training=self.training,
885
- ):
886
- return None
887
-
888
  dtype, device = input_tensor.dtype, input_tensor.device
889
  sequence_length = input_tensor.shape[1]
890
  if using_static_cache:
 
875
  past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
876
  using_static_cache = isinstance(past_key_values, StaticCache)
877
 
 
 
 
 
 
 
 
 
 
 
878
  dtype, device = input_tensor.dtype, input_tensor.device
879
  sequence_length = input_tensor.shape[1]
880
  if using_static_cache: