Merge pull request #451 from OpenAccess-AI-Collective/eval-is-causal
Browse files
src/axolotl/monkeypatch/llama_attn_hijack_flash.py
CHANGED
@@ -158,7 +158,7 @@ def flashattn_forward(
|
|
158 |
else:
|
159 |
# turn off FA causal mask after first inference autoregressive iteration
|
160 |
# only on first autoregressive step q,k,v have same seqlen
|
161 |
-
is_causal =
|
162 |
|
163 |
if cu_seqlens is not None and max_seqlen is not None:
|
164 |
# special handling using sample packing
|
|
|
158 |
else:
|
159 |
# turn off FA causal mask after first inference autoregressive iteration
|
160 |
# only on first autoregressive step q,k,v have same seqlen
|
161 |
+
is_causal = key_states.shape == query_states.shape
|
162 |
|
163 |
if cu_seqlens is not None and max_seqlen is not None:
|
164 |
# special handling using sample packing
|