Spaces: Running on Zero
no flash attn
Browse files
app.py
CHANGED
@@ -8,11 +8,11 @@ import torch
|
|
8 |
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
|
9 |
import subprocess
|
10 |
|
11 |
-
subprocess.run(
|
12 |
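-
"pip install flash-attn --no-build-isolation",
|
13 |
-
env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
|
14 |
-
shell=True,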
|
15 |
-
)
|
16 |
|
17 |
|
18 |
DESCRIPTION = """\
|
@@ -36,8 +36,7 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
36 |
model_id,
|
37 |
device_map="auto",
|
38 |
torch_dtype=torch.bfloat16,
|
39 |
-
attn_implementation="
|
40 |
-
trust_remote_code=True,
|
41 |
)
|
42 |
model.config.sliding_window = 4096
|
43 |
model.eval()
|
|
|
8 |
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
|
9 |
import subprocess
|
10 |
|
11 |
+
# subprocess.run(
|
12 |
+
# "pip install flash-attn --no-build-isolation",
|
13 |
+
# env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
|
14 |
+
# shell=True,
|
15 |
+
# )
|
16 |
|
17 |
|
18 |
DESCRIPTION = """\
|
|
|
36 |
model_id,
|
37 |
device_map="auto",
|
38 |
torch_dtype=torch.bfloat16,
|
39 |
+
attn_implementation="eager",
|
|
|
40 |
)
|
41 |
model.config.sliding_window = 4096
|
42 |
model.eval()
|