Plat committed · a71047d
Parent(s): 68a9510

chore: install flash-attn at runtime

Files changed:
- app.py (+12, -0)
- requirements.txt (+0, -1)

app.py
CHANGED
@@ -19,6 +19,18 @@ except:
     return lambda x: x
 
 
+try:
+    import flash_attn
+except:
+    import subprocess
+
+    subprocess.run(
+        "pip install flash-attn --no-build-isolation",
+        env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+        shell=True,
+    )
+
+
 MODEL_NAME = "hatakeyama-llm-team/Tanuki-8B-Instruct"
 
 quantization_config = BitsAndBytesConfig(load_in_8bit=True)
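The pattern above is a lazy runtime install: app.py attempts to import flash_attn and, on failure, shells out to pip with FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE, a flag that flash-attn's build reads to skip compiling the CUDA extension, so the install can complete on hardware without a full CUDA toolchain. One caveat with the committed snippet: env= on subprocess.run replaces the child's entire environment rather than extending it. A minimal hardened sketch of the same pattern; the importlib check, the os.environ copy, sys.executable, and check=True are my additions, not part of the commit:

# Sketch: same runtime-install technique as the commit, slightly hardened.
import importlib.util
import os
import subprocess
import sys

if importlib.util.find_spec("flash_attn") is None:  # only install when genuinely missing
    # Extend the current environment instead of replacing it wholesale.
    env = os.environ.copy()
    env["FLASH_ATTENTION_SKIP_CUDA_BUILD"] = "TRUE"  # skip the CUDA kernel build
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"],
        env=env,
        check=True,  # raise if the install fails instead of continuing silently
    )

Using sys.executable keeps the package in the same interpreter environment the app runs in, and check=True turns a failed install into a visible error at startup rather than a later ImportError.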
requirements.txt
CHANGED
@@ -4,4 +4,3 @@ accelerate==0.30.1
 transformers==4.41.2
 spaces==0.28.3
 bitsandbytes==0.43.1
-flash-attn==2.5.9.post1
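Dropping the flash-attn==2.5.9.post1 pin is the other half of the change: pinned in requirements.txt, the package would be installed while the Space image is built, where compiling its CUDA extension is likely to fail or time out. Deferring the install to app startup, with the skip-build flag set, sidesteps the build-time step entirely.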