Spaces: Running on Zero
flash-attn
Browse files
- app.py +3 -3
- requirements.txt +1 -0
app.py
CHANGED
@@ -11,11 +11,11 @@ model_path = "microsoft/Phi-4-multimodal-instruct"
|
|
11 |
processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
|
12 |
model = AutoModelForCausalLM.from_pretrained(
|
13 |
model_path,
|
14 |
-
device_map="
|
15 |
torch_dtype="auto",
|
16 |
trust_remote_code=True,
|
17 |
-
attn_implementation='
|
18 |
-
)
|
19 |
|
20 |
generation_config = GenerationConfig.from_pretrained(model_path)
|
21 |
|
|
|
11 |
processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
|
12 |
model = AutoModelForCausalLM.from_pretrained(
|
13 |
model_path,
|
14 |
+
device_map="cuda",
|
15 |
torch_dtype="auto",
|
16 |
trust_remote_code=True,
|
17 |
+
attn_implementation='flash_attention_2',
|
18 |
+
).cuda()
|
19 |
|
20 |
generation_config = GenerationConfig.from_pretrained(model_path)
|
21 |
|
requirements.txt
CHANGED
@@ -7,5 +7,6 @@ scipy
|
|
7 |
soundfile
|
8 |
pillow
|
9 |
accelerate
|
|
|
10 |
transformers
|
11 |
backoff
|
|
|
7 |
soundfile
|
8 |
pillow
|
9 |
accelerate
|
10 |
+
flash-attn
|
11 |
transformers
|
12 |
backoff
|