ariG23498 HF staff committed on
Commit
2b310a5
·
1 Parent(s): f9923d4

flash-attn

Browse files
Files changed (2) hide show
  1. app.py +3 -3
  2. requirements.txt +1 -0
app.py CHANGED
@@ -11,11 +11,11 @@ model_path = "microsoft/Phi-4-multimodal-instruct"
11
  processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
12
  model = AutoModelForCausalLM.from_pretrained(
13
  model_path,
14
- device_map="auto",
15
  torch_dtype="auto",
16
  trust_remote_code=True,
17
- attn_implementation='eager',
18
- )
19
 
20
  generation_config = GenerationConfig.from_pretrained(model_path)
21
 
 
11
  processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
12
  model = AutoModelForCausalLM.from_pretrained(
13
  model_path,
14
+ device_map="cuda",
15
  torch_dtype="auto",
16
  trust_remote_code=True,
17
+ attn_implementation='flash_attention_2',
18
+ ).cuda()
19
 
20
  generation_config = GenerationConfig.from_pretrained(model_path)
21
 
requirements.txt CHANGED
@@ -7,5 +7,6 @@ scipy
7
  soundfile
8
  pillow
9
  accelerate
 
10
  transformers
11
  backoff
 
7
  soundfile
8
  pillow
9
  accelerate
10
+ flash-attn
11
  transformers
12
  backoff