ariG23498 HF staff committed on
Commit
6cec260
·
1 Parent(s): 97e7627

remove flash

Browse files
Files changed (2) hide show
  1. app.py +6 -10
  2. requirements.txt +5 -5
app.py CHANGED
@@ -13,14 +13,11 @@ model_path = "microsoft/Phi-4-multimodal-instruct"
13
  processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
14
  model = AutoModelForCausalLM.from_pretrained(
15
  model_path,
16
- device_map="cuda",
17
  torch_dtype="auto",
18
  trust_remote_code=True,
19
- attn_implementation="eager", # Changed from 'flash_attention_2' to 'eager'
20
- ).cuda()
21
-
22
- # Load generation config
23
- generation_config = GenerationConfig.from_pretrained(model_path)
24
 
25
  # Define prompt structure
26
  user_prompt = '<|user|>'
@@ -38,12 +35,12 @@ def process_input(input_type, file, question):
38
  prompt = f'{user_prompt}<|image_1|>{question}{prompt_suffix}{assistant_prompt}'
39
  # Open image from uploaded file
40
  image = Image.open(file)
41
- inputs = processor(text=prompt, images=image, return_tensors='pt').to('cuda:0')
42
  elif input_type == "Audio":
43
  prompt = f'{user_prompt}<|audio_1|>{question}{prompt_suffix}{assistant_prompt}'
44
  # Read audio from uploaded file
45
  audio, samplerate = sf.read(file)
46
- inputs = processor(text=prompt, audios=[(audio, samplerate)], return_tensors='pt').to('cuda:0')
47
  else:
48
  return "Invalid input type selected."
49
 
@@ -51,8 +48,7 @@ def process_input(input_type, file, question):
51
  with torch.no_grad():
52
  generate_ids = model.generate(
53
  **inputs,
54
- max_new_tokens=1000,
55
- generation_config=generation_config,
56
  num_logits_to_keep=0,
57
  )
58
  generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
 
13
  processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
14
  model = AutoModelForCausalLM.from_pretrained(
15
  model_path,
16
+ device_map="auto",
17
  torch_dtype="auto",
18
  trust_remote_code=True,
19
+ _attn_implementation="eager",
20
+ )
 
 
 
21
 
22
  # Define prompt structure
23
  user_prompt = '<|user|>'
 
35
  prompt = f'{user_prompt}<|image_1|>{question}{prompt_suffix}{assistant_prompt}'
36
  # Open image from uploaded file
37
  image = Image.open(file)
38
+ inputs = processor(text=prompt, images=image, return_tensors='pt').to(model.device)
39
  elif input_type == "Audio":
40
  prompt = f'{user_prompt}<|audio_1|>{question}{prompt_suffix}{assistant_prompt}'
41
  # Read audio from uploaded file
42
  audio, samplerate = sf.read(file)
43
+ inputs = processor(text=prompt, audios=[(audio, samplerate)], return_tensors='pt').to(model.device)
44
  else:
45
  return "Invalid input type selected."
46
 
 
48
  with torch.no_grad():
49
  generate_ids = model.generate(
50
  **inputs,
51
+ max_new_tokens=200,
 
52
  num_logits_to_keep=0,
53
  )
54
  generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
requirements.txt CHANGED
@@ -1,11 +1,11 @@
1
  gradio
2
  spaces
 
3
  torch
4
- peft
5
- torchvision
6
- scipy
7
- soundfile
8
  pillow
9
- accelerate
10
  transformers
 
 
 
11
  backoff
 
1
  gradio
2
  spaces
3
+ requests
4
  torch
 
 
 
 
5
  pillow
6
+ soundfile
7
  transformers
8
+ torchvision
9
+ scipy
10
+ peft
11
  backoff