ved1beta committed
Commit cb872ce · 1 Parent(s): dd39d5d
Files changed (1)
  1. app.py +18 -13
app.py CHANGED
@@ -1,6 +1,6 @@
 import subprocess
-# Installing flash_attn
-subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
+# Remove flash-attn installation
+# subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
 import gradio as gr
 from PIL import Image
@@ -13,9 +13,14 @@ import torch
 import spaces
 
 model_id = "microsoft/Phi-3-vision-128k-instruct"
-model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cpu", trust_remote_code=True, torch_dtype="auto")
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    device_map="cpu",
+    trust_remote_code=True,
+    torch_dtype=torch.float32, # Explicitly set to float32
+    attn_implementation="eager" # Disable FlashAttention
+)
 processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
-model.to("cpu")
 
 PLACEHOLDER = """
 <div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
@@ -30,30 +35,24 @@ def bot_streaming(message, history):
     print(f'message is - {message}')
     print(f'history is - {history}')
     if message["files"]:
-        # message["files"][-1] is a Dict or just a string
         if type(message["files"][-1]) == dict:
             image = message["files"][-1]["path"]
         else:
             image = message["files"][-1]
     else:
-        # if there's no image uploaded for this turn, look for images in the past turns
-        # kept inside tuples, take the last one
        for hist in history:
            if type(hist[0]) == tuple:
                image = hist[0][0]
     try:
         if image is None:
-            # Handle the case where image is None
             raise gr.Error("You need to upload an image for Phi3-Vision to work. Close the error and try again with an Image.")
     except NameError:
-        # Handle the case where 'image' is not defined at all
         raise gr.Error("You need to upload an image for Phi3-Vision to work. Close the error and try again with an Image.")
 
     conversation = []
     flag=False
     for user, assistant in history:
         if assistant is None:
-            #pass
             flag=True
             conversation.extend([{"role": "user", "content":""}])
             continue
@@ -71,10 +70,17 @@ def bot_streaming(message, history):
     print(f"prompt is -\n{conversation}")
     prompt = processor.tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
     image = Image.open(image)
-    inputs = processor(prompt, image, return_tensors="pt").to("cpu")
+    inputs = processor(prompt, image, return_tensors="pt")
 
     streamer = TextIteratorStreamer(processor, **{"skip_special_tokens": True, "skip_prompt": True, 'clean_up_tokenization_spaces':False,})
-    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024, do_sample=False, temperature=0.0, eos_token_id=processor.tokenizer.eos_token_id,)
+    generation_kwargs = dict(
+        inputs,
+        streamer=streamer,
+        max_new_tokens=1024,
+        do_sample=False,
+        temperature=0.0,
+        eos_token_id=processor.tokenizer.eos_token_id
+    )
 
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()
@@ -84,7 +90,6 @@ def bot_streaming(message, history):
         buffer += new_text
         yield buffer
 
-
 chatbot=gr.Chatbot(scale=1, placeholder=PLACEHOLDER)
 chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeholder="Enter message or upload file...", show_label=False)
 with gr.Blocks(fill_height=True, ) as demo:
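For reference, the pattern this commit moves to (CPU loading with eager attention and float32, plus streaming generation from a background thread) can be exercised outside the Gradio app with a minimal sketch like the one below. It mirrors the calls already used in app.py; the image path example.jpg, the prompt text, and max_new_tokens=128 are illustrative placeholders rather than values from the commit, and the <|image_1|> tag is the image placeholder Phi-3-vision's chat template expects.

    from threading import Thread

    import torch
    from PIL import Image
    from transformers import AutoModelForCausalLM, AutoProcessor, TextIteratorStreamer

    model_id = "microsoft/Phi-3-vision-128k-instruct"

    # Load on CPU in float32 with the eager attention path, so flash-attn is never required.
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="cpu",
        trust_remote_code=True,
        torch_dtype=torch.float32,
        attn_implementation="eager",
    )
    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

    # Build a single-turn prompt with one image, as bot_streaming() does for the first turn.
    conversation = [{"role": "user", "content": "<|image_1|>\nDescribe this image."}]
    prompt = processor.tokenizer.apply_chat_template(
        conversation, tokenize=False, add_generation_prompt=True
    )
    inputs = processor(prompt, Image.open("example.jpg"), return_tensors="pt")

    # Stream tokens from a background generate() thread, same as the Space does.
    streamer = TextIteratorStreamer(processor, skip_special_tokens=True, skip_prompt=True)
    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=128, do_sample=False)

    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    for new_text in streamer:
        print(new_text, end="", flush=True)
    thread.join()

Because attn_implementation="eager" keeps generation on the stock PyTorch attention path, the flash-attn pip install at the top of app.py can stay commented out for a CPU-only Space.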