Pectics committed
Commit 61f2a3d · 1 Parent(s): 03d2f46

Dec ZeroGPU usage

Files changed (2):
  1. app.py +17 -10
  2. requirements.txt +2 -1
app.py CHANGED
@@ -11,7 +11,7 @@ model_path = "Pectics/Softie-VL-7B-250123"
 model = Qwen2VLForConditionalGeneration.from_pretrained(
     model_path,
     torch_dtype=bfloat16,
-    #attn_implementation="flash_attention_2",
+    attn_implementation="flash_attention_2",
     device_map="auto",
 )
 min_pixels = 256 * 28 * 28
@@ -19,19 +19,12 @@ max_pixels = 1280 * 28 * 28
 processor: Qwen2VLProcessor = AutoProcessor.from_pretrained(model_path, min_pixels=min_pixels, max_pixels=max_pixels)

 @spaces.GPU
-def respond(
-    message,
-    history,
-    system_message,
+def infer(
+    messages,
     max_tokens,
     temperature,
     top_p,
 ):
-    messages = [{"role": "system", "content": system_message}]
-    for m in history:
-        messages.append({"role": m["role"], "content": m["content"]})
-    messages.append({"role": "user", "content": message})
-
     text_inputs = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     image_inputs, video_inputs = process_vision_info(messages)
     inputs = processor(
@@ -58,6 +51,20 @@ def respond(
         response += token
         yield response

+def respond(
+    message,
+    history,
+    system_message,
+    max_tokens,
+    temperature,
+    top_p,
+):
+    messages = [{"role": "system", "content": system_message}]
+    for m in history:
+        messages.append({"role": m["role"], "content": m["content"]})
+    messages.append({"role": "user", "content": message})
+    return infer(messages, max_tokens, temperature, top_p)
+
 app = gr.ChatInterface(
     respond,
     type="messages",
requirements.txt CHANGED
@@ -3,4 +3,5 @@ torchvision
 transformers
 accelerate
 qwen-vl-utils
-gradio
+gradio
+flash-attn
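
flash-attn is added because attn_implementation="flash_attention_2" was uncommented in app.py; the package compiles CUDA extensions at install time and can fail to build without a matching toolchain. A hedged fallback sketch, not part of this commit, that keeps the model loadable either way by dropping to PyTorch's SDPA attention:

from torch import bfloat16
from transformers import Qwen2VLForConditionalGeneration

try:
    import flash_attn  # noqa: F401  # present only if the wheel built
    attn_impl = "flash_attention_2"
except ImportError:
    attn_impl = "sdpa"  # scaled-dot-product attention fallback

model = Qwen2VLForConditionalGeneration.from_pretrained(
    "Pectics/Softie-VL-7B-250123",
    torch_dtype=bfloat16,
    attn_implementation=attn_impl,
    device_map="auto",
)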