Junfeng5 committed on
Commit
9eb57f2
·
verified ·
1 Parent(s): 5a81375

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -61,9 +61,9 @@ vqllm = AutoModelForCausalLM.from_pretrained(
61
  model_id,
62
  attn_implementation='flash_attention_2',
63
  torch_dtype=torch.bfloat16,
64
- load_in_8bit=True,
65
- max_memory={0: "40GiB" },
66
- ) # .to("cuda:0")
67
 
68
  stop_flag = False
69
 
 
61
  model_id,
62
  attn_implementation='flash_attention_2',
63
  torch_dtype=torch.bfloat16,
64
+ # load_in_8bit=True,
65
+ # max_memory={0: "40GiB" },
66
+ ).to("cuda:0")
67
 
68
  stop_flag = False
69