Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
@@ -61,9 +61,9 @@ vqllm = AutoModelForCausalLM.from_pretrained(
     model_id,
     attn_implementation='flash_attention_2',
     torch_dtype=torch.bfloat16,
-    load_in_8bit=True,
-    max_memory={0: "40GiB" },
-)
+    # load_in_8bit=True,
+    # max_memory={0: "40GiB" },
+).to("cuda:0")
 
 stop_flag = False
 