Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -21,16 +21,12 @@ def load_model():
|
|
21 |
# Load model with CPU optimizations
|
22 |
model = AutoModelForCausalLM.from_pretrained(
|
23 |
model_name,
|
24 |
-
torch_dtype=torch.float16, # Use float16 for faster CPU inference
|
25 |
device_map="cpu",
|
26 |
trust_remote_code=True,
|
27 |
low_cpu_mem_usage=True, # Reduce memory usage
|
28 |
offload_folder="offload" # Enable model offloading
|
29 |
)
|
30 |
|
31 |
-
# Convert to float16 for faster inference
|
32 |
-
model = model.half()
|
33 |
-
|
34 |
# Enable CPU optimizations
|
35 |
model.eval()
|
36 |
|
|
|
21 |
# Load model with CPU optimizations
|
22 |
model = AutoModelForCausalLM.from_pretrained(
|
23 |
model_name,
|
|
|
24 |
device_map="cpu",
|
25 |
trust_remote_code=True,
|
26 |
low_cpu_mem_usage=True, # Reduce memory usage
|
27 |
offload_folder="offload" # Enable model offloading
|
28 |
)
|
29 |
|
|
|
|
|
|
|
30 |
# Enable CPU optimizations
|
31 |
model.eval()
|
32 |
|