Harshu0117 committed on
Commit
464f02c
·
verified ·
1 Parent(s): 2fca8ab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -4
app.py CHANGED
@@ -21,16 +21,12 @@ def load_model():
21
  # Load model with CPU optimizations
22
  model = AutoModelForCausalLM.from_pretrained(
23
  model_name,
24
- torch_dtype=torch.float16, # Use float16 for faster CPU inference
25
  device_map="cpu",
26
  trust_remote_code=True,
27
  low_cpu_mem_usage=True, # Reduce memory usage
28
  offload_folder="offload" # Enable model offloading
29
  )
30
 
31
- # Convert to float16 for faster inference
32
- model = model.half()
33
-
34
  # Enable CPU optimizations
35
  model.eval()
36
 
 
21
  # Load model with CPU optimizations
22
  model = AutoModelForCausalLM.from_pretrained(
23
  model_name,
 
24
  device_map="cpu",
25
  trust_remote_code=True,
26
  low_cpu_mem_usage=True, # Reduce memory usage
27
  offload_folder="offload" # Enable model offloading
28
  )
29
 
 
 
 
30
  # Enable CPU optimizations
31
  model.eval()
32