satishpednekar committed on
Commit
7bff2c6
·
verified ·
1 Parent(s): 7d3d345

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -8
app.py CHANGED
@@ -45,22 +45,28 @@ def load_model_gpu():
45
  return model, tokenizer
46
 
47
  def load_model():
48
- # Load in pure CPU mode without quantization
49
  config = PeftConfig.from_pretrained("satishpednekar/sbx-qhelper-mistral-loraWeights")
50
 
51
  model = AutoModelForCausalLM.from_pretrained(
52
  config.base_model_name_or_path,
53
- torch_dtype=torch.float32, # Use float32 for CPU
54
- device_map=None, # Force CPU
55
  trust_remote_code=True,
56
- load_in_4bit=False # Remove quantization
57
  )
58
 
59
- model = PeftModel.from_pretrained(model, "satishpednekar/sbx-qhelper-mistral-loraWeights")
60
- tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
 
 
 
 
 
 
 
 
61
 
62
- # Explicitly move to CPU
63
- model = model.to("cpu")
64
 
65
  return model, tokenizer
66
 
 
45
  return model, tokenizer
46
 
47
def load_model():
    """Load the base causal-LM plus LoRA adapter for pure-CPU inference.

    Returns:
        tuple: (model, tokenizer) — the PEFT-wrapped model moved to CPU and
        set to eval mode, and the tokenizer for its base model.

    Note: downloads weights from the Hugging Face Hub on first call; no
    quantization is used (float32, CPU only).
    """
    # Single source of truth for the adapter repo id — it is needed both to
    # resolve the base model (via PeftConfig) and to attach the LoRA weights.
    adapter_id = "satishpednekar/sbx-qhelper-mistral-loraWeights"

    config = PeftConfig.from_pretrained(adapter_id)

    model = AutoModelForCausalLM.from_pretrained(
        config.base_model_name_or_path,
        torch_dtype=torch.float32,  # float32: CPU has no efficient fp16/bf16 path
        device_map=None,            # force plain CPU placement, no accelerate sharding
        trust_remote_code=True,
        # Deliberately no quantization-related parameters (CPU-only build).
    )

    model = PeftModel.from_pretrained(
        model,
        adapter_id,
        torch_dtype=torch.float32,
    )

    tokenizer = AutoTokenizer.from_pretrained(
        config.base_model_name_or_path,
        trust_remote_code=True,
    )

    # Explicit CPU placement + eval mode (disables dropout for inference).
    model = model.to("cpu").eval()

    return model, tokenizer