Update app.py
app.py CHANGED

@@ -202,10 +202,8 @@ try:
         model_name,
         torch_dtype=torch.float32,  # Changed to float32 to avoid Half/Float mismatch
         low_cpu_mem_usage=True
-    )
-
-    model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8).to(device)
-    logger.info(f"Successfully loaded and quantized model: {model_name}")
+    ).to(device)
+    logger.info(f"Successfully loaded model: {model_name}")
 except Exception as e:
     logger.error(f"Error loading model/tokenizer: {e}")
     raise RuntimeError(f"Failed to load model: {str(e)}")
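For context, here is a minimal sketch of what the surrounding try/except block might look like after this change. The diff does not show which model class is used or how model_name, device, and logger are defined, so those parts are assumptions for illustration only.

# Sketch of the model-loading block after this commit (assumptions noted below).
import logging

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

model_name = "gpt2"  # placeholder; the Space's actual model name is not shown in the diff
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

try:
    # Assumption: the Space uses AutoModelForCausalLM; the diff only shows the
    # from_pretrained arguments, not the class being called.
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float32,  # float32 avoids the Half/Float mismatch noted in the diff
        low_cpu_mem_usage=True
    ).to(device)
    logger.info(f"Successfully loaded model: {model_name}")
except Exception as e:
    logger.error(f"Error loading model/tokenizer: {e}")
    raise RuntimeError(f"Failed to load model: {str(e)}")

The removed torch.quantization.quantize_dynamic call means the model now runs in plain float32. Dynamic quantization in PyTorch targets CPU inference only, so dropping it is a plausible simplification when the Space was hitting dtype mismatches, at the cost of higher memory use.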