Update app.py
app.py CHANGED

@@ -202,10 +202,8 @@ try:
         model_name,
         torch_dtype=torch.float32,  # Changed to float32 to avoid Half/Float mismatch
         low_cpu_mem_usage=True
-    )
-
-    model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8).to(device)
-    logger.info(f"Successfully loaded and quantized model: {model_name}")
+    ).to(device)
+    logger.info(f"Successfully loaded model: {model_name}")
 except Exception as e:
     logger.error(f"Error loading model/tokenizer: {e}")
     raise RuntimeError(f"Failed to load model: {str(e)}")
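For context, here is a minimal sketch of what the surrounding try/except block might look like after this change. The diff does not show which model class is used or how model_name, device, and logger are defined, so those parts are assumptions for illustration only.

# Sketch of the model-loading block after this commit (assumptions noted below).
import logging

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

model_name = "gpt2"  # placeholder; the Space's actual model name is not shown in the diff
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

try:
    # Assumption: the Space uses AutoModelForCausalLM; the diff only shows the
    # from_pretrained arguments, not the class being called.
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float32,  # float32 avoids the Half/Float mismatch noted in the diff
        low_cpu_mem_usage=True
    ).to(device)
    logger.info(f"Successfully loaded model: {model_name}")
except Exception as e:
    logger.error(f"Error loading model/tokenizer: {e}")
    raise RuntimeError(f"Failed to load model: {str(e)}")

The removed torch.quantization.quantize_dynamic call means the model now runs in plain float32. Dynamic quantization in PyTorch targets CPU inference only, so dropping it is a plausible simplification when the Space was hitting dtype mismatches, at the cost of higher memory use.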