Spaces:

usag1e
/

my-llm-endpoint-fresh

Runtime error

usag1e committed about 1 month ago

Commit

db3d08a

1 Parent(s): a018209

Fix quantization configuration and update dependencies

Files changed (2) hide show

app.py CHANGED Viewed

@@ -3,19 +3,28 @@ from pydantic import BaseModel
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
-# Load the model and tokenizer
-MODEL_NAME = "deepseek-ai/DeepSeek-V3-Base"  # Change to the model you want
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_NAME,
-    device_map="auto",
-    trust_remote_code=True,  # Allow execution of custom code
-    low_cpu_mem_usage=True  # Ensures reduced memory usage
-).to(device)
 app = FastAPI()
 class Query(BaseModel):
     input_text: str

 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
+# Model configuration
+MODEL_NAME = "deepseek-ai/DeepSeek-V3-Base"  # Hugging Face model
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Load model and tokenizer.
+# device_map="auto" already lets accelerate decide where every weight lives,
+# so the result must not be chained with .to(device): moving a dispatched
+# model is warned about, and rejected once any module has been offloaded.
+try:
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
+    model = AutoModelForCausalLM.from_pretrained(
+        MODEL_NAME,
+        device_map="auto",
+        trust_remote_code=True,
+        low_cpu_mem_usage=True,
+        revision="main",
+    )
+except Exception as e:
+    # Report the failure, then re-raise so application startup still aborts.
+    print(f"Error loading model: {e}")
+    raise
+# FastAPI app initialization
 app = FastAPI()
+# Input schema
 class Query(BaseModel):
     input_text: str

requirements.txt CHANGED Viewed

@@ -2,5 +2,6 @@ fastapi
 uvicorn[standard]
 torch
 transformers
 huggingface_hub
 accelerate>=0.26.0

 uvicorn[standard]
 torch
 transformers
+accelerate
 huggingface_hub
 accelerate>=0.26.0