usag1e committed on
Commit
db3d08a
·
1 Parent(s): a018209

Fix quantization configuration and update dependencies

Browse files
Files changed (2) hide show
  1. app.py +18 -9
  2. requirements.txt +1 -0
app.py CHANGED
@@ -3,19 +3,28 @@ from pydantic import BaseModel
3
  from transformers import AutoModelForCausalLM, AutoTokenizer
4
  import torch
5
 
6
- # Load the model and tokenizer
7
- MODEL_NAME = "deepseek-ai/DeepSeek-V3-Base" # Change to the model you want
8
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
9
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
10
- model = AutoModelForCausalLM.from_pretrained(
11
- MODEL_NAME,
12
- device_map="auto",
13
- trust_remote_code=True, # Allow execution of custom code
14
- low_cpu_mem_usage=True # Ensures reduced memory usage
15
- ).to(device)
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  app = FastAPI()
18
 
 
19
  class Query(BaseModel):
20
  input_text: str
21
 
 
3
  from transformers import AutoModelForCausalLM, AutoTokenizer
4
  import torch
5
 
6
+ # Model configuration
7
+ MODEL_NAME = "deepseek-ai/DeepSeek-V3-Base" # Hugging Face model
8
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 
 
 
 
 
 
9
 
10
+ # Load model and tokenizer
11
+ try:
12
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
13
+ model = AutoModelForCausalLM.from_pretrained(
14
+ MODEL_NAME,
15
+ device_map="auto",
16
+ trust_remote_code=True,
17
+ low_cpu_mem_usage=True,
18
+ revision="main"
19
+ ).to(device)
20
+ except Exception as e:
21
+ print(f"Error loading model: {e}")
22
+ raise
23
+
24
+ # FastAPI app initialization
25
  app = FastAPI()
26
 
27
+ # Input schema
28
  class Query(BaseModel):
29
  input_text: str
30
 
requirements.txt CHANGED
@@ -2,5 +2,6 @@ fastapi
2
  uvicorn[standard]
3
  torch
4
  transformers
 
5
  huggingface_hub
6
  accelerate>=0.26.0
 
2
  uvicorn[standard]
3
  torch
4
  transformers
5
+ accelerate
6
  huggingface_hub
7
  accelerate>=0.26.0