Update app.py
app.py CHANGED
@@ -97,13 +97,14 @@ def load_model_and_tokenizer():
     #"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
     #"facebook/opt-125m"
     # Load tokenizer and model from disk (without trust_remote_code)
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir="/app/hf_cache")
     if torch.cuda.is_available():
         # Load model on GPU if CUDA is available
         model = AutoModelForCausalLM.from_pretrained(
             model_name,
             torch_dtype=torch.float16,
             device_map="auto"  # Automatically map model layers to GPU
+            ,cache_dir="/app/hf_cache"
         )
     else:
         # Load model on CPU if no GPU is available
@@ -111,6 +112,7 @@ def load_model_and_tokenizer():
             model_name,
             torch_dtype=torch.float32,  # Use float32 for compatibility with CPU
             low_cpu_mem_usage=True  # Reduce memory usage on CPU
+            , cache_dir="/app/hf_cache"
         )
     return model, tokenizer

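Net effect of the commit: every from_pretrained call now caches downloads under /app/hf_cache. Below is a minimal sketch of the resulting loader, not the verbatim app.py; it assumes model_name is passed in as a parameter, whereas in app.py it is set inside the function (the diff only shows two commented-out candidates), and it assumes the torch/transformers imports that the diff does not show.

# Sketch of load_model_and_tokenizer() after this commit (assumptions noted above).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

def load_model_and_tokenizer(model_name="facebook/opt-125m"):
    # Cache tokenizer and weights under /app/hf_cache so container restarts
    # reuse the downloaded files instead of re-fetching them.
    tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir="/app/hf_cache")
    if torch.cuda.is_available():
        # GPU path: half precision, layers mapped automatically (requires accelerate)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16,
            device_map="auto",
            cache_dir="/app/hf_cache",
        )
    else:
        # CPU path: float32 for compatibility, lower peak memory while loading
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float32,
            low_cpu_mem_usage=True,
            cache_dir="/app/hf_cache",
        )
    return model, tokenizer

An alternative with a similar effect would be to set the HF_HOME environment variable (or TRANSFORMERS_CACHE on older transformers releases) in the Space's Dockerfile so that all from_pretrained calls share one cache location without passing cache_dir each time.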