kouki321 committed
Commit 3f2015b · verified · 1 parent: be2b39e

Update app.py

Files changed (1)
  1. app.py +3 -1
app.py CHANGED

```diff
@@ -97,13 +97,14 @@ def load_model_and_tokenizer():
     #"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
     #"facebook/opt-125m"
     # Load tokenizer and model from disk (without trust_remote_code)
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    tokenizer = AutoTokenizer.from_pretrained(model_name,cache_dir="/app/hf_cache" )
     if torch.cuda.is_available():
         # Load model on GPU if CUDA is available
         model = AutoModelForCausalLM.from_pretrained(
             model_name,
             torch_dtype=torch.float16,
             device_map="auto"  # Automatically map model layers to GPU
+            ,cache_dir="/app/hf_cache"
         )
     else:
         # Load model on CPU if no GPU is available
@@ -111,6 +112,7 @@ def load_model_and_tokenizer():
             model_name,
             torch_dtype=torch.float32,  # Use float32 for compatibility with CPU
             low_cpu_mem_usage=True  # Reduce memory usage on CPU
+            , cache_dir="/app/hf_cache"
         )
     return model, tokenizer
 
```
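The commit threads a single `cache_dir="/app/hf_cache"` through every `from_pretrained` call, presumably so downloaded weights land in a writable path inside the container instead of the default `~/.cache/huggingface`. For context, here is a minimal sketch of the whole function as it stands after this change; `model_name` is an assumption taken from the commented-out checkpoints above, not part of the diff:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Assumed checkpoint: the commit only shows candidate names in comments.
model_name = "facebook/opt-125m"

def load_model_and_tokenizer():
    # All downloads go to a writable path inside the container.
    cache_dir = "/app/hf_cache"
    tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
    if torch.cuda.is_available():
        # GPU: half precision; device_map="auto" (requires the accelerate
        # package) spreads layers across the available devices.
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16,
            device_map="auto",
            cache_dir=cache_dir,
        )
    else:
        # CPU: float32 for compatibility, lower peak RAM while loading.
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float32,
            low_cpu_mem_usage=True,
            cache_dir=cache_dir,
        )
    return model, tokenizer
```

An equivalent approach would be to set `HF_HOME=/app/hf_cache` in the environment (e.g. in the Dockerfile), which redirects the Hugging Face cache for every `from_pretrained` call without passing `cache_dir` explicitly.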