kouki321 committed
Commit 42cbd99 · verified · 1 parent: 237da60

Update app.py

Files changed (1): app.py (+6, -6)
app.py CHANGED

```diff
@@ -82,28 +82,28 @@ def clean_up(cache, origin_len):
         new_cache.key_cache[i] = new_cache.key_cache[i][:, :, :origin_len, :]
         new_cache.value_cache[i] = new_cache.value_cache[i][:, :, :origin_len, :]
     return new_cache
-os.environ["TRANSFORMERS_OFFLINE"] = "1"
-os.environ["HF_HUB_OFFLINE"] = "1"
+#os.environ["TRANSFORMERS_OFFLINE"] = "1"
+#os.environ["HF_HUB_OFFLINE"] = "1"
 
 # Path to your local model
 
 # Initialize model and tokenizer
 def load_model_and_tokenizer():
-    model_path = "./deepseek"
+    model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
 
     # Load tokenizer and model from disk (without trust_remote_code)
-    tokenizer = AutoTokenizer.from_pretrained(model_path)
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
     if torch.cuda.is_available():
         # Load model on GPU if CUDA is available
         model = AutoModelForCausalLM.from_pretrained(
-            model_path,
+            model_name,
             torch_dtype=torch.float16,
             device_map="auto"  # Automatically map model layers to GPU
         )
     else:
         # Load model on CPU if no GPU is available
         model = AutoModelForCausalLM.from_pretrained(
-            model_path,
+            model_name,
             torch_dtype=torch.float32,  # Use float32 for compatibility with CPU
             low_cpu_mem_usage=True  # Reduce memory usage on CPU
         )
```
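The unchanged context at the top of the hunk is the tail of a `clean_up` helper that trims a grown KV cache back to the original prompt length, so the cached context can be reused for the next query. A minimal sketch of how the whole helper plausibly reads, assuming transformers' `DynamicCache` (whose per-layer tensors have the `[batch, heads, seq_len, head_dim]` layout the slicing implies); only the last three lines are visible in the diff, and the `deepcopy` is an assumption:

```python
import copy

from transformers import DynamicCache


def clean_up(cache: DynamicCache, origin_len: int) -> DynamicCache:
    # Copy the cache, then slice every layer's key/value tensors back to
    # the original prompt length, discarding tokens generated after it.
    new_cache = copy.deepcopy(cache)
    for i in range(len(new_cache.key_cache)):
        # Tensors are [batch, num_heads, seq_len, head_dim]; trim seq_len.
        new_cache.key_cache[i] = new_cache.key_cache[i][:, :, :origin_len, :]
        new_cache.value_cache[i] = new_cache.value_cache[i][:, :, :origin_len, :]
    return new_cache
```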
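The two newly commented-out lines are what let the Hub ID below resolve: with either `TRANSFORMERS_OFFLINE` or `HF_HUB_OFFLINE` set to `"1"`, `from_pretrained` never touches the network and only succeeds if the files are already in the local cache. A small illustration of the failure mode the commit avoids (the flags are read at import time, so they must be set before `transformers` is imported):

```python
import os

# Force offline mode *before* importing transformers.
os.environ["HF_HUB_OFFLINE"] = "1"

from transformers import AutoTokenizer

# Raises OSError unless the files are already in the local HF cache,
# which is why the commit disables the flags when switching to a Hub ID.
tokenizer = AutoTokenizer.from_pretrained(
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
)
```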
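For reference, here is the `+` side of the hunk assembled into a standalone function. The imports, the final `return`, and the updated "from the Hub" comment are inferred; the diff cuts off after the CPU branch, so the real function may continue differently:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


def load_model_and_tokenizer():
    model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

    # Load tokenizer and model from the Hub (without trust_remote_code)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    if torch.cuda.is_available():
        # Load model on GPU if CUDA is available
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16,
            device_map="auto",  # Automatically map model layers to GPU
        )
    else:
        # Load model on CPU if no GPU is available
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float32,  # float32 for CPU compatibility
            low_cpu_mem_usage=True,     # Reduce memory usage on CPU
        )
    return model, tokenizer  # assumed; the return is not visible in the hunk
```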