tmberooney committed on
Commit
cb91778
·
1 Parent(s): 6ac49a9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -3
app.py CHANGED
# app.py (reconstructed post-commit hunk): load the base Llama-2 chat model,
# attach the "tmberooney/medllama" PEFT adapter, and pin selected parameter
# groups to CPU via an explicit device map.

secret_key = os.getenv("AUTH")  # HF auth token for the gated Llama-2 repo

from torch import nn

from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

config = PeftConfig.from_pretrained("tmberooney/medllama")

# NOTE(review): llm_int8_enable_fp32_cpu_offload is a BitsAndBytesConfig
# option; passed directly to from_pretrained without load_in_8bit /
# quantization_config it has no effect — confirm whether 8-bit loading
# (the old load_in_4bit path) was intended here.
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-chat-hf",
    use_auth_token=secret_key,
    llm_int8_enable_fp32_cpu_offload=True,
    torch_dtype=torch.float16,
)
model = PeftModel.from_pretrained(model, "tmberooney/medllama")
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Parameter groups to keep on CPU.
# NOTE(review): these are BLOOM-style module names ("transformer.*");
# Llama-2 modules are named "model.embed_tokens", "model.layers.N", etc.,
# and named_parameters() yields leaf names ending in ".weight"/".bias" —
# verify these keys against model.named_parameters() or nothing will match.
device_map = {
    "transformer.word_embeddings": "cpu",
    "transformer.word_embeddings_layernorm": "cpu",
    "lm_head": "cpu",
    "transformer.h": "cpu",
    "transformer.ln_f": "cpu",
}

# BUG FIX: the original wrapped the model in nn.DataParallel *before* this
# loop. DataParallel prefixes every parameter name with "module.", so the
# `name in device_map` membership test could never match and no parameter
# was ever moved. Move the parameters first, then wrap.
for name, param in model.named_parameters():
    if name in device_map:
        param.data = param.to(device=device_map[name])

model = nn.DataParallel(model)

# model = model.to('cuda:0')