tmberooney committed
Commit 94fb59c · Parent(s): 09372b4

Update app.py

Files changed (1)
  1. app.py +4 -5
app.py CHANGED

@@ -21,10 +21,9 @@ from torch import nn
 from peft import PeftModel, PeftConfig
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-config = PeftConfig.from_pretrained("tmberooney/medstralchatbotgptq")
-model = AutoModelForCausalLM.from_pretrained("TheBloke/Mistral-7B-Instruct-v0.2-DARE-GPTQ",load_in_4bit=True, torch_dtype=torch.float16, llm_int8_enable_fp32_cpu_offload=True)
-model = PeftModel.from_pretrained(model, "tmberooney/medstralchatbotgptq")
-tokenizer=AutoTokenizer.from_pretrained(config.base_model_name_or_path)
+config = PeftConfig.from_pretrained("tmberooney/medllama")
+model = AutoModelForCausalLM.from_pretrained("daryl149/llama-2-7b-chat-hf", use_auth_token=secret_key, llm_int8_enable_fp32_cpu_offload=True, torch_dtype=torch.float16)
+model = PeftModel.from_pretrained(model, "tmberooney/medllama", use_auth_token=secret_key)
 device_map = {"transformer.word_embeddings": "cpu",
 "transformer.word_embeddings_layernorm": "cpu",
 "lm_head": "cpu",
@@ -45,7 +44,7 @@ for name, param in model.named_parameters():
 """### Using Gradio App"""
 
 from transformers import pipeline
-
+tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
 llama_pipeline = pipeline(
 "text-generation", # LLM task
 model=model,
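
For reference, a minimal sketch of how the model-loading code reads after this commit. It assumes `secret_key` holds a Hugging Face access token; the diff does not show where it is defined, so reading it from an environment variable here is purely illustrative.

import os

import torch
from peft import PeftConfig, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Assumption: the token is sourced from the environment; app.py may define
# secret_key differently outside the hunks shown above.
secret_key = os.environ["HF_TOKEN"]

# The adapter config records which base model the PEFT weights were trained on.
config = PeftConfig.from_pretrained("tmberooney/medllama")

# Load the base Llama-2 chat model in fp16, with fp32 CPU offload enabled
# for modules that are kept on the CPU.
model = AutoModelForCausalLM.from_pretrained(
    "daryl149/llama-2-7b-chat-hf",
    use_auth_token=secret_key,
    llm_int8_enable_fp32_cpu_offload=True,
    torch_dtype=torch.float16,
)

# Wrap the base model with the medllama adapter weights.
model = PeftModel.from_pretrained(model, "tmberooney/medllama", use_auth_token=secret_key)

# The tokenizer is taken from the base model recorded in the adapter config;
# after this commit it is created just before the pipeline is built.
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)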