Linseypass committed on
Commit
c2acbb6
·
1 Parent(s): 316c6ac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -2
app.py CHANGED
@@ -3,7 +3,7 @@ import gradio as gr
3
  from nltk.tokenize import sent_tokenize
4
  import torch
5
  import ujson as json
6
- from transformers import AutoModelForCausalLM,LlamaTokenizer
7
  from peft import PeftModel
8
  from keybert import KeyBERT
9
  from keyphrase_vectorizers import KeyphraseCountVectorizer
@@ -16,7 +16,14 @@ adapters_name = 'timdettmers/guanaco-7b'
16
  # print(f"Starting to load the model {model_name} into memory")
17
  m = AutoModelForCausalLM.from_pretrained(
18
  model_name,
19
- torch_dtype=torch.bfloat16)
 
 
 
 
 
 
 
20
  m = PeftModel.from_pretrained(m, adapters_name)
21
  m = m.merge_and_unload()
22
  tok = LlamaTokenizer.from_pretrained(model_name)
 
3
  from nltk.tokenize import sent_tokenize
4
  import torch
5
  import ujson as json
6
+ from transformers import AutoModelForCausalLM,LlamaTokenizer,BitsAndBytesConfig
7
  from peft import PeftModel
8
  from keybert import KeyBERT
9
  from keyphrase_vectorizers import KeyphraseCountVectorizer
 
16
  # print(f"Starting to load the model {model_name} into memory")
17
  m = AutoModelForCausalLM.from_pretrained(
18
  model_name,
19
+ torch_dtype=torch.bfloat16,
20
+ quantization_config=BitsAndBytesConfig(
21
+ load_in_4bit=True,
22
+ bnb_4bit_compute_dtype=torch.bfloat16,
23
+ bnb_4bit_use_double_quant=True,
24
+ bnb_4bit_quant_type='nf4'
25
+ ),
26
+ )
27
  m = PeftModel.from_pretrained(m, adapters_name)
28
  m = m.merge_and_unload()
29
  tok = LlamaTokenizer.from_pretrained(model_name)