PEFT
cdh committed · verified · Commit 543eb13 · Parent: 29d5001

Update README.md

Files changed (1): README.md (+22 -2)
README.md CHANGED
@@ -42,6 +42,18 @@ For more details, please consult the associated [Github repository](https://gith
 # How to use it
 
 ```Python
+import transformers
+import torch
+from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer, GenerationConfig
+from peft import PeftModel
+
+quant_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.bfloat16
+)
+
 model = AutoModelForCausalLM.from_pretrained(
     "meta-llama/Llama-2-13b-hf",
     load_in_4bit=True,
@@ -55,13 +67,21 @@ model = PeftModel.from_pretrained(
     "sag-uniroma2/u-depp-llama-2-13b"
 )
 
+generation_config = GenerationConfig(
+    num_beams=4,
+    do_sample=False,
+    early_stopping=True,
+)
+
+tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-13b-hf", trust_remote_code=True)
+
 input_string = "He was most widely recognized for some of his books."
 prompt = f"""
 ### Input:
 {input_string}
 ### Answer:"""
 
-inputs = tokenizer(prompt, return_tensors="pt", padding=do_padding, truncation=True, max_length=CUTOFF_LEN)
+inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512)
 input_ids = inputs["input_ids"].to(model.device)
 
 with torch.no_grad():
@@ -70,7 +90,7 @@ with torch.no_grad():
         generation_config=generation_config,
         return_dict_in_generate=True,
         output_scores=True,
-        max_new_tokens=MAX_NEW_TOKENS,
+        max_new_tokens=1024,
         use_cache=True,
     )
 s = gen_outputs.sequences[0]
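
For readers who want to try the updated snippet end to end, the sketch below assembles the pieces from the new README into one runnable script. A few details are assumptions added here for completeness rather than part of the diff: the `quant_config` is passed explicitly via `quantization_config` (the updated README defines it but the visible `from_pretrained` call still uses the bare `load_in_4bit=True` flag), plus the `device_map="auto"` placement, the pad-token fallback, and the final `tokenizer.decode` step.

```python
# Sketch only: lines marked "assumption" are not shown in the README diff.
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    GenerationConfig,
)
from peft import PeftModel

# 4-bit NF4 quantization with double quantization and bfloat16 compute, as in the README.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Assumption: pass the config explicitly; the README's call uses load_in_4bit=True directly.
base_model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-13b-hf",
    quantization_config=quant_config,
    device_map="auto",  # assumption: device placement is not shown in the diff
)
# Attach the u-depp-llama-2-13b PEFT adapter on top of the quantized base model.
model = PeftModel.from_pretrained(base_model, "sag-uniroma2/u-depp-llama-2-13b")
model.eval()

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-13b-hf", trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # assumption: Llama-2 ships without a pad token

generation_config = GenerationConfig(num_beams=4, do_sample=False, early_stopping=True)

input_string = "He was most widely recognized for some of his books."
prompt = f"""
### Input:
{input_string}
### Answer:"""

inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512)
input_ids = inputs["input_ids"].to(model.device)

with torch.no_grad():
    gen_outputs = model.generate(
        input_ids=input_ids,
        generation_config=generation_config,
        return_dict_in_generate=True,
        output_scores=True,
        max_new_tokens=1024,
        use_cache=True,
    )

# Decode only the tokens generated after the prompt (the model's answer).
s = gen_outputs.sequences[0]
print(tokenizer.decode(s[input_ids.shape[1]:], skip_special_tokens=True))
```

Beam search with `do_sample=False` keeps the output deterministic across runs, and slicing at `input_ids.shape[1]` avoids re-printing the prompt; the README's own snippet stops at extracting `gen_outputs.sequences[0]`.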