PEFT
cdh committed · verified · Commit 543eb13 · Parent: 29d5001

Update README.md

Files changed (1): README.md (+22 -2)
README.md CHANGED
@@ -42,6 +42,18 @@ For more details, please consult the associated [Github repository](https://gith
 # How to use it
 
 ```Python
+import transformers
+import torch
+from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer, GenerationConfig
+from peft import PeftModel
+
+quant_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.bfloat16
+)
+
 model = AutoModelForCausalLM.from_pretrained(
     "meta-llama/Llama-2-13b-hf",
     load_in_4bit=True,
@@ -55,13 +67,21 @@ model = PeftModel.from_pretrained(
     "sag-uniroma2/u-depp-llama-2-13b"
 )
 
+generation_config = GenerationConfig(
+    num_beams=4,
+    do_sample=False,
+    early_stopping=True,
+)
+
+tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-13b-hf", trust_remote_code=True)
+
 input_string = "He was most widely recognized for some of his books."
 prompt = f"""
 ### Input:
 {input_string}
 ### Answer:"""
 
-inputs = tokenizer(prompt, return_tensors="pt", padding=do_padding, truncation=True, max_length=CUTOFF_LEN)
+inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512)
 input_ids = inputs["input_ids"].to(model.device)
 
 with torch.no_grad():
@@ -70,7 +90,7 @@ with torch.no_grad():
         generation_config=generation_config,
         return_dict_in_generate=True,
         output_scores=True,
-        max_new_tokens=MAX_NEW_TOKENS,
+        max_new_tokens=1024,
         use_cache=True,
     )
 s = gen_outputs.sequences[0]
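
For readers who want to try the updated snippet end to end, the sketch below assembles the pieces from the new README into one runnable script. A few details are assumptions added here for completeness rather than part of the diff: the `quant_config` is passed explicitly via `quantization_config` (the updated README defines it but the visible `from_pretrained` call still uses the bare `load_in_4bit=True` flag), plus the `device_map="auto"` placement, the pad-token fallback, and the final `tokenizer.decode` step.

```python
# Sketch only: lines marked "assumption" are not shown in the README diff.
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    GenerationConfig,
)
from peft import PeftModel

# 4-bit NF4 quantization with double quantization and bfloat16 compute, as in the README.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Assumption: pass the config explicitly; the README's call uses load_in_4bit=True directly.
base_model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-13b-hf",
    quantization_config=quant_config,
    device_map="auto",  # assumption: device placement is not shown in the diff
)
# Attach the u-depp-llama-2-13b PEFT adapter on top of the quantized base model.
model = PeftModel.from_pretrained(base_model, "sag-uniroma2/u-depp-llama-2-13b")
model.eval()

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-13b-hf", trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # assumption: Llama-2 ships without a pad token

generation_config = GenerationConfig(num_beams=4, do_sample=False, early_stopping=True)

input_string = "He was most widely recognized for some of his books."
prompt = f"""
### Input:
{input_string}
### Answer:"""

inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512)
input_ids = inputs["input_ids"].to(model.device)

with torch.no_grad():
    gen_outputs = model.generate(
        input_ids=input_ids,
        generation_config=generation_config,
        return_dict_in_generate=True,
        output_scores=True,
        max_new_tokens=1024,
        use_cache=True,
    )

# Decode only the tokens generated after the prompt (the model's answer).
s = gen_outputs.sequences[0]
print(tokenizer.decode(s[input_ids.shape[1]:], skip_special_tokens=True))
```

Beam search with `do_sample=False` keeps the output deterministic across runs, and slicing at `input_ids.shape[1]` avoids re-printing the prompt; the README's own snippet stops at extracting `gen_outputs.sequences[0]`.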