smangrul commited on
Commit
935e966
·
1 Parent(s): dad9fec

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +57 -0
README.md ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

## Inference Code

```python
# Inference for the smangrul/falcon-40B-int4-peft-lora-sfttrainer adapter:
# load the Falcon-40B base model quantized to 4-bit NF4, then apply the LoRA
# adapter on top and generate from a prompt.
import os

# Pin this process to a single GPU; device_map below places the model on it.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

from peft import PeftModel, prepare_model_for_kbit_training
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# 4-bit NF4 quantization with bfloat16 compute (QLoRA-style config).
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="bfloat16",
    bnb_4bit_use_double_quant=False,
)

device_map = {"": 0}  # place the entire model on GPU 0
tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-40b")
model = AutoModelForCausalLM.from_pretrained(
    "tiiuae/falcon-40b",
    quantization_config=bnb_config,
    device_map=device_map,
    trust_remote_code=True,
)
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=False)

# Attach the finetuned LoRA adapter to the quantized base model.
model_id = "smangrul/falcon-40B-int4-peft-lora-sfttrainer"
model = PeftModel.from_pretrained(model, model_id)

# NOTE(review): "### Assitant:" (sic) is kept verbatim — it presumably matches
# the prompt template the adapter was finetuned with; confirm before "fixing".
text = '### Human: Write a tweet celebrating the Apache-2 release of Falcon models which are generative Large Language Models (LLMs) on which you have been finetuned. Previously, it was under a bit of a restrictive license. Make the tweet punchy, energetic, exciting and marketable.### Assitant:'

# Fix: move the input ids onto the model's device. The original left them on
# CPU while the model sits on GPU 0, causing a device-mismatch error in
# generate().
input_ids = tokenizer(text, return_tensors="pt").input_ids.to(model.device)
outputs = model.generate(
    input_ids=input_ids,
    max_new_tokens=256,
    temperature=0.7,
    top_p=0.9,
    do_sample=True,
)

print(tokenizer.batch_decode(outputs))
```