doubledsbv committed on
Commit
3a5baef
·
verified ·
1 Parent(s): 42346cf

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +35 -0
README.md CHANGED
@@ -77,8 +77,43 @@ Near‑linear scaling thanks to sharded ZeRO‑3 + RCCL optimisations.
77
 
78
  # Inference
79
 
 
80
  ### Offline
81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
  ## vLLM
84
 
 
77
 
78
  # Inference
79
 
80
+
81
  ### Offline
82
 
83
+ ```python
84
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ model_name = "seedboxai/KafkaLM-15B"
85
+
86
+ # load the tokenizer and the model
87
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
88
+ model = AutoModelForCausalLM.from_pretrained(
89
+ model_name,
90
+ torch_dtype="auto",
91
+ device_map="auto"
92
+ )
93
+
94
+ # prepare the model input
95
+ prompt = "Why did Kafka hit different?"
96
+ messages = [
97
+ {"role": "user", "content": prompt}
98
+ ]
99
+ text = tokenizer.apply_chat_template(
100
+ messages,
101
+ tokenize=False,
102
+ add_generation_prompt=True,
103
+ )
104
+ model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
105
+
106
+ # conduct text completion
107
+ generated_ids = model.generate(
108
+ **model_inputs,
109
+ max_new_tokens=1024
110
+ )
111
+ output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
112
+
113
+ response = tokenizer.decode(output_ids, skip_special_tokens=True)
114
+
115
+ print(response)
116
+ ```
117
 
118
  ## vLLM
119