Update README.md
README.md CHANGED
@@ -4,4 +4,34 @@ tags:
- text generation
- RAG
- baichuan2
---

This model is a 7B Chinese version of [Self-RAG](https://huggingface.co/selfrag/selfrag_llama2_7b).

It is trained on Baichuan2-7B-Chat with a sample of [BELLE](https://github.com/LianjiaTech/BELLE) SFT data, interleaved with retrieved passages from zhwiki. The reflection tokens are aligned with the original English version, so the usage is the same. Hope you enjoy it.

### Usage

```python
from vllm import LLM, SamplingParams

# Replace YOUR_MODEL_PATH with the local path or hub id of this model.
model = LLM(YOUR_MODEL_PATH, dtype="half")
sampling_params = SamplingParams(temperature=0.0, top_p=1.0, max_tokens=100, skip_special_tokens=False)

def format_prompt(input, paragraph=None):
    prompt = "### Instruction:\n{0}\n\n### Response:\n".format(input)
    if paragraph is not None:
        # An evidence passage is wrapped in <paragraph> tags after the
        # [Retrieval] reflection token, as in the original Self-RAG.
        prompt += "[Retrieval]<paragraph>{0}</paragraph>".format(paragraph)
    return prompt

query_1 = "你好"  # "Hello"
query_2 = "世界最高的山峰是什么?"  # "What is the highest mountain in the world?"
queries = [query_1, query_2]

preds = model.generate([format_prompt(query) for query in queries], sampling_params)
for pred in preds:
    print("Model prediction: {0}".format(pred.outputs[0].text))
# Model prediction:
```
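
The `paragraph` argument of `format_prompt` lets you pass retrieved evidence along with the query. A minimal sketch of a retrieval-augmented call; the passage below is a placeholder standing in for your retriever's output:

```python
# Retrieval-augmented call: format_prompt wraps the evidence passage in
# <paragraph> tags. The passage here is a made-up placeholder; in practice
# it would come from your retriever (e.g. over zhwiki).
passage = "珠穆朗玛峰是世界上海拔最高的山峰。"  # "Mount Everest is the highest mountain above sea level."
prompt = format_prompt("世界最高的山峰是什么?", paragraph=passage)
pred = model.generate([prompt], sampling_params)[0]
print("Model prediction: {0}".format(pred.outputs[0].text))
```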
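
Because `skip_special_tokens=False` is set, generations keep the reflection tokens (e.g. `[Relevant]`, `[No Retrieval]`, `[Utility:5]`). To display only the answer text, you can strip them afterwards. A minimal sketch, assuming the token inventory matches the original English Self-RAG release; check this model's added special tokens for the exact list:

```python
import re

# Assumed reflection-token set, taken from the original Self-RAG release;
# verify against this model's tokenizer before relying on it.
_REFLECTION = re.compile(
    r"\[No Retrieval\]|\[Retrieval\]|\[Continue to Use Evidence\]|"
    r"\[Relevant\]|\[Irrelevant\]|\[Fully supported\]|\[Partially supported\]|"
    r"\[No support / Contradictory\]|\[Utility:[1-5]\]|</?paragraph>|</s>"
)

def strip_reflection_tokens(text):
    # Remove control tokens, then collapse the leftover whitespace.
    return re.sub(r"\s+", " ", _REFLECTION.sub(" ", text)).strip()

print(strip_reflection_tokens(pred.outputs[0].text))
```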