Alic-Li committed
Commit 2dd2cd9 · verified · 1 Parent(s): 1b5c9e2

Upload 4 files

API_DEMO_CHAT.py ADDED
@@ -0,0 +1,140 @@
+ ########################################################################################################
+ # The RWKV Language Model - https://github.com/BlinkDL/RWKV-LM
+ ########################################################################################################
+
+ print("RWKV Chat Simple Demo")
+
+ import os, copy, types, gc, sys, re
+ import numpy as np
+ from prompt_toolkit import prompt
+ import torch
+ from transformers import AutoTokenizer
+
+ torch.backends.cudnn.benchmark = True
+ torch.backends.cudnn.allow_tf32 = True
+ torch.backends.cuda.matmul.allow_tf32 = True
+ os.environ["RWKV_V7_ON"] = "1"  # enable this for RWKV-7 models
+ os.environ["RWKV_JIT_ON"] = "1"
+ os.environ["RWKV_CUDA_ON"] = "0"  # !!! '1' to compile the CUDA kernel (10x faster), requires a C++ compiler & CUDA libraries !!!
+
+ from rwkv.model import RWKV
+ from rwkv.utils import PIPELINE
+
+ ########################################################################################################
+
+ args = types.SimpleNamespace()
+
+ args.strategy = "cuda fp16"  # use CUDA, fp16
+
+ args.MODEL_NAME = "/home/alic-li/rwkv7-g1-tanslate/rwkv-final-sft-512"
+
+
+ ########################################################################################################
+ STATE_NAME = None  # None = use the vanilla zero initial state
+
+ # Use a custom state for much better chat results (download from https://huggingface.co/BlinkDL/temp-latest-training-models/tree/main).
+ # Note: this is an English single-round QA state (it will forget what you said previously).
+ # STATE_NAME = "E://RWKV-Runner//models//rwkv-x060-eng_single_round_qa-1B6-20240516-ctx2048"
+ ########################################################################################################
+
+ GEN_TEMP = 1.0
+ GEN_TOP_P = 0.3
+ GEN_alpha_presence = 0.5
+ GEN_alpha_frequency = 0.5
+ GEN_penalty_decay = 0.996
+
+ if STATE_NAME is not None:
+     GEN_TOP_P = 0.2
+     GEN_alpha_presence = 0.3
+     GEN_alpha_frequency = 0.3
+
+ CHUNK_LEN = 16  # split the input into chunks to save VRAM (shorter -> slower, but saves more VRAM)
+
+ ########################################################################################################
+
+ print(f"Loading model - {args.MODEL_NAME}")
+ model = RWKV(model=args.MODEL_NAME, strategy=args.strategy)
+ pipeline = PIPELINE(model, "rwkv_vocab_v20230424")
+ tokenizer = AutoTokenizer.from_pretrained("./MiniMind2_tokenizer")
+
+ model_tokens = []
+ model_state = None
+
+ if STATE_NAME is not None:  # load a custom state
+     args = model.args
+     state_raw = torch.load(STATE_NAME + '.pth')
+     # each layer keeps 3 state tensors: att token-shift, att time_state, ffn token-shift
+     state_init = [None for i in range(args.n_layer * 3)]
+     for i in range(args.n_layer):
+         dd = model.strategy[i]
+         dev = dd.device
+         atype = dd.atype
+         state_init[i*3+0] = torch.zeros(args.n_embd, dtype=atype, requires_grad=False, device=dev).contiguous()
+         state_init[i*3+1] = state_raw[f'blocks.{i}.att.time_state'].transpose(1,2).to(dtype=torch.float, device=dev).requires_grad_(False).contiguous()
+         state_init[i*3+2] = torch.zeros(args.n_embd, dtype=atype, requires_grad=False, device=dev).contiguous()
+     model_state = copy.deepcopy(state_init)
+
+ def run_rnn(ctx):
+     global model_tokens, model_state
+
+     ctx = ctx.replace("\r\n", "\n")
+
+     tokens = tokenizer.encode(ctx)
+     tokens = [int(x) for x in tokens]
+     model_tokens += tokens
+
+     # print(f"### model ###\n{model_tokens}\n[{pipeline.decode(model_tokens)}]") # debug
+
+     # feed the prompt in CHUNK_LEN-sized pieces; only the logits of the last chunk are needed
+     while len(tokens) > 0:
+         out, model_state = model.forward(tokens[:CHUNK_LEN], model_state)
+         tokens = tokens[CHUNK_LEN:]
+
+     return out
+
+ if STATE_NAME is None:  # use an initial prompt if we are not loading a state
+     init_ctx = "User: hi" + "\n\n"
+     init_ctx += "Assistant: Hi. I am your assistant and I will provide expert full response in full details. Please feel free to ask any question and I will always answer it." + "\n\n"
+     # run_rnn(init_ctx)
+     # print(init_ctx, end="")
+
+ while True:
+     msg = prompt("<|im_start|>user:")
+     msg = msg.strip()
+     msg = re.sub(r"\n+", "\n", msg)
+     if len(msg) > 0:
+         occurrence = {}
+         out_tokens = []
+         out_last = 0
+
+         out = run_rnn("<|im_start|>user\n" + msg + "<|im_end|>\n" + "<|im_start|>assistant\n")
+         print("\nAssistant:", end="")
+
+         eos_token_id = tokenizer.eos_token_id
+         pad_token_id = tokenizer.pad_token_id
+
+         for i in range(99999):
+             for n in occurrence:
+                 out[n] -= GEN_alpha_presence + occurrence[n] * GEN_alpha_frequency  # repetition penalty
+             out[0] -= 1e10  # disable END_OF_TEXT
+
+             token = pipeline.sample_logits(out, temperature=GEN_TEMP, top_p=GEN_TOP_P)
+
+             out, model_state = model.forward([token], model_state)
+             model_tokens += [token]
+
+             out_tokens += [token]
+
+             for xxx in occurrence:
+                 occurrence[xxx] *= GEN_penalty_decay
+             occurrence[token] = 1 + (occurrence[token] if token in occurrence else 0)
+
+             # use the token id to check whether this is the eos_token
+             if token == eos_token_id:
+                 # flush any text that has not been printed yet, without the special token itself
+                 tmp = tokenizer.decode(out_tokens[out_last:], skip_special_tokens=True)
+                 print(tmp, end="\n\n", flush=True)
+                 break
+
+             tmp = tokenizer.decode(out_tokens[out_last:])
+             if ("\ufffd" not in tmp) and (not tmp.endswith("\n")):
+                 print(tmp, end="", flush=True)
+                 out_last = i + 1
+     else:
+         print("!!! Error: please say something !!!")
MiniMind2_tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "bos_token": {
+     "content": "<|im_start|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|im_end|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
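
As a hedged sanity check (not part of the commit), the mapping above can be verified against what API_DEMO_CHAT.py relies on; the expected ids follow from added_tokens_decoder in tokenizer_config.json further down.

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./MiniMind2_tokenizer")

print(tok.bos_token, tok.bos_token_id)  # expected: <|im_start|> 1
print(tok.eos_token, tok.eos_token_id)  # expected: <|im_end|> 2   (the demo stops generation on this id)
print(tok.pad_token, tok.pad_token_id)  # expected: <|endoftext|> 0
print(tok.convert_ids_to_tokens(0))     # expected: <|endoftext|>, the id the demo masks with out[0] -= 1e10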
MiniMind2_tokenizer/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
MiniMind2_tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,44 @@
+ {
+   "add_bos_token": false,
+   "add_eos_token": false,
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<|im_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "<|im_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [],
+   "bos_token": "<|im_start|>",
+   "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{{ '<|im_start|>system\\n' + system_message + '<|im_end|>\\n' }}{% else %}{{ '<|im_start|>system\\nYou are a helpful assistant<|im_end|>\\n' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\\n' + content + '<|im_end|>\\n<|im_start|>assistant\\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '\\n' }}{% endif %}{% endfor %}",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "<|im_end|>",
+   "extra_special_tokens": {},
+   "legacy": true,
+   "model_max_length": 32768,
+   "pad_token": "<|endoftext|>",
+   "sp_model_kwargs": {},
+   "spaces_between_special_tokens": false,
+   "tokenizer_class": "PreTrainedTokenizer",
+   "unk_token": "<|endoftext|>"
+ }
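
To illustrate the chat_template above, a small hedged example (not part of the upload) of how a two-turn conversation is rendered; when no system message is supplied, the template injects the default one.

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./MiniMind2_tokenizer")

messages = [
    {"role": "user", "content": "hi"},
    {"role": "assistant", "content": "Hello!"},
    {"role": "user", "content": "how are you?"},
]
print(tok.apply_chat_template(messages, tokenize=False))
# Expected rendering per the template above:
# <|im_start|>system
# You are a helpful assistant<|im_end|>
# <|im_start|>user
# hi<|im_end|>
# <|im_start|>assistant
# Hello!<|im_end|>
# <|im_start|>user
# how are you?<|im_end|>
# <|im_start|>assistant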