Alic-Li committed · verified
Commit f95603c · 1 Parent(s): 395286c

Upload 6 files

MiniMind2_tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "bos_token": {
+     "content": "<|im_start|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|im_end|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
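
A quick way to confirm the mapping above is picked up correctly is to load the tokenizer directory with transformers and inspect the special-token attributes. This is a minimal sketch, assuming the files live in ./MiniMind2_tokenizer as app.py below expects:

```python
from transformers import AutoTokenizer

# Load the uploaded tokenizer directory (the same path app.py uses below).
tokenizer = AutoTokenizer.from_pretrained("./MiniMind2_tokenizer")

# These attributes should mirror special_tokens_map.json.
print(tokenizer.bos_token)  # <|im_start|>
print(tokenizer.eos_token)  # <|im_end|>
print(tokenizer.pad_token)  # <|endoftext|>
print(tokenizer.unk_token)  # <|endoftext|>
```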
MiniMind2_tokenizer/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
MiniMind2_tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,44 @@
+ {
+   "add_bos_token": false,
+   "add_eos_token": false,
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<|im_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "<|im_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [],
+   "bos_token": "<|im_start|>",
+   "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{{ '<|im_start|>system\\n' + system_message + '<|im_end|>\\n' }}{% else %}{{ '<|im_start|>system\\nYou are a helpful assistant<|im_end|>\\n' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\\n' + content + '<|im_end|>\\n<|im_start|>assistant\\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '\\n' }}{% endif %}{% endfor %}",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "<|im_end|>",
+   "extra_special_tokens": {},
+   "legacy": true,
+   "model_max_length": 32768,
+   "pad_token": "<|endoftext|>",
+   "sp_model_kwargs": {},
+   "spaces_between_special_tokens": false,
+   "tokenizer_class": "PreTrainedTokenizer",
+   "unk_token": "<|endoftext|>"
+ }
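
The chat_template above is what turns a list of messages into the <|im_start|>/<|im_end|> prompt format. A hedged sketch of rendering it with transformers' apply_chat_template (the message list is purely illustrative):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./MiniMind2_tokenizer")

messages = [
    {"role": "user", "content": "Hello, who are you?"},
]

# tokenize=False returns the rendered prompt string instead of token ids.
# The template injects a default "You are a helpful assistant" system turn
# when none is supplied, and it already appends the trailing
# "<|im_start|>assistant\n" after a user turn, so no extra flag is needed.
prompt = tokenizer.apply_chat_template(messages, tokenize=False)
print(prompt)
```

This is essentially the same prompt string that generate_response() in app.py assembles by hand, except that app.py skips the system turn.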
app.py ADDED
@@ -0,0 +1,181 @@
+ import os
+ import copy
+ import types
+ import torch
+ from transformers import AutoTokenizer
+ import gradio as gr
+
+ # Configure the rwkv pip package before importing it.
+ os.environ["RWKV_V7_ON"] = "1"
+ os.environ["RWKV_JIT_ON"] = "1"
+ os.environ["RWKV_CUDA_ON"] = "0"
+
+ from rwkv.model import RWKV
+ from rwkv.utils import PIPELINE
+
+ args = types.SimpleNamespace()
+ args.strategy = "cuda fp16"
+ args.MODEL_NAME = "./rwkv-final-sft-2048"
+
+ STATE_NAME = None
+ GEN_TEMP = 1.0
+ GEN_TOP_P = 0.3
+ GEN_alpha_presence = 0.5
+ GEN_alpha_frequency = 0.5
+ GEN_penalty_decay = 0.996
+ CHUNK_LEN = 16
+
+ print(f"Loading model - {args.MODEL_NAME}")
+ model = RWKV(model=args.MODEL_NAME, strategy=args.strategy)
+ pipeline = PIPELINE(model, "rwkv_vocab_v20230424")
+ tokenizer = AutoTokenizer.from_pretrained("./MiniMind2_tokenizer")
+
+ model_tokens = []
+ model_state = None
+
+ # Optionally warm-start the RNN state from a saved state file.
+ if STATE_NAME is not None:
+     GEN_TOP_P = 0.2
+     GEN_alpha_presence = 0.3
+     GEN_alpha_frequency = 0.3
+
+     args = model.args
+     state_raw = torch.load(STATE_NAME + '.pth')
+     state_init = [None for i in range(args.n_layer * 3)]
+     for i in range(args.n_layer):
+         dd = model.strategy[i]
+         dev = dd.device
+         atype = dd.atype
+         state_init[i*3+0] = torch.zeros(args.n_embd, dtype=atype, requires_grad=False, device=dev).contiguous()
+         state_init[i*3+1] = state_raw[f'blocks.{i}.att.time_state'].transpose(1,2).to(dtype=torch.float, device=dev).requires_grad_(False).contiguous()
+         state_init[i*3+2] = torch.zeros(args.n_embd, dtype=atype, requires_grad=False, device=dev).contiguous()
+     model_state = copy.deepcopy(state_init)
+
+ def run_rnn(ctx, state):
+     """Feed the prompt through the RNN in CHUNK_LEN pieces, return (logits, state)."""
+     ctx = ctx.replace("\r\n", "\n")
+     tokens = tokenizer.encode(ctx)
+     tokens = [int(x) for x in tokens]
+
+     current_state = copy.deepcopy(state) if state is not None else None
+
+     while len(tokens) > 0:
+         out, current_state = model.forward(tokens[:CHUNK_LEN], current_state)
+         tokens = tokens[CHUNK_LEN:]
+
+     return out, current_state
+
+ def generate_response(message, history, temperature=1.0, top_p=0.3):
+     global model_tokens, model_state
+
+     # Rebuild the chat prompt in the <|im_start|>/<|im_end|> format.
+     ctx = ""
+     for human, assistant in history:
+         ctx += f"<|im_start|>user\n{human}<|im_end|>\n<|im_start|>assistant\n{assistant}<!--eos--><|im_end|>\n"
+
+     ctx += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
+
+     out, model_state = run_rnn(ctx, model_state)
+
+     occurrence = {}
+     out_tokens = []
+     out_last = 0
+     response = ""
+
+     eos_token_id = tokenizer.eos_token_id
+     im_end_id = tokenizer.encode("<|im_end|>")[0]
+     for i in range(99999):
+         # Apply presence/frequency repetition penalties before sampling.
+         logits = out.clone()
+         for n in occurrence:
+             logits[n] -= GEN_alpha_presence + occurrence[n] * GEN_alpha_frequency
+
+         logits[0] -= 1e10  # never sample token 0
+
+         token = pipeline.sample_logits(logits, temperature=temperature, top_p=top_p)
+
+         if token == im_end_id:
+             break
+
+         out, model_state = model.forward([token], model_state)
+
+         out_tokens += [token]
+         for xxx in occurrence:
+             occurrence[xxx] *= GEN_penalty_decay
+         occurrence[token] = 1 + (occurrence[token] if token in occurrence else 0)
+
+         # Only emit text once it decodes without a partial-UTF-8 replacement char.
+         tmp = tokenizer.decode(out_tokens[out_last:])
+         if "\ufffd" not in tmp:
+             response += tmp
+             cleaned_response = response.replace("<|im_end|>", "")
+             yield cleaned_response
+             out_last = i + 1
+
+         if token == eos_token_id:
+             break
+
+ def chat_with_bot(message, history, temperature, top_p):
+     response = ""
+     for partial_response in generate_response(message, history, temperature, top_p):
+         response = partial_response
+         yield response
+
+ with gr.Blocks(title="MiniRWKV_7 34.2M 🪿 2vGPU Space") as demo:
+     gr.Markdown("# MiniRWKV_7 34.2M 🪿 ")
+     gr.Markdown("### Only 34.2M Params!!! Use 2V CPU Backend to run this model. ")
+
+     with gr.Row():
+         with gr.Column(scale=3):
+             chatbot = gr.Chatbot(
+                 label="Chat history",
+                 height=500,
+             )
+
+         with gr.Column(scale=1):
+             msg = gr.Textbox(
+                 label="Message",
+                 placeholder="Type your question...",
+                 lines=3
+             )
+
+             with gr.Row():
+                 send_btn = gr.Button("Send", variant="primary")
+                 clear_btn = gr.Button("Clear history")
+
+             gr.Markdown("### Sampling parameters")
+             temperature_slider = gr.Slider(
+                 minimum=0.1,
+                 maximum=2.0,
+                 value=GEN_TEMP,
+                 step=0.1,
+                 label="Temperature"
+             )
+             top_p_slider = gr.Slider(
+                 minimum=0.0,
+                 maximum=2.0,
+                 value=GEN_TOP_P,
+                 step=0.05,
+                 label="Top-P"
+             )
+
+     def respond(message, chat_history, temperature, top_p):
+         if not message:
+             yield "", chat_history
+             return
+
+         chat_history.append((message, ""))
+
+         response = ""
+         for partial_response in chat_with_bot(message, chat_history[:-1], temperature, top_p):
+             response = partial_response
+             cleaned_response = response.replace("<|im_end|>", "")
+             chat_history[-1] = (message, cleaned_response)
+             yield "", chat_history
+
+     def clear_history():
+         global model_tokens, model_state
+         model_tokens = []
+         model_state = None
+         return []
+
+     msg.submit(respond, [msg, chatbot, temperature_slider, top_p_slider], [msg, chatbot])
+     send_btn.click(respond, [msg, chatbot, temperature_slider, top_p_slider], [msg, chatbot])
+     clear_btn.click(clear_history, None, chatbot)
+
+ if __name__ == "__main__":
+     demo.launch(server_name="127.0.0.1", server_port=7860)
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ torch --index-url https://download.pytorch.org/whl/cpu
+ rwkv
+ transformers
rwkv-final-sft-2048.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:09459cc9b8cf413e71ab867d7be5673f4d5b554d8fb87cf8669e4aa34599152f
+ size 68354364