doubledsbv committed (verified)
Commit 99786ed · Parent(s): 0bba440

Update README.md

Files changed (1)
  1. README.md +6 -1
README.md CHANGED
@@ -157,6 +157,9 @@ from huggingface_hub import snapshot_download
 from vllm import EngineArgs, LLMEngine, RequestOutput, SamplingParams
 from vllm.lora.request import LoRARequest
 
+SYS_MESSAGE = 'A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think> <answer> answer here </answer>.'
+tokenizer = transformers.AutoTokenizer.from_pretrained("")
+
 def create_test_prompts(lora_path: str) -> list[tuple[str, SamplingParams, Optional[LoRARequest]]]:
     """Create a list of test prompts with their sampling parameters.
     1 requests for base model, 1 request for the LoRA.
@@ -173,6 +176,7 @@ def create_test_prompts(lora_path: str) -> list[tuple[str, SamplingParams, Optio
                        top_k=20,
                        logprobs=1,
                        prompt_logprobs=1,
+                       stop=["</s>", "<eos>"],
                        max_tokens=4096),
         LoRARequest("reasoning-lora", 1, lora_path)),
 
@@ -187,7 +191,8 @@ def process_requests(engine: LLMEngine,
 
     while test_prompts or engine.has_unfinished_requests():
         if test_prompts:
-            prompt, sampling_params, lora_request = test_prompts.pop(0)
+            input, sampling_params, lora_request = test_prompts.pop(0)
+            prompt = tokenizer.apply_chat_template([{'role':'system', 'content': SYS_MESSAGE},{'role': 'user', 'content': input}], tokenize = False, add_generation_prompt = True)
             engine.add_request(str(request_id),
                                prompt,
                                sampling_params,
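
Taken together, the commit adds a system prompt and chat-template wrapping to the README's vLLM LoRA example. Below is a minimal, self-contained sketch of how the updated pieces could fit together end to end. It is an illustration under stated assumptions, not the README's exact code: the base-model repo id (the diff leaves from_pretrained("") blank), the LoRA repo id, the EngineArgs settings, and the build_prompt/main helpers are hypothetical placeholders; only SYS_MESSAGE, the sampling parameters visible in the diff, and the apply_chat_template call mirror the commit.

# Minimal sketch combining the pieces touched by this commit.
# NOTE: BASE_MODEL_REPO and LORA_REPO are hypothetical placeholders; the diff
# does not name the base model (from_pretrained("") is left blank upstream).
import transformers
from huggingface_hub import snapshot_download
from vllm import EngineArgs, LLMEngine, SamplingParams
from vllm.lora.request import LoRARequest

BASE_MODEL_REPO = "org/base-model"  # placeholder, not in the diff
LORA_REPO = "org/reasoning-lora"    # placeholder, not in the diff

# System prompt added by the commit (verbatim from the diff).
SYS_MESSAGE = (
    "A conversation between User and Assistant. The user asks a question, and the "
    "Assistant solves it. The assistant first thinks about the reasoning process in "
    "the mind and then provides the user with the answer. The reasoning process and "
    "answer are enclosed within <think> </think> and <answer> </answer> tags, "
    "respectively, i.e., <think> reasoning process here </think> "
    "<answer> answer here </answer>."
)

tokenizer = transformers.AutoTokenizer.from_pretrained(BASE_MODEL_REPO)


def build_prompt(user_message: str) -> str:
    """Wrap the raw user text in the chat template, as the updated README code does."""
    return tokenizer.apply_chat_template(
        [
            {"role": "system", "content": SYS_MESSAGE},
            {"role": "user", "content": user_message},
        ],
        tokenize=False,
        add_generation_prompt=True,
    )


def main() -> None:
    # Fetch the LoRA adapter and start an engine with LoRA support enabled.
    lora_path = snapshot_download(repo_id=LORA_REPO)
    engine = LLMEngine.from_engine_args(
        EngineArgs(model=BASE_MODEL_REPO, enable_lora=True, max_loras=1)
    )

    # Sampling parameters mirror the ones visible in the diff, including the new stop list.
    sampling_params = SamplingParams(
        top_k=20,
        logprobs=1,
        prompt_logprobs=1,
        stop=["</s>", "<eos>"],
        max_tokens=4096,
    )

    prompt = build_prompt("How many prime numbers are there below 100?")
    engine.add_request(
        "0",
        prompt,
        sampling_params,
        lora_request=LoRARequest("reasoning-lora", 1, lora_path),
    )

    # Drain the engine and print the finished completion.
    while engine.has_unfinished_requests():
        for request_output in engine.step():
            if request_output.finished:
                print(request_output.outputs[0].text)


if __name__ == "__main__":
    main()

The key design point the commit makes is that raw user text is no longer passed straight to engine.add_request; it is first rendered through the tokenizer's chat template with the reasoning system message, so the LoRA sees prompts in the same <think>/<answer> format it was trained on.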