|
from transformers import PreTrainedTokenizerFast |
|
|
|
class PAlignerTokenizer(PreTrainedTokenizerFast):
    """Fast tokenizer with helpers for a prompt-optimization exchange.

    ``get_context`` builds the text fed to the model (a chat-templated
    request to optimize a raw prompt, followed by a fixed response prefix)
    and ``parse_output`` extracts the optimized prompt from the text the
    model generated after that prefix.
    """

    # Scaffold strings shared by ``get_context`` and ``parse_output``. They
    # are kept in one place because parsing relies on them matching exactly
    # what was appended to the context.
    _OPTIMIZED_HEADER = "The Optimized Prompt:"
    _OPTIMIZED_START = "[The Start of Optimized Prompt]"
    _OPTIMIZED_END = "[The End of Optimized Prompt]"
    # Prefix appended after the chat template. It stops just before the
    # closing "]" of the start marker, so the marker is only complete once
    # the generation supplies the "]" itself.
    _RESPONSE_PREFIX = "The Optimized Prompt:\n\n[The Start of Optimized Prompt"

    def __init__(self, *args, **kwargs):
        """Forward all arguments unchanged to ``PreTrainedTokenizerFast``."""
        super().__init__(*args, **kwargs)

    def get_context(
        self,
        raw_instruction,
    ):
        """Build the generation context for optimizing ``raw_instruction``.

        Args:
            raw_instruction: The prompt to optimize; it is inserted verbatim
                between the raw-prompt markers of the user message.

        Returns:
            The single-turn user message rendered with the chat template
            (``add_generation_prompt=True``, ``tokenize=False``) with the
            response prefix appended.
        """
        prompt = (
            "You are an expert prompt engineer. "
            "Please help me optimize this prompt to get better response:\n\n"
            "[The Start of Raw Prompt]\n{}\n[The End of Raw Prompt]"
        ).format(raw_instruction)

        messages = [{"role": "user", "content": prompt}]
        context = self.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=False,
        )
        return context + self._RESPONSE_PREFIX

    def parse_output(
        self,
        output_text,
        raw_instruction="",
    ):
        """Extract the optimized prompt from generated text.

        Args:
            output_text: Text generated as a continuation of the context
                returned by ``get_context``.
            raw_instruction: Value returned when no usable optimized prompt
                can be extracted.

        Returns:
            The text between the start and end markers, with at most one
            leading and one trailing newline removed and anything from a
            repeated "The Optimized Prompt:" header onward dropped. Falls
            back to ``raw_instruction`` when the result is blank or still
            contains "The Optimized".
        """
        # Re-attach the prefix that was fed to the model so the start marker
        # can be matched as a whole.
        text = self._RESPONSE_PREFIX + output_text

        # Keep only what follows the first complete start marker, if any.
        _, start_found, after_start = text.partition(self._OPTIMIZED_START)
        if start_found:
            text = after_start
        if text.startswith("\n"):
            text = text[1:]

        # Drop the first end marker and everything after it (no-op if absent).
        text = text.partition(self._OPTIMIZED_END)[0]
        if text.endswith("\n"):
            text = text[:-1]

        # Cut at a remaining header. When the start marker was never
        # completed the header sits at position 0, leaving an empty string
        # that triggers the fallback below.
        text = text.partition(self._OPTIMIZED_HEADER)[0]

        # Blank result, or leftover scaffold text (presumably a malformed
        # generation): return the caller-supplied fallback instead.
        if text.strip() == "" or "The Optimized" in text:
            return raw_instruction
        return text