from transformers import PreTrainedTokenizerFast


class PAlignerTokenizer(PreTrainedTokenizerFast):
    """Fast tokenizer with helpers for a prompt-optimization exchange.

    ``get_context`` builds the text sent to the model and ``parse_output``
    extracts the optimized prompt from the text the model generated.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def get_context(
        self,
        raw_instruction,
    ):
        """Build the generation context asking for a better ``raw_instruction``.

        The raw instruction is wrapped in start/end markers inside a single
        user message, which is rendered through ``self.apply_chat_template``
        (``add_generation_prompt=True``, ``tokenize=False``). The beginning of
        the expected answer is then appended, deliberately leaving the opening
        marker without its closing ``]``.

        Returns:
            The rendered chat text followed by the pre-filled answer prefix.
        """
        prompt = (
            "You are an expert prompt engineer. "
            "Please help me optimize this prompt to get better response:\n\n"
            "[The Start of Raw Prompt]\n{}\n[The End of Raw Prompt]"
        ).format(raw_instruction)

        messages = [{"role": "user", "content": prompt}]
        rendered = self.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=False,
        )
        return rendered + "The Optimized Prompt:\n\n[The Start of Optimized Prompt"

    def parse_output(
        self,
        output_text,
        raw_instruction = "", # recommended: returned instead whenever parsing fails
    ):
        """Extract the optimized prompt from the generated ``output_text``.

        The answer prefix appended by ``get_context`` is put back in front of
        ``output_text`` first, so the opening marker is only complete when the
        generated text supplies the closing ``]``.

        Args:
            output_text: Text generated as a continuation of the context.
            raw_instruction: Fallback returned when no usable optimized
                prompt can be extracted.

        Returns:
            The text between the start and end markers (minus one adjacent
            newline on each side), or ``raw_instruction`` when the result is
            blank or still contains "The Optimized".
        """
        start_tag = "[The Start of Optimized Prompt]"
        end_tag = "[The End of Optimized Prompt]"
        header = "The Optimized Prompt:"

        # Restore the prefix that get_context() pre-filled for the model.
        candidate = "The Optimized Prompt:\n\n[The Start of Optimized Prompt" + output_text

        # Keep what follows the first start marker, minus one leading newline.
        _, start_found, after_start = candidate.partition(start_tag)
        if start_found:
            candidate = after_start[1:] if after_start.startswith("\n") else after_start

        # Keep what precedes the first end marker, minus one trailing newline.
        before_end, end_found, _ = candidate.partition(end_tag)
        if end_found:
            candidate = before_end[:-1] if before_end.endswith("\n") else before_end

        # A surviving header means the markers were malformed or the answer
        # was repeated; only the text before it is kept (unchanged if absent).
        candidate = candidate.partition(header)[0]

        if not candidate.strip():
            # Nothing usable was produced, so fall back to the raw instruction.
            candidate = raw_instruction
        if "The Optimized" in candidate:
            # Marker residue is still present, so fall back to the raw instruction.
            candidate = raw_instruction
        return candidate