"""
TestTime RLVR ํ”„๋กฌํ”„ํŠธ ์ค‘์•™ ๊ด€๋ฆฌ ์‹œ์Šคํ…œ
๋ชจ๋“  ํ”„๋กฌํ”„ํŠธ๋ฅผ ํ•œ ๊ณณ์—์„œ ๊ด€๋ฆฌํ•˜์—ฌ ์ผ๊ด€์„ฑ๊ณผ ์œ ์ง€๋ณด์ˆ˜์„ฑ์„ ํ–ฅ์ƒ์‹œํ‚ต๋‹ˆ๋‹ค.
"""
from typing import Dict, List, Optional
from dataclasses import dataclass
from enum import Enum
class PromptType(Enum):
"""ํ”„๋กฌํ”„ํŠธ ์œ ํ˜• ์ •์˜"""
SOLUTION_GENERATION = "solution_generation"
DIVERSE_GENERATION = "diverse_generation"
INPUT_GENERATION = "input_generation"
TASK_GENERATION = "task_generation"
TASK_EVALUATION = "task_evaluation"
class BenchmarkType(Enum):
"""๋ฒค์น˜๋งˆํฌ ์œ ํ˜• ์ •์˜"""
HUMANEVAL = "humaneval"
MBPP = "mbpp"
GENERAL = "general"
@dataclass
class PromptTemplate:
"""ํ”„๋กฌํ”„ํŠธ ํ…œํ”Œ๋ฆฟ ๋ฐ์ดํ„ฐ ํด๋ž˜์Šค"""
name: str
template: str
description: str
benchmark: BenchmarkType
temperature: float = 0.05
variables: Optional[List[str]] = None
def __post_init__(self):
if self.variables is None:
self.variables = []
class PromptManager:
"""ํ”„๋กฌํ”„ํŠธ ์ค‘์•™ ๊ด€๋ฆฌ ํด๋ž˜์Šค"""
def __init__(self):
self.prompts = self._initialize_prompts()
def _initialize_prompts(self) -> Dict[str, PromptTemplate]:
"""๋ชจ๋“  ํ”„๋กฌํ”„ํŠธ ํ…œํ”Œ๋ฆฟ ์ดˆ๊ธฐํ™”"""
prompts = {}
# ================================================================================
# 1. SOLUTION GENERATION PROMPTS (Current Evaluation - Baseline)
# ================================================================================
# HumanEval basic solution generation
prompts["solution_humaneval_basic"] = PromptTemplate(
name="HumanEval ๊ธฐ๋ณธ ์†”๋ฃจ์…˜ ์ƒ์„ฑ",
benchmark=BenchmarkType.HUMANEVAL,
temperature=0.05,
description="HumanEval ๋ฌธ์ œ์— ๋Œ€ํ•œ ๊ธฐ๋ณธ ์†”๋ฃจ์…˜ ์ƒ์„ฑ (greedy)",
variables=["problem_prompt"],
template="""You are a Python writing assistant. Complete the following Python function.
{problem_prompt}
Please provide a complete implementation of the function."""
)
# HumanEval multi-function handling
prompts["solution_humaneval_multi"] = PromptTemplate(
name="HumanEval ๋‹ค์ค‘ ํ•จ์ˆ˜ ์†”๋ฃจ์…˜ ์ƒ์„ฑ",
benchmark=BenchmarkType.HUMANEVAL,
temperature=0.05,
description="์—ฌ๋Ÿฌ ํ•จ์ˆ˜๊ฐ€ ์žˆ๋Š” HumanEval ๋ฌธ์ œ ์ฒ˜๋ฆฌ",
variables=["problem_prompt", "entry_point"],
template="""You are a Python writing assistant. Complete the following Python function.
{problem_prompt}
Please provide ONLY the implementation for the function `{entry_point}`.
Complete the body of the `{entry_point}` function where it is incomplete.
Do not modify or reimplement other functions that are already complete."""
)
# MBPP basic solution generation
prompts["solution_mbpp_basic"] = PromptTemplate(
name="MBPP ๊ธฐ๋ณธ ์†”๋ฃจ์…˜ ์ƒ์„ฑ",
benchmark=BenchmarkType.MBPP,
temperature=0.05,
description="MBPP ๋ฌธ์ œ์— ๋Œ€ํ•œ ๊ธฐ๋ณธ ์†”๋ฃจ์…˜ ์ƒ์„ฑ",
variables=["problem_prompt"],
template="""
Please generate a complete, self-contained Python script that solves the following problem.
CRITICAL REQUIREMENTS:
- You MUST maintain the EXACT function signature as shown in the examples
- The function name, parameter names, parameter types, and parameter count MUST match exactly with the examples
- Look at the assert statements carefully to understand the expected function signature
- DO NOT change the number of parameters or their types from what is shown in the examples
Instructions:
- Wrap the entire script in a Markdown code block with syntax highlighting (```python ... ```).
- For each function, include a concise docstring enclosed in triple single quotes (''' ... '''), placed immediately below the def line.
The docstring should briefly describe:
• The function's purpose
• Input parameters
• Return value
Problem statement:
{problem_prompt}
"""
)
# ================================================================================
# 2. DIVERSE GENERATION PROMPTS (Diverse Program Generation)
# ================================================================================
# HumanEval diverse solutions
prompts["diverse_humaneval_basic"] = PromptTemplate(
name="HumanEval ๋‹ค์–‘์„ฑ ์†”๋ฃจ์…˜ ์ƒ์„ฑ",
benchmark=BenchmarkType.HUMANEVAL,
temperature=0.7,
description="HumanEval ๋ฌธ์ œ์— ๋Œ€ํ•œ ๋‹ค์–‘ํ•œ ์ ‘๊ทผ๋ฒ• ์†”๋ฃจ์…˜",
variables=["diversity_instruction", "problem_prompt"],
template="""You are a Python writing assistant. {diversity_instruction}
{problem_prompt}
Please provide a complete implementation of the function."""
)
# HumanEval diverse multi-function solutions
prompts["diverse_humaneval_multi"] = PromptTemplate(
name="HumanEval ๋‹ค์–‘์„ฑ ๋‹ค์ค‘ ํ•จ์ˆ˜ ์†”๋ฃจ์…˜",
benchmark=BenchmarkType.HUMANEVAL,
temperature=0.7,
description="๋‹ค์ค‘ ํ•จ์ˆ˜ HumanEval์— ๋Œ€ํ•œ ๋‹ค์–‘์„ฑ ์†”๋ฃจ์…˜",
variables=["diversity_instruction", "problem_prompt", "entry_point"],
template="""You are a Python writing assistant. {diversity_instruction}
{problem_prompt}
Please provide ONLY the implementation for the function `{entry_point}`.
Complete the body of the `{entry_point}` function where it is incomplete.
Do not modify or reimplement other functions that are already complete."""
)
# MBPP diverse solutions
prompts["diverse_mbpp_basic"] = PromptTemplate(
name="MBPP ๋‹ค์–‘์„ฑ ์†”๋ฃจ์…˜ ์ƒ์„ฑ",
benchmark=BenchmarkType.MBPP,
temperature=0.7,
description="MBPP ๋ฌธ์ œ์— ๋Œ€ํ•œ ๋‹ค์–‘ํ•œ ์ ‘๊ทผ๋ฒ• ์†”๋ฃจ์…˜",
variables=["diversity_instruction", "problem_prompt"],
template="""Please generate a complete, self-contained Python script that solves the following problem.
CRITICAL REQUIREMENTS:
- You MUST maintain the EXACT function signature as shown in the examples
- The function name, parameter names, parameter types, and parameter count MUST match exactly with the examples
- Look at the assert statements carefully to understand the expected function signature
- DO NOT change the number of parameters or their types from what is shown in the examples
Instructions:
- Wrap the entire script in a Markdown code block with syntax highlighting (```python ... ```).
- For each function, include a concise docstring enclosed in triple single quotes (''' ... '''), placed immediately below the def line.
The docstring should briefly describe:
• The function's purpose
• Input parameters
• Return value
{diversity_instruction}
Problem statement:
{problem_prompt}
"""
)
# ================================================================================
# 3. INPUT GENERATION PROMPTS (Input Augmentation)
# ================================================================================
prompts["input_generation_basic"] = PromptTemplate(
name="๊ธฐ๋ณธ ์ž…๋ ฅ ์ƒ์„ฑ",
benchmark=BenchmarkType.GENERAL,
temperature=0.5,
description="๊ธฐ์กด IPO ์˜ˆ์ œ๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ ์ƒˆ๋กœ์šด ์ž…๋ ฅ ์ƒ์„ฑ",
variables=["problem_description", "existing_examples", "full_code", "arg_type_info"],
template="""Given the following problem description and its Python function implementation, first analyze the types and valid ranges of the function arguments, then write **5 different example inputs** for the function that cover a diverse mix of typical (general) cases and edge/boundary cases.
Problem Description:
'''
{problem_description}
'''
Existing Examples from Problem:
{existing_examples}
Function Implementation:
```python
{full_code}
```
{arg_type_info}
Based on the existing examples above, generate 5 NEW diverse test inputs that are different from the existing ones. Each input should be a Python dict where:
- Keys are the exact parameter names from the function signature
- Values are appropriate test values for each parameter
Format your response as:
```python
examples = [
{{dict_with_all_function_parameters}}, # Description of this test case
{{dict_with_all_function_parameters}}, # Description of this test case
... # Continue for all 5 examples
]
```
Ensure your examples include:
- At least 2 typical/general cases
- At least 2 edge/boundary cases
- 1 special case (empty, zero, maximum values, etc.)
- All examples should be DIFFERENT from the existing examples shown above"""
)
# ================================================================================
# 4. TASK GENERATION PROMPTS (IPO → Reasoning Tasks)
# ================================================================================
prompts["task_induction"] = PromptTemplate(
name="Induction ํƒœ์Šคํฌ ์ƒ์„ฑ (AZR code_f)",
benchmark=BenchmarkType.GENERAL,
temperature=0.05,
description="์ฃผ์–ด์ง„ ์ž…๋ ฅ-์ถœ๋ ฅ์œผ๋กœ๋ถ€ํ„ฐ ํ”„๋กœ๊ทธ๋žจ ์ถ”๋ก  (AZR ์›๋ณธ)",
variables=["input_output_pairs", "message"],
template="""A conversation between User and Assistant.
The User provides a set of input/output pairs and a message describing the hidden function. The Assistant must:
1. **Privately think step-by-step** about how to reconstruct the general function based on the provided examples.
2. **Output exactly one** `<think>...</think>` block containing the full reasoning process.
3. **Then output exactly one** `<answer>...</answer>` block containing **only** the Python code snippet defining the function `f`—no labels, no comments, no extra text.
4. **Do not** generate any text outside these two blocks.
5. Adhere to the **code requirements** and **formatting rules**.
# Code Requirements:
- Name the entry function `f` (e.g., `def f(...): ...`); you may include nested definitions inside `f`.
- Ensure the function returns a value.
- Include at least one input parameter.
- Make the function deterministic.
- AVOID the FOLLOWING:
* Random functions or variables
* Date/time operations
* I/O operations (reading files, network requests)
* Printing or logging
* Any external state
- Ensure execution completes within 10 seconds on a modern CPU.
- All imports and custom class definitions must be at the very top of the code snippet.
- The snippet must end with a return statement from the main function `f`; anything after will be removed.
User:
# Input and Output Pairs:
{input_output_pairs}
# Message:
{message}"""
)
prompts["task_deduction"] = PromptTemplate(
name="Deduction ํƒœ์Šคํฌ ์ƒ์„ฑ (AZR code_o)",
benchmark=BenchmarkType.GENERAL,
temperature=0.05,
description="์ฃผ์–ด์ง„ ํ”„๋กœ๊ทธ๋žจ๊ณผ ์ž…๋ ฅ์œผ๋กœ๋ถ€ํ„ฐ ์ถœ๋ ฅ ์ถ”๋ก  (AZR ์›๋ณธ)",
variables=["snippet", "input_args"],
template="""A conversation between User and Assistant.
The User provides a Python code snippet and specific input values. The Assistant must:
1. **Privately think step-by-step** about how the code executes with the given inputs.
2. **Output exactly one** `<think>...</think>` block containing your full reasoning.
3. **Then output exactly one** `<answer>...</answer>` block containing **only** the output values—no labels, no comments, no extra text.
4. **Do not** generate any text outside these two blocks.
5. Adhere to the **output rules**.
# Output Rules:
- If the output is a string, wrap it in quotes.
- For dicts, lists, and other literals, use valid Python literal notation.
User:
# Python Code Snippet:
{snippet}
# Input:
{input_args}"""
)
prompts["task_abduction"] = PromptTemplate(
name="Abduction ํƒœ์Šคํฌ ์ƒ์„ฑ (AZR code_i)",
benchmark=BenchmarkType.GENERAL,
temperature=0.05,
description="์ฃผ์–ด์ง„ ํ”„๋กœ๊ทธ๋žจ๊ณผ ์ถœ๋ ฅ์œผ๋กœ๋ถ€ํ„ฐ ์ž…๋ ฅ ์ถ”๋ก  (AZR ์›๋ณธ)",
variables=["snippet", "output"],
template="""A conversation between User and Assistant.
The User provides a Python code snippet and its observed output. The Assistant must:
1. **Privately think step-by-step** about which input produces that output.
2. **Output exactly one** `<think>...</think>` block containing your full reasoning.
3. **Then output exactly one** `<answer>...</answer>` block containing **only** the input values—no labels, no comments, no extra text.
4. **Do not** generate any text outside these two blocks.
5. Adhere to the **input rules**.
# Input Rules:
- If an argument is a string, wrap it in quotes.
- For multiple arguments, separate by commas.
- Use Python literal notation for lists, dicts, tuples.
- Boolean values must be `True` or `False`.
User:
# Python Code Snippet:
{snippet}
# Observed Output:
{output}"""
)
# ================================================================================
# 5. TASK EVALUATION PROMPTS (LLM Task Responses)
# ================================================================================
prompts["task_evaluation_basic"] = PromptTemplate(
name="๊ธฐ๋ณธ ํƒœ์Šคํฌ ํ‰๊ฐ€",
benchmark=BenchmarkType.GENERAL,
temperature=0.05,
description="์ƒ์„ฑ๋œ ์ถ”๋ก  ํƒœ์Šคํฌ์— ๋Œ€ํ•œ LLM ์‘๋‹ต",
variables=["task_prompt"],
template="{task_prompt}"
)
return prompts
def get_prompt(self, prompt_key: str, **kwargs) -> str:
"""ํ”„๋กฌํ”„ํŠธ ํ‚ค๋กœ ํ…œํ”Œ๋ฆฟ์„ ๊ฐ€์ ธ์™€ ๋ณ€์ˆ˜๋ฅผ ์ฑ„์›€"""
if prompt_key not in self.prompts:
raise ValueError(f"Unknown prompt key: {prompt_key}")
template = self.prompts[prompt_key]
# Check that all required variables are provided
missing_vars = []
for var in template.variables:
if var not in kwargs:
missing_vars.append(var)
if missing_vars:
raise ValueError(f"Missing required variables for prompt '{prompt_key}': {missing_vars}")
# Format the template
try:
return template.template.format(**kwargs)
except KeyError as e:
raise ValueError(f"Template formatting error for prompt '{prompt_key}': {e}")
def get_temperature(self, prompt_key: str) -> float:
"""ํ”„๋กฌํ”„ํŠธ์˜ ๊ถŒ์žฅ temperature ๋ฐ˜ํ™˜"""
if prompt_key not in self.prompts:
raise ValueError(f"Unknown prompt key: {prompt_key}")
return self.prompts[prompt_key].temperature
def get_diversity_instruction(self, variation_id: int) -> str:
"""variation_id์— ๋”ฐ๋ฅธ ๋‹ค์–‘์„ฑ ์ง€์‹œ๋ฌธ ๋ฐ˜ํ™˜"""
diversity_instructions = [
"", # ๊ธฐ๋ณธ
"",
"",
""
]
# diversity_instructions = [
# "", # ๊ธฐ๋ณธ
# "Implement this in a robust way that works well for various examples",
# "Provide an alternative solution with a unique implementation style:",
# "Try to implement using a different approach, algorithm, or coding style than typical solutions."
# ]
return diversity_instructions[variation_id % len(diversity_instructions)]
def list_prompts(self) -> Dict[str, PromptTemplate]:
"""๋ชจ๋“  ํ”„๋กฌํ”„ํŠธ ํ…œํ”Œ๋ฆฟ ๋ชฉ๋ก ๋ฐ˜ํ™˜"""
return self.prompts.copy()
def get_prompts_by_type(self, benchmark: BenchmarkType) -> Dict[str, PromptTemplate]:
"""๋ฒค์น˜๋งˆํฌ ํƒ€์ž…๋ณ„ ํ”„๋กฌํ”„ํŠธ ๋ฐ˜ํ™˜"""
return {
key: template for key, template in self.prompts.items()
if template.benchmark == benchmark or template.benchmark == BenchmarkType.GENERAL
}
# Global prompt manager instance
prompt_manager = PromptManager()
# ํŽธ์˜ ํ•จ์ˆ˜๋“ค
def get_prompt(prompt_key: str, **kwargs) -> str:
"""ํ”„๋กฌํ”„ํŠธ ๊ฐ€์ ธ์˜ค๊ธฐ ํŽธ์˜ ํ•จ์ˆ˜"""
return prompt_manager.get_prompt(prompt_key, **kwargs)
def get_temperature(prompt_key: str) -> float:
"""ํ”„๋กฌํ”„ํŠธ temperature ๊ฐ€์ ธ์˜ค๊ธฐ ํŽธ์˜ ํ•จ์ˆ˜"""
return prompt_manager.get_temperature(prompt_key)
def get_diversity_instruction(variation_id: int) -> str:
"""๋‹ค์–‘์„ฑ ์ง€์‹œ๋ฌธ ๊ฐ€์ ธ์˜ค๊ธฐ ํŽธ์˜ ํ•จ์ˆ˜"""
return prompt_manager.get_diversity_instruction(variation_id)
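# --------------------------------------------------------------------------------
# Minimal usage sketch: exercises only the public helpers defined above. The
# problem stub passed to get_prompt below is a hypothetical toy example, not part
# of any benchmark.
# --------------------------------------------------------------------------------
if __name__ == "__main__":
    # Render the baseline HumanEval solution prompt for a toy problem.
    example_problem = 'def add(a: int, b: int) -> int:\n    """Return the sum of a and b."""\n'
    rendered = get_prompt("solution_humaneval_basic", problem_prompt=example_problem)
    print(rendered)

    # Recommended sampling temperature for this prompt (0.05, i.e. near-greedy).
    print("temperature:", get_temperature("solution_humaneval_basic"))

    # Diversity instruction for variation 1 (currently an empty string).
    print("diversity instruction:", repr(get_diversity_instruction(1)))

    # Keys of all prompts applicable to MBPP (benchmark-specific plus GENERAL).
    for key in prompt_manager.get_prompts_by_type(BenchmarkType.MBPP):
        print("mbpp-compatible prompt:", key)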