"""EvalPlus direct usage test."""

import json
import sys
from pathlib import Path

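# Add the script's own directory and the project's code_eval/coding directory to
# sys.path so the local modules (and, assumedly, the bundled evalplus package) resolve.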
sys.path.append(str(Path(__file__).parent))
sys.path.insert(0, "/home/ubuntu/RLVR/TestTime-RLVR-v2/evaluation/code_eval/coding")

print(f"Python path: {sys.path[:3]}") |
|
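# Sanity check: confirm evalplus is importable from the paths set up above.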
try:
    import evalplus
    print("EvalPlus imported successfully!")
except ImportError as e:
    print(f"Failed to import evalplus: {e}")

from absolute_zero_reasoner.testtime.solution_generator import InitialSolutionGenerator
from absolute_zero_reasoner.testtime.config import TestTimeConfig
from absolute_zero_reasoner.testtime.logger import TestTimeLogger


def load_single_problem(task_id: str = "Mbpp/2"):
    """Load a single MBPP problem from the MBPP+ JSONL dataset."""
    dataset_path = Path("/home/ubuntu/RLVR/TestTime-RLVR-v2/evaluation/code_eval/data/MbppPlus.jsonl")

    with open(dataset_path, 'r') as f:
        for line in f:
            problem = json.loads(line.strip())
            if problem['task_id'] == task_id:
                return problem

    raise ValueError(f"Problem {task_id} not found")


def test_evalplus_direct():
    """Test EvalPlus directly."""
    config = TestTimeConfig()
    logger = TestTimeLogger()

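    # Stub model/tokenizer: this test only calls evaluate_solution(), so no
    # actual generation is performed and the stubs never have to produce text.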
    class MockModel:
        def device(self):
            return 'cpu'

    class MockTokenizer:
        eos_token = '</s>'
        pad_token = '</s>'

    generator = InitialSolutionGenerator(
        model=MockModel(),
        tokenizer=MockTokenizer(),
        config=config,
        logger=logger,
        use_vllm=False
    )

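    # Load one MBPP+ problem and report how many base/plus test inputs it carries.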
    problem = load_single_problem("Mbpp/2")
    print(f"Testing problem: {problem['task_id']}")
    print(f"Entry point: {problem['entry_point']}")
    print(f"Base inputs: {len(problem.get('base_input', []))}")
    print(f"Plus inputs: {len(problem.get('plus_input', []))}")

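    # The dataset's canonical solution is expected to pass every base and plus
    # test, which makes it a good sanity check for the evaluator itself.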
    solution = problem['canonical_solution']
    print("\nTesting with canonical solution:")
    print(solution)

    result = generator.evaluate_solution(problem, solution)

print("\n=== Evaluation Result (Canonical) ===") |
|
print(f"Correct: {result['correct']}") |
|
print(f"Base passed: {result['base_passed']}/{result['base_total']}") |
|
print(f"Plus passed: {result['plus_passed']}/{result['plus_total']}") |
|
print(f"Total passed: {result['passed_tests']}/{result['total_tests']}") |
|
if result['error']: |
|
print(f"Error: {result['error']}") |
|
|
|
if __name__ == "__main__":
    test_evalplus_direct()