"""Utility helper functions for tmlr_reviewer package."""

import re
from typing import Dict

# Public API of this module: the names exported by a star-import.
__all__ = [
    "extract_all_tags",
    "extract_output_tags",
    "parse_json_from_text",
]


def extract_all_tags(text: str) -> Dict[str, str]:
    """Collect the contents of every ``<tag>...</tag>`` pair found in *text*.

    A pair only matches when the closing tag carries the same name as the
    opening one. Content may span multiple lines. If the same tag name occurs
    more than once, the last occurrence wins.

    Args:
        text: Input string that may contain XML-style tags.

    Returns:
        Dictionary mapping each tag name to its content, with leading and
        trailing whitespace removed from the content.
    """
    # DOTALL lets "." cross newlines so multi-line tag bodies are captured.
    tag_re = re.compile(r"<(\w+)>(.*?)</\1>", re.DOTALL)

    found: Dict[str, str] = {}
    for hit in tag_re.finditer(text):
        name = hit.group(1)
        body = hit.group(2)
        found[name] = body.strip()
    return found


def extract_output_tags(text: str) -> str:
    """Return the body of the first ``<output>...</output>`` pair in *text*.

    Args:
        text: Arbitrary string that may contain output tags.

    Returns:
        The whitespace-stripped content of the first pair of <output> tags,
        or *text* itself, unmodified, when no such pair exists.
    """
    # DOTALL so the tag body may span several lines.
    found = re.search(r"<output>(.*?)</output>", text, flags=re.DOTALL)
    if found is None:
        return text

    inner = found.group(1)
    return inner.strip()


def parse_json_from_text(text: str):
    """Attempt to parse the *first* JSON object found in *text*.

    The whole input is tried first, so any valid JSON document (object,
    array, string, number, ...) is returned as-is. If that fails, the text is
    scanned left to right and decoding is attempted at every ``{``; the first
    position that yields a complete JSON object wins. Using the JSON decoder
    itself (rather than a regex) means nested objects and braces inside
    string values are handled correctly.

    Args:
        text: A string that either is a JSON document or has a JSON object
            embedded somewhere in surrounding text.

    Returns:
        The parsed Python object if successful, otherwise ``None``.
    """
    import json  # local import keeps this helper self-contained

    # 1) Direct attempt. ValueError covers json.JSONDecodeError; TypeError
    #    covers unsupported input types; RecursionError covers absurdly deep
    #    nesting. All mean "not parseable" for this best-effort helper.
    try:
        return json.loads(text)
    except (ValueError, TypeError, RecursionError):
        pass

    # Only strings can be scanned for an embedded object.
    if not isinstance(text, str):
        return None

    # 2) Look for a JSON object inside the text. raw_decode parses one value
    #    starting at the given index and ignores whatever follows it.
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            parsed, _end = decoder.raw_decode(text, start)
        except (ValueError, RecursionError):
            # Not a valid object at this brace; try the next one.
            start = text.find("{", start + 1)
        else:
            return parsed

    return None