"""Utility helper functions for tmlr_reviewer package.""" import re from typing import Dict __all__ = [ "extract_all_tags", "extract_output_tags", "parse_json_from_text", ] def extract_all_tags(text: str) -> Dict[str, str]: """Extracts content between any XML-style tags from a string. Args: text: Input string that may contain tags Returns: Mapping of tag name -> tag content with surrounding whitespace stripped. """ pattern = r"<(\w+)>(.*?)" matches = re.findall(pattern, text, re.DOTALL) return {tag: content.strip() for tag, content in matches} def extract_output_tags(text: str) -> str: """Extract content between tags from *text* if present. Args: text: Arbitrary string that may contain output tags. Returns: The content inside the first pair of tags if found otherwise the original text. """ pattern = r"(.*?)" match = re.search(pattern, text, re.DOTALL) return match.group(1).strip() if match else text def parse_json_from_text(text: str): """Attempt to parse the *first* JSON object found in *text*. Returns the parsed Python object if successful, otherwise ``None``. """ import json, re # 1) Direct attempt try: return json.loads(text) except Exception: pass # 2) Look for JSON object inside text json_pattern = re.compile(r"\{[\s\S]*?\}") # non-greedy to first closing brace match = json_pattern.search(text) if match: candidate = match.group(0) try: return json.loads(candidate) except Exception: pass return None