from pyabsa import AspectTermExtraction as ATEPC
from pyabsa import TaskCodeOption
from pyabsa.utils.data_utils.dataset_manager import detect_infer_dataset


def load_atepc_examples(dataset_name: str) -> list[str]:
    """Load de-duplicated plain-text example sentences for an ATEPC dataset.

    The dataset entry is looked up by attribute name on
    ``ATEPC.ATEPCDatasetList()`` and its inference files are located with
    ``detect_infer_dataset``. From every line of every file, everything from
    the first ``$LABEL$`` marker onwards is dropped, the ``[B-ASP]`` /
    ``[E-ASP]`` tags are removed, and surrounding whitespace is stripped.
    Lines that end up empty are skipped.

    Loading is best-effort: a file that is missing or cannot be read is
    reported on stdout and skipped, it does not abort the whole call.

    Args:
        dataset_name: Attribute name of the dataset on
            ``ATEPC.ATEPCDatasetList()``.

    Returns:
        The cleaned sentences, duplicates removed, in order of first
        appearance across the files.

    Raises:
        AttributeError: If ``dataset_name`` is not an attribute of the
            dataset list.
    """
    # NOTE(review): the APC task code is used although the dataset comes from
    # the ATEPC list -- presumably because the inference files carry APC-style
    # ``$LABEL$`` / ``[B-ASP]`` markup; confirm against pyabsa.
    task = TaskCodeOption.Aspect_Polarity_Classification

    # getattr() is the idiomatic lookup and, unlike calling __getattribute__
    # directly, also honours a __getattr__ fallback if the class defines one.
    atepc_dataset_item = getattr(ATEPC.ATEPCDatasetList(), dataset_name)
    dataset_files = detect_infer_dataset(atepc_dataset_item, task)

    # Defensive: treat a missing result as "no files".
    # NOTE(review): whether detect_infer_dataset can return None is not
    # visible from this file.
    if dataset_files is None:
        return []
    # A single path may come back as a bare string; normalise to a list.
    if isinstance(dataset_files, str):
        dataset_files = [dataset_files]

    all_lines = []
    for fpath in dataset_files:
        print(f"Loading ATEPC examples from: {fpath}")
        try:
            with open(fpath, "r", encoding="utf-8") as fin:
                # Read the whole file before processing so that a file which
                # fails part-way through decoding contributes no lines at all.
                raw_lines = fin.readlines()
            for raw_line in raw_lines:
                # partition() yields the full line when the marker is absent.
                text = raw_line.partition("$LABEL$")[0]
                text = text.replace("[B-ASP]", "").replace("[E-ASP]", "").strip()
                if text:
                    all_lines.append(text)
        except FileNotFoundError:
            print(f"Warning: Dataset file not found: {fpath}")
        except Exception as e:
            # Deliberately broad: one unreadable file must not prevent the
            # remaining files from being loaded.
            print(f"Error loading {fpath}: {e}")

    # dict keys keep insertion order, so this removes duplicates while
    # preserving the order of first occurrence.
    return list(dict.fromkeys(all_lines))