| import json | |
| import re | |
| from typing import Any | |
| from .operator import BaseFieldOperator | |
class ToString(BaseFieldOperator):
    """Field operator whose ``process`` returns ``str()`` of the given value."""

    def process(self, instance):
        """Return the built-in string conversion of ``instance``, unmodified."""
        as_text = str(instance)
        return as_text
class ToStringStripped(BaseFieldOperator):
    """Field operator that stringifies a value and trims surrounding whitespace."""

    def process(self, instance):
        """Return ``str(instance)`` with leading and trailing whitespace removed."""
        as_text = str(instance)
        return as_text.strip()
class ToListByComma(BaseFieldOperator):
    """Field operator that splits a comma-separated value into trimmed items."""

    def process(self, instance):
        """Split ``instance`` on "," and return the pieces, each stripped.

        Empty pieces are kept, so ``"a,,b"`` yields ``["a", "", "b"]`` and an
        empty string yields ``[""]``.
        """
        pieces = []
        for chunk in instance.split(","):
            pieces.append(chunk.strip())
        return pieces
class RegexParser(BaseFieldOperator):
    """
    Extracts every match of a regular expression from a string.

    ``process`` returns the result of ``re.findall(regex, text)``. When
    ``termination_regex`` is set and matches the *entire* text, an empty list
    is returned instead and ``regex`` is not applied.
    """

    # Pattern handed to ``re.findall``.
    regex: str
    # Optional pattern; a full match of the input short-circuits to ``[]``.
    termination_regex: str = None

    def process(self, text):
        """Return all matches of ``self.regex`` in ``text`` (or ``[]`` on termination)."""
        stop_pattern = self.termination_regex
        if stop_pattern is not None:
            # Only a match spanning the whole text counts as a termination marker.
            if re.fullmatch(stop_pattern, text):
                return []
        return re.findall(self.regex, text)
class LoadJson(BaseFieldOperator):
    """Field operator that decodes a JSON document, falling back to ``[]``."""

    def process(self, text):
        """Return ``json.loads(text)``, or an empty list if the text is not valid JSON."""
        try:
            decoded = json.loads(text)
        except json.JSONDecodeError:
            # Best-effort: malformed JSON is reported as "nothing parsed".
            return []
        else:
            return decoded
class ListToEmptyEntitiesTuples(BaseFieldOperator):
    """Field operator that turns an iterable into ``(str(item), "")`` tuples."""

    def process(self, lst):
        """Pair the string form of every item in ``lst`` with an empty string.

        Returns:
            A list of ``(str(item), "")`` tuples, or ``[]`` when ``lst`` is not
            iterable (for example ``None`` or a number).
        """
        # The previous handler caught json.JSONDecodeError, which nothing in
        # this method can raise, so the intended ``[]`` fallback never fired.
        # The failure that can actually happen here is a non-iterable input.
        try:
            items = iter(lst)
        except TypeError:
            return []
        return [(str(item), "") for item in items]
class DictOfListsToPairs(BaseFieldOperator):
    """Field operator that flattens a mapping of string lists into pairs.

    For every ``key`` and every ``value`` in ``obj[key]`` one pair is emitted,
    in the mapping's iteration order.
    """

    # True -> pairs are (key, value); False -> pairs are (value, key).
    position_key_before_value: bool = True

    def process(self, obj):
        """Return the flattened list of pairs, or ``[]`` if ``obj`` is malformed.

        ``obj`` is treated as malformed when it has no usable ``items()``, when
        one of its values cannot be iterated, or when any inner value is not a
        ``str``. In all of those cases the whole result is ``[]``.
        """
        pairs = []
        try:
            for key, values in obj.items():
                for value in values:
                    # Explicit check rather than ``assert`` so the validation
                    # is not stripped when Python runs with -O.
                    if not isinstance(value, str):
                        return []
                    if self.position_key_before_value:
                        pairs.append((key, value))
                    else:
                        pairs.append((value, key))
        except Exception:
            # Deliberate best-effort fallback for malformed input. Unlike a
            # bare ``except:``, this lets KeyboardInterrupt and SystemExit
            # propagate.
            return []
        return pairs