Spaces:

unitxt
/

metric

Running

App Files Files Community

Elron committed on Dec 3, 2023

Commit

161e5a1

1 Parent(s): e4d5cd2

Upload templates.py with huggingface_hub

Browse files

Files changed (1) hide show

templates.py +150 -33

templates.py CHANGED Viewed

@@ -4,11 +4,13 @@ from dataclasses import field
 from typing import Any, Dict, List, Optional, Union
 from .artifact import Artifact
 from .dataclass import NonPositionalField
 from .instructions import Instruction, TextualInstruction
-from .operator import InstanceOperatorWithGlobalAccess, StreamInstanceOperator
-from .random_utils import random
 from .text_utils import split_words
 class Renderer(ABC):
@@ -39,10 +41,14 @@ class RenderFormatTemplate(Renderer, StreamInstanceOperator):
     random_reference: bool = False
     def verify(self):
-        assert isinstance(self.template, Template), "Template must be an instance of Template"
         assert self.template is not None, "Template must be specified"
-    def process(self, instance: Dict[str, Any], stream_name: str = None) -> Dict[str, Any]:
         return self.render(instance)
     def render(self, instance: Dict[str, Any]) -> Dict[str, Any]:
@@ -55,7 +61,7 @@ class RenderFormatTemplate(Renderer, StreamInstanceOperator):
         if self.template.is_multi_reference:
             references = targets
             if self.random_reference:
-                target = random.choice(references)
             else:
                 if len(references) == 0:
                     raise ValueError("No references found")
@@ -87,7 +93,7 @@ class RenderAutoFormatTemplate(RenderFormatTemplate):
         except:
             pass
-        inputs = {key: value for key, value in instance["inputs"].items()}
         return super().render({**instance, "inputs": inputs})
@@ -118,7 +124,12 @@ class RenderTemplatedICL(RenderAutoFormatTemplate):
         example = super().render(instance)
-        input_str = self.input_prefix + example["source"] + self.input_output_separator + self.output_prefix
         if self.instruction is not None:
             source += self.instruction_prefix + self.instruction() + self.demo_separator
@@ -136,7 +147,9 @@ class RenderTemplatedICL(RenderAutoFormatTemplate):
             )
             if self.size_limiter is not None:
-                if not self.size_limiter.check(source + demo_str + input_str + example["target"]):
                     continue
             source += demo_str
@@ -155,7 +168,9 @@ class RenderTemplatedICL(RenderAutoFormatTemplate):
 class InputOutputTemplate(Template):
     input_format: str = None
     output_format: str = None
-    postprocessors: List[str] = field(default_factory=lambda: ["processors.to_string_stripped"])
     def process_template(self, template: str, data: Dict[str, object]) -> str:
         data = {k: ", ".join(v) if isinstance(v, list) else v for k, v in data.items()}
@@ -166,16 +181,91 @@ class InputOutputTemplate(Template):
             return self.process_template(self.input_format, inputs)
         except KeyError as e:
             raise KeyError(
-                f"Available inputs are {inputs.keys()} but input format requires a different one: {self.input_format}"
-            )
     def process_outputs(self, outputs: Dict[str, object]) -> str:
         try:
             return self.process_template(self.output_format, outputs)
         except KeyError as e:
             raise KeyError(
-                f"Available inputs are {outputs.keys()} but output format requires a different one: {self.output_format}"
             )
     def get_postprocessors(self) -> List[str]:
         return self.postprocessors
@@ -188,10 +278,17 @@ class KeyValTemplate(Template):
     outputs_key_val_seperator: str = ": "
     use_keys_for_outputs: bool = False
-    postprocessors: List[str] = field(default_factory=lambda: ["processors.to_string_stripped"])
-    def process_dict(self, dic: Dict[str, object], key_val_sep, pairs_sep, use_keys) -> str:
-        dic = {k: ", ".join([str(vi) for vi in v]) if isinstance(v, list) else v for k, v in dic.items()}
         pairs = []
         for key, val in dic.items():
             key_val = [key, val] if use_keys else [val]
@@ -221,9 +318,10 @@ class KeyValTemplate(Template):
 class OutputQuantizingTemplate(InputOutputTemplate):
     quantum: float = 0.1
-    def process_outputs(self, outputs: Dict[str, object]) -> Dict[str, object]:
         quantized_outputs = {
-            key: round(input_float / self.quantum) * self.quantum for key, input_float in outputs.items()
         }
         return super().process_outputs(quantized_outputs)
@@ -235,12 +333,25 @@ class MultiLabelTemplate(InputOutputTemplate):
     output_format = "{labels}"
     empty_label = "None"
-    def process_outputs(self, outputs: Dict[str, object]) -> Dict[str, object]:
         labels = outputs[self.labels_field]
         if len(labels) == 0:
             labels = [self.empty_label]
         labels_str = self.labels_seprator.join(labels)
-        return super().process_outputs({"labels": labels_str})
 def escape_chars(s, chars_to_escape):
@@ -296,13 +407,16 @@ class SpanLabelingTemplate(SpanLabelingBaseTemplate):
 class SpanLabelingJsonTemplate(SpanLabelingBaseTemplate):
-    postprocessors = ["processors.load_json", "processors.dict_of_lists_to_value_key_pairs"]
     def span_label_pairs_to_targets(self, span_label_pairs):
         groups = {}
         for span, label in span_label_pairs:
             if label not in groups:
-                groups[label] = list()
             groups[label].append(span)
         if len(groups) > 0:
             targets = [json.dumps(groups)]
@@ -315,7 +429,9 @@ class AutoInputOutputTemplate(InputOutputTemplate):
     def infer_input_format(self, inputs):
         input_format = ""
         for key in inputs.keys():
-            name = " ".join(word.lower().capitalize() for word in split_words(key) if word != " ")
             input_format += name + ": " + "{" + key + "}" + "\n"
         self.input_format = input_format
@@ -332,21 +448,20 @@ class AutoInputOutputTemplate(InputOutputTemplate):
         return self.input_format is not None and self.output_format is not None
-from .collections import ListCollection
 class TemplatesList(ListCollection):
     def verify(self):
         for template in self.items:
             assert isinstance(template, Template)
-def outputs_inputs2templates(inputs: Union[str, List], outputs: Union[str, List]) -> TemplatesList:
-    """
-    combines input and output formats into their dot product
     :param inputs: list of input formats (or one)
     :param outputs: list of output formats (or one)
-    :return: TemplatesList of InputOutputTemplate
     """
     templates = []
     if isinstance(inputs, str):
@@ -367,8 +482,8 @@ def outputs_inputs2templates(inputs: Union[str, List], outputs: Union[str, List]
 def instructions2templates(
     instructions: List[TextualInstruction], templates: List[InputOutputTemplate]
 ) -> TemplatesList:
-    """
-    Insert instructions into per demonstration templates
     :param instructions:
     :param templates: strings containing {instuction} where the instruction should be placed
     :return:
@@ -378,7 +493,9 @@ def instructions2templates(
         for template in templates:
             res_templates.append(
                 InputOutputTemplate(
-                    input_format=template.input_format.replace("{instruction}", instruction.text),
                     output_format=template.output_format,
                 )
             )
@@ -387,5 +504,5 @@ def instructions2templates(
 class TemplatesDict(Dict):
     def verify(self):
-        for key, template in self.items():
             assert isinstance(template, Template)

 from typing import Any, Dict, List, Optional, Union
 from .artifact import Artifact
+from .collections import ListCollection
 from .dataclass import NonPositionalField
 from .instructions import Instruction, TextualInstruction
+from .operator import StreamInstanceOperator
+from .random_utils import get_random
 from .text_utils import split_words
+from .type_utils import isoftype
 class Renderer(ABC):
     random_reference: bool = False
     def verify(self):
+        assert isinstance(
+            self.template, Template
+        ), "Template must be an instance of Template"
         assert self.template is not None, "Template must be specified"
+    def process(
+        self, instance: Dict[str, Any], stream_name: Optional[str] = None
+    ) -> Dict[str, Any]:
         return self.render(instance)
     def render(self, instance: Dict[str, Any]) -> Dict[str, Any]:
         if self.template.is_multi_reference:
             references = targets
             if self.random_reference:
+                target = get_random().choice(references)
             else:
                 if len(references) == 0:
                     raise ValueError("No references found")
         except:
             pass
+        inputs = dict(instance["inputs"].items())
         return super().render({**instance, "inputs": inputs})
         example = super().render(instance)
+        input_str = (
+            self.input_prefix
+            + example["source"]
+            + self.input_output_separator
+            + self.output_prefix
+        )
         if self.instruction is not None:
             source += self.instruction_prefix + self.instruction() + self.demo_separator
             )
             if self.size_limiter is not None:
+                if not self.size_limiter.check(
+                    source + demo_str + input_str + example["target"]
+                ):
                     continue
             source += demo_str
 class InputOutputTemplate(Template):
     input_format: str = None
     output_format: str = None
+    postprocessors: List[str] = field(
+        default_factory=lambda: ["processors.to_string_stripped"]
+    )
     def process_template(self, template: str, data: Dict[str, object]) -> str:
         data = {k: ", ".join(v) if isinstance(v, list) else v for k, v in data.items()}
             return self.process_template(self.input_format, inputs)
         except KeyError as e:
             raise KeyError(
+                f"Available inputs are {list(inputs.keys())} but input format requires a different ones: '{self.input_format}'"
+            ) from e
     def process_outputs(self, outputs: Dict[str, object]) -> str:
         try:
             return self.process_template(self.output_format, outputs)
         except KeyError as e:
             raise KeyError(
+                f"Available outputs are {outputs.keys()} but output format requires a different one: {self.output_format}"
+            ) from e
+    def get_postprocessors(self) -> List[str]:
+        return self.postprocessors
+class YesNoTemplate(Template):
+    """A template for generating binary Yes/No questions asking whether an input text is of a specific class.
+    input_format:
+        Format string of the question; it is filled with the instance inputs
+        via ``str.format``.
+    class_field:
+        Name of the outputs field that holds the class this template asks
+        about. Its value must be a list with exactly one item.
+    label_field:
+        Name of the outputs field that holds the gold labels of the input
+        text. Its value must be a non-empty list. If the queried class is one
+        of the gold labels, the output is self.yes_answer, otherwise it is
+        self.no_answer.
+    yes_answer:
+        The output value for when the queried class is among the gold labels.
+        Defaults to "Yes".
+    no_answer:
+        The output value for when the queried class is not among the gold labels.
+        Defaults to "No".
+    """
+    input_format: str = None
+    class_field: str = None
+    label_field: str = None
+    yes_answer: str = "Yes"
+    no_answer: str = "No"
+    postprocessors: List[str] = field(
+        default_factory=lambda: ["processors.to_string_stripped"]
+    )
+    def process_inputs(self, inputs: Dict[str, object]) -> str:
+        """Render the question by filling ``input_format`` with ``inputs``.
+        List values are joined with ", ". Items are stringified first so that
+        lists holding non-string values do not raise TypeError.
+        Raises RuntimeError when ``input_format`` names a field that is
+        missing from ``inputs``.
+        """
+        data = {
+            k: ", ".join(str(item) for item in v) if isinstance(v, list) else v
+            for k, v in inputs.items()
+        }
+        # Only the format call can raise the KeyError that is translated below.
+        try:
+            return self.input_format.format(**data)
+        except KeyError as e:
+            raise RuntimeError(
+                f"Available inputs are {list(inputs.keys())} but input format requires a different one: {self.input_format}"
+            ) from e
+    def process_outputs(self, outputs: Dict[str, object]) -> str:
+        """Return ``yes_answer`` if the queried class is a gold label, else ``no_answer``.
+        Raises RuntimeError when ``label_field`` or ``class_field`` is missing
+        from ``outputs``, when the gold labels are not a non-empty list, or
+        when the queried class is not a list with exactly one item.
+        """
+        try:
+            gold_class_names = outputs[self.label_field]
+        except KeyError as e:
+            raise RuntimeError(
+                f"Available outputs are {list(outputs.keys())}, missing required label field: '{self.label_field}'."
+            ) from e
+        if not isinstance(gold_class_names, list) or not gold_class_names:
+            raise RuntimeError(
+                f"Unexpected value for gold_class_names: '{gold_class_names}'. Expected a non-empty list."
             )
+        try:
+            queried_class_names = outputs[self.class_field]
+        except KeyError as e:
+            raise RuntimeError(
+                f"Available outputs are {list(outputs.keys())}, missing required class field: '{self.class_field}'."
+            ) from e
+        # Any non-list value (None, "", 0, ...) and any list whose length is
+        # not exactly one is rejected by this single check.
+        if (
+            not isinstance(queried_class_names, list)
+            or len(queried_class_names) != 1
+        ):
+            raise RuntimeError(
+                f"Unexpected value for queried_class_names: '{queried_class_names}'. Expected a list with one item."
+            )
+        queried_class_name = queried_class_names[0]
+        if queried_class_name in gold_class_names:
+            return self.yes_answer
+        return self.no_answer
     def get_postprocessors(self) -> List[str]:
         return self.postprocessors
     outputs_key_val_seperator: str = ": "
     use_keys_for_outputs: bool = False
+    postprocessors: List[str] = field(
+        default_factory=lambda: ["processors.to_string_stripped"]
+    )
+    def process_dict(
+        self, dic: Dict[str, object], key_val_sep, pairs_sep, use_keys
+    ) -> str:
+        dic = {
+            k: ", ".join([str(vi) for vi in v]) if isinstance(v, list) else v
+            for k, v in dic.items()
+        }
         pairs = []
         for key, val in dic.items():
             key_val = [key, val] if use_keys else [val]
 class OutputQuantizingTemplate(InputOutputTemplate):
     quantum: float = 0.1
+    def process_outputs(self, outputs: Dict[str, object]) -> str:
         quantized_outputs = {
+            key: round(input_float / self.quantum) * self.quantum
+            for key, input_float in outputs.items()
         }
         return super().process_outputs(quantized_outputs)
     output_format = "{labels}"
     empty_label = "None"
+    def process_outputs(self, outputs: Dict[str, object]) -> str:
         labels = outputs[self.labels_field]
         if len(labels) == 0:
             labels = [self.empty_label]
         labels_str = self.labels_seprator.join(labels)
+        return super().process_outputs({self.labels_field: labels_str})
+class MultiReferenceTemplate(InputOutputTemplate):
+    """Input/output template whose processed output is a list of reference strings.
+    The list is read from the outputs field named by ``references_field`` and
+    returned unchanged once it has been checked to be a ``List[str]``.
+    """
+    references_field: str = "references"
+    is_multi_reference = True
+    def process_outputs(self, outputs: Dict[str, object]) -> List[str]:
+        """Return the references list stored in ``outputs``.
+        Raises ValueError when the value under ``references_field`` is not of
+        type List[str].
+        """
+        candidate_refs = outputs[self.references_field]
+        if isoftype(candidate_refs, List[str]):
+            return candidate_refs
+        raise ValueError(
+            f"MultiReferenceTemplate requires that references field {self.references_field} is of type List[str]."
+        )
 def escape_chars(s, chars_to_escape):
 class SpanLabelingJsonTemplate(SpanLabelingBaseTemplate):
+    postprocessors = [
+        "processors.load_json",
+        "processors.dict_of_lists_to_value_key_pairs",
+    ]
     def span_label_pairs_to_targets(self, span_label_pairs):
         groups = {}
         for span, label in span_label_pairs:
             if label not in groups:
+                groups[label] = []
             groups[label].append(span)
         if len(groups) > 0:
             targets = [json.dumps(groups)]
     def infer_input_format(self, inputs):
         input_format = ""
         for key in inputs.keys():
+            name = " ".join(
+                word.lower().capitalize() for word in split_words(key) if word != " "
+            )
             input_format += name + ": " + "{" + key + "}" + "\n"
         self.input_format = input_format
         return self.input_format is not None and self.output_format is not None
 class TemplatesList(ListCollection):
     def verify(self):
         for template in self.items:
             assert isinstance(template, Template)
+def outputs_inputs2templates(
+    inputs: Union[str, List], outputs: Union[str, List]
+) -> TemplatesList:
+    """Combines input and output formats into their dot product.
     :param inputs: list of input formats (or one)
     :param outputs: list of output formats (or one)
+    :return: TemplatesList of InputOutputTemplate.
     """
     templates = []
     if isinstance(inputs, str):
 def instructions2templates(
     instructions: List[TextualInstruction], templates: List[InputOutputTemplate]
 ) -> TemplatesList:
+    """Insert instructions into per demonstration templates.
     :param instructions:
     :param templates: strings containing {instuction} where the instruction should be placed
     :return:
         for template in templates:
             res_templates.append(
                 InputOutputTemplate(
+                    input_format=template.input_format.replace(
+                        "{instruction}", instruction.text
+                    ),
                     output_format=template.output_format,
                 )
             )
 class TemplatesDict(Dict):
     def verify(self):
+        for _key, template in self.items():
             assert isinstance(template, Template)