Spaces:

unitxt
/

metric

Running

App Files Files Community

Elron committed on Mar 5, 2024

Commit

88a9416

verified ·

1 Parent(s): 2109a58

Upload templates.py with huggingface_hub

Browse files

Files changed (1) hide show

templates.py +69 -42

templates.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import json
 from abc import abstractmethod
-from dataclasses import field
 from typing import Any, Dict, List, Optional, Tuple
 from .collections import ListCollection
@@ -14,12 +13,21 @@ class Template(StreamInstanceOperator):
     """The role of template is to take the fields of every instance and verbalize it.
     Meaning the template is taking the instance and generating source, target and references.
     """
     skip_rendered_instance: bool = NonPositionalField(default=True)
     postprocessors: List[str] = NonPositionalField(
         default_factory=lambda: ["processors.to_string_stripped"]
     )
     def process(
         self, instance: Dict[str, Any], stream_name: Optional[str] = None
@@ -35,7 +43,7 @@ class Template(StreamInstanceOperator):
         inputs = instance.get("inputs")
         outputs = instance.get("outputs")
-        source = self.inputs_to_source(inputs)
         target, references = self.outputs_to_target_and_references(outputs)
         return {
@@ -43,10 +51,12 @@ class Template(StreamInstanceOperator):
             "source": source,
             "target": target,
             "references": references,
         }
     @abstractmethod
-    def inputs_to_source(self, inputs: Dict[str, object]) -> str:
         pass
     @abstractmethod
@@ -72,13 +82,17 @@ class InputOutputTemplate(Template):
         data = {k: ", ".join(v) if isinstance(v, list) else v for k, v in data.items()}
         return template.format(**data)
-    def inputs_to_source(self, inputs: Dict[str, object]) -> str:
-        try:
-            return self.process_template(self.input_format, inputs)
-        except KeyError as e:
-            raise KeyError(
-                f"Available inputs are {list(inputs.keys())} but input format requires a different ones: '{self.input_format}'"
-            ) from e
     def outputs_to_target_and_references(self, outputs: Dict[str, object]) -> str:
         try:
@@ -92,6 +106,25 @@ class InputOutputTemplate(Template):
         return target, references
 class MultipleChoiceTemplate(Template):
     """Formats the input (that specifies the question), the multiple choices to select the answer from, and specifies the field with the correct answer."""
@@ -149,19 +182,22 @@ class MultipleChoiceTemplate(Template):
             )
         return enumrated_choices
-    def inputs_to_source(self, inputs: Dict[str, object]) -> str:
         choices = self.get_choices(inputs, self.source_choice_format)
         inputs = {
             "numerals": ",".join(self.get_choices(inputs, "{choice_numeral}")),
             **inputs,
             self.choices_field: self.choices_seperator.join(choices),
         }
-        try:
-            return self.input_format.format(**inputs)
-        except KeyError as e:
-            raise KeyError(
-                f"Available inputs are {inputs.keys()} but input format requires a different one: {self.input_format}"
-            ) from e
     def outputs_to_target_and_references(self, outputs: Dict[str, object]) -> str:
         target = outputs[self.target_field]
@@ -221,20 +257,20 @@ class YesNoTemplate(Template):
     label_field: str = None
     yes_answer: str = "Yes"
     no_answer: str = "No"
-    postprocessors: List[str] = field(
-        default_factory=lambda: ["processors.to_string_stripped"]
-    )
-    def inputs_to_source(self, inputs: Dict[str, object]) -> str:
-        try:
-            data = {
-                k: ", ".join(v) if isinstance(v, list) else v for k, v in inputs.items()
-            }
-            return self.input_format.format(**data)
-        except KeyError as e:
-            raise RuntimeError(
-                f"Available inputs are {list(inputs.keys())} but input format requires a different one: {self.input_format}"
-            ) from e
     def outputs_to_target_and_references(self, outputs: Dict[str, object]) -> str:
         try:
@@ -266,9 +302,6 @@ class YesNoTemplate(Template):
             return self.yes_answer, [self.yes_answer]
         return self.no_answer, [self.no_answer]
-    def get_postprocessors(self) -> List[str]:
-        return self.postprocessors
 class KeyValTemplate(Template):
     """Generate field 'source' from fields designated as input, and fields 'target' and 'references' from fields designated as output, of the processed instance.
@@ -282,10 +315,6 @@ class KeyValTemplate(Template):
     outputs_key_val_seperator: str = ": "
     use_keys_for_outputs: bool = False
-    postprocessors: List[str] = field(
-        default_factory=lambda: ["processors.to_string_stripped"]
-    )
     def process_dict(
         self, dic: Dict[str, object], key_val_sep, pairs_sep, use_keys
     ) -> str:
@@ -299,13 +328,14 @@ class KeyValTemplate(Template):
             pairs.append(key_val_sep.join(key_val))
         return pairs_sep.join(pairs)
-    def inputs_to_source(self, inputs: Dict[str, object]) -> str:
-        return self.process_dict(
             inputs,
             key_val_sep=self.key_val_seperator,
             pairs_sep=self.pairs_seperator,
             use_keys=self.use_keys_for_inputs,
         )
     def outputs_to_target_and_references(self, outputs: Dict[str, object]) -> str:
         target = self.process_dict(
@@ -316,9 +346,6 @@ class KeyValTemplate(Template):
         )
         return target, [target]
-    def get_postprocessors(self) -> List[str]:
-        return self.postprocessors
 class OutputQuantizingTemplate(InputOutputTemplate):
     quantum: float = 0.1

 import json
 from abc import abstractmethod
 from typing import Any, Dict, List, Optional, Tuple
 from .collections import ListCollection
     """The role of template is to take the fields of every instance and verbalize it.
     Meaning the template is taking the instance and generating source, target and references.
+    Args:
+        skip_rendered_instance (bool): if "source", "target", and "references" are already defined fields in the instance, skip its processing
+        postprocessors: a list of strings being artifact names of text processors, to be applied on the model output
+        instruction: a formatting string that yields an instruction with potential participation of values from the "inputs" part of the instance
+        target_prefix: a string to be used to format the prompt. process() renders it with str.format(**inputs), so literal braces in it must be doubled.
     """
     skip_rendered_instance: bool = NonPositionalField(default=True)
     postprocessors: List[str] = NonPositionalField(
         default_factory=lambda: ["processors.to_string_stripped"]
     )
+    instruction: str = NonPositionalField(default_factory=lambda: "")
+    target_prefix: str = NonPositionalField(default_factory=lambda: "")
     def process(
         self, instance: Dict[str, Any], stream_name: Optional[str] = None
         inputs = instance.get("inputs")
         outputs = instance.get("outputs")
+        source, instruction = self.inputs_to_source(inputs)
         target, references = self.outputs_to_target_and_references(outputs)
         return {
             "source": source,
             "target": target,
             "references": references,
+            "instruction": instruction,
+            "target_prefix": self.target_prefix.format(**inputs),
         }
     @abstractmethod
+    def inputs_to_source(self, inputs: Dict[str, object]) -> Tuple[str, str]:
         pass
     @abstractmethod
         data = {k: ", ".join(v) if isinstance(v, list) else v for k, v in data.items()}
         return template.format(**data)
+    def inputs_to_source(self, inputs: Dict[str, object]) -> Tuple[str, str]:
+        """Render the source text and the instruction from the instance inputs.
+
+        ``self.input_format`` and ``self.instruction`` are each passed, in that
+        order, through ``self.process_template`` together with ``inputs``.
+
+        Returns:
+            A ``(source, instruction)`` tuple.
+
+        Raises:
+            KeyError: when ``process_template`` raises ``KeyError`` for either
+                string; the message lists the keys available in ``inputs``.
+        """
+        templates = [self.input_format, self.instruction]
+
+        def render(fmt):
+            # Re-raise with the available keys so a bad placeholder is easy to spot.
+            try:
+                return self.process_template(fmt, inputs)
+            except KeyError as err:
+                raise KeyError(
+                    f"Available inputs are {list(inputs.keys())} but input format requires a different ones: '{fmt}'"
+                ) from err
+
+        return tuple(render(fmt) for fmt in templates)
     def outputs_to_target_and_references(self, outputs: Dict[str, object]) -> str:
         try:
         return target, references
+class InputOutputReferenceTemplate(InputOutputTemplate):
+    """InputOutputTemplate variant whose single reference has its own format string.
+
+    Args:
+        reference: a formatting string rendered with the instance "outputs" to produce the one reference.
+    """
+
+    reference: str
+
+    def outputs_to_target_and_references(
+        self, outputs: Dict[str, object]
+    ) -> Tuple[str, List[str]]:
+        """Render the target and the reference from the instance outputs.
+
+        The target comes from ``self.output_format`` and the reference from
+        ``self.reference``; both go through ``self.process_template``.
+
+        Returns:
+            ``(target, [reference])`` -- the reference is wrapped in a
+            one-element list.
+
+        Raises:
+            KeyError: when ``process_template`` raises ``KeyError`` for either
+                string; the message names which one ("target" or "reference").
+        """
+        output_fields = {}
+        for name, val in [
+            ("target", self.output_format),
+            ("reference", self.reference),
+        ]:
+            try:
+                output_fields[name] = self.process_template(val, outputs)
+            except KeyError as e:
+                # list(...) so the message shows plain key names rather than
+                # the dict_keys(...) repr.
+                raise KeyError(
+                    f"Available outputs are {list(outputs.keys())} but {name} requires a different one: {val}"
+                ) from e
+        return output_fields["target"], [output_fields["reference"]]
 class MultipleChoiceTemplate(Template):
     """Formats the input (that specifies the question), the multiple choices to select the answer from, and specifies the field with the correct answer."""
             )
         return enumrated_choices
+    def inputs_to_source(self, inputs: Dict[str, object]) -> Tuple[str, str]:
+        """Render the source text and the instruction for a multiple-choice instance.
+
+        The formatting fields are the instance ``inputs`` extended with:
+          * "numerals": the results of ``self.get_choices(inputs, "{choice_numeral}")``
+            joined with ",". A "numerals" key already in ``inputs`` wins.
+          * ``self.choices_field``: the results of
+            ``self.get_choices(inputs, self.source_choice_format)`` joined with
+            ``self.choices_seperator``. This overrides any value in ``inputs``.
+
+        Returns:
+            A ``(source, instruction)`` tuple built from ``self.input_format``
+            and ``self.instruction``.
+
+        Raises:
+            KeyError: when either string references a field that is not available.
+        """
+        rendered_choices = self.get_choices(inputs, self.source_choice_format)
+        numerals = self.get_choices(inputs, "{choice_numeral}")
+        fields = {"numerals": ",".join(numerals), **inputs}
+        fields[self.choices_field] = self.choices_seperator.join(rendered_choices)
+
+        templates = [self.input_format, self.instruction]
+
+        def render(fmt):
+            try:
+                return fmt.format(**fields)
+            except KeyError as err:
+                raise KeyError(
+                    f"Available inputs are {fields.keys()} but input format requires a different one: {fmt}"
+                ) from err
+
+        return tuple(render(fmt) for fmt in templates)
     def outputs_to_target_and_references(self, outputs: Dict[str, object]) -> str:
         target = outputs[self.target_field]
     label_field: str = None
     yes_answer: str = "Yes"
     no_answer: str = "No"
+    def inputs_to_source(self, inputs: Dict[str, object]) -> Tuple[str, str]:
+        """Render the source text and the instruction from the instance inputs.
+
+        List values in ``inputs`` are joined with ", " before formatting; all
+        other values are used as they are.
+
+        Returns:
+            A ``(source, instruction)`` tuple built from ``self.input_format``
+            and ``self.instruction``.
+
+        Raises:
+            RuntimeError: when either string references a field missing from
+                ``inputs`` (the underlying ``KeyError`` is chained).
+        """
+        flattened = {}
+        for key, value in inputs.items():
+            flattened[key] = ", ".join(value) if isinstance(value, list) else value
+
+        templates = [self.input_format, self.instruction]
+
+        def render(fmt):
+            try:
+                return fmt.format(**flattened)
+            except KeyError as err:
+                raise RuntimeError(
+                    f"Available inputs are {list(inputs.keys())} but input format requires a different one: {fmt}"
+                ) from err
+
+        return tuple(render(fmt) for fmt in templates)
     def outputs_to_target_and_references(self, outputs: Dict[str, object]) -> str:
         try:
             return self.yes_answer, [self.yes_answer]
         return self.no_answer, [self.no_answer]
 class KeyValTemplate(Template):
     """Generate field 'source' from fields designated as input, and fields 'target' and 'references' from fields designated as output, of the processed instance.
     outputs_key_val_seperator: str = ": "
     use_keys_for_outputs: bool = False
     def process_dict(
         self, dic: Dict[str, object], key_val_sep, pairs_sep, use_keys
     ) -> str:
             pairs.append(key_val_sep.join(key_val))
         return pairs_sep.join(pairs)
+    def inputs_to_source(self, inputs: Dict[str, object]) -> Tuple[str, str]:
+        """Render the source as key/value pairs and format the instruction.
+
+        The source is ``self.process_dict`` applied to ``inputs`` with the
+        input-side separators. The instruction is ``self.instruction``
+        formatted with ``inputs``.
+
+        Returns:
+            A ``(source, instruction)`` tuple.
+
+        Raises:
+            KeyError: when ``self.instruction`` references a field missing
+                from ``inputs``.
+        """
+        source = self.process_dict(
+            inputs,
+            key_val_sep=self.key_val_seperator,
+            pairs_sep=self.pairs_seperator,
+            use_keys=self.use_keys_for_inputs,
+        )
+        # The second slot is the instruction, not a copy of the source:
+        # returning the source twice would drop a configured instruction and
+        # duplicate the source text in the "instruction" field.
+        try:
+            instruction = self.instruction.format(**inputs)
+        except KeyError as e:
+            raise KeyError(
+                f"Available inputs are {list(inputs.keys())} but instruction requires a different one: {self.instruction}"
+            ) from e
+        return source, instruction
     def outputs_to_target_and_references(self, outputs: Dict[str, object]) -> str:
         target = self.process_dict(
         )
         return target, [target]
 class OutputQuantizingTemplate(InputOutputTemplate):
     quantum: float = 0.1