Upload standard.py with huggingface_hub
Browse files: standard.py (+19 -3)
standard.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
from typing import List
|
| 2 |
|
| 3 |
from .card import TaskCard
|
| 4 |
-
from .dataclass import Field, InternalField, OptionalField
|
| 5 |
from .formats import Format, SystemFormat
|
| 6 |
from .logging_utils import get_logger
|
| 7 |
from .operator import SourceSequentialOperator, StreamingOperator
|
|
@@ -29,6 +29,8 @@ class BaseRecipe(Recipe, SourceSequentialOperator):
|
|
| 29 |
template: Template = None
|
| 30 |
system_prompt: SystemPrompt = Field(default_factory=EmptySystemPrompt)
|
| 31 |
format: Format = Field(default_factory=SystemFormat)
|
|
|
|
|
|
|
| 32 |
|
| 33 |
loader_limit: int = None
|
| 34 |
|
|
@@ -107,6 +109,18 @@ class BaseRecipe(Recipe, SourceSequentialOperator):
|
|
| 107 |
self.test_refiner.apply_to_streams = ["test"]
|
| 108 |
self.steps.append(self.test_refiner)
|
| 109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
def prepare(self):
|
| 111 |
self.steps = [
|
| 112 |
self.card.loader,
|
|
@@ -173,12 +187,12 @@ class BaseRecipe(Recipe, SourceSequentialOperator):
|
|
| 173 |
if self.augmentor.augment_model_input:
|
| 174 |
self.steps.append(self.augmentor)
|
| 175 |
|
| 176 |
-
postprocessors = self.
|
| 177 |
|
| 178 |
self.steps.append(
|
| 179 |
ToUnitxtGroup(
|
| 180 |
group="unitxt",
|
| 181 |
-
metrics=
|
| 182 |
postprocessors=postprocessors,
|
| 183 |
)
|
| 184 |
)
|
|
@@ -222,6 +236,8 @@ class StandardRecipe(StandardRecipeWithIndexes):
|
|
| 222 |
system_prompt (SystemPrompt, optional): SystemPrompt object to be used for the recipe.
|
| 223 |
loader_limit (int, optional): Specifies the maximum number of instances per stream to be returned from the loader (used to reduce loading time in large datasets)
|
| 224 |
format (SystemFormat, optional): SystemFormat object to be used for the recipe.
|
|
|
|
|
|
|
| 225 |
train_refiner (StreamRefiner, optional): Train refiner to be used in the recipe.
|
| 226 |
max_train_instances (int, optional): Maximum training instances for the refiner.
|
| 227 |
validation_refiner (StreamRefiner, optional): Validation refiner to be used in the recipe.
|
|
|
|
| 1 |
from typing import List
|
| 2 |
|
| 3 |
from .card import TaskCard
|
| 4 |
+
from .dataclass import Field, InternalField, NonPositionalField, OptionalField
|
| 5 |
from .formats import Format, SystemFormat
|
| 6 |
from .logging_utils import get_logger
|
| 7 |
from .operator import SourceSequentialOperator, StreamingOperator
|
|
|
|
| 29 |
template: Template = None
|
| 30 |
system_prompt: SystemPrompt = Field(default_factory=EmptySystemPrompt)
|
| 31 |
format: Format = Field(default_factory=SystemFormat)
|
| 32 |
+
metrics: List[str] = NonPositionalField(default=None)
|
| 33 |
+
postprocessors: List[str] = NonPositionalField(default=None)
|
| 34 |
|
| 35 |
loader_limit: int = None
|
| 36 |
|
|
|
|
| 109 |
self.test_refiner.apply_to_streams = ["test"]
|
| 110 |
self.steps.append(self.test_refiner)
|
| 111 |
|
| 112 |
+
def prepare_metrics_and_postprocessors(self):
    """Pick the metrics and postprocessors to use for this recipe.

    A value set on the recipe itself takes precedence. Only ``None`` means
    "not set": an explicitly supplied empty list is kept as-is rather than
    being replaced by the default.

    Defaults used when the recipe field is ``None``:
        * postprocessors -- ``self.template.get_postprocessors()``
        * metrics -- ``self.card.task.metrics``

    Returns:
        tuple: ``(metrics, postprocessors)``, in that order.
    """
    # The template / card are consulted only when no override was given, so
    # they are never touched if both fields are set explicitly.
    postprocessors = self.postprocessors
    if postprocessors is None:
        postprocessors = self.template.get_postprocessors()

    metrics = self.metrics
    if metrics is None:
        metrics = self.card.task.metrics

    return metrics, postprocessors
|
| 123 |
+
|
| 124 |
def prepare(self):
|
| 125 |
self.steps = [
|
| 126 |
self.card.loader,
|
|
|
|
| 187 |
if self.augmentor.augment_model_input:
|
| 188 |
self.steps.append(self.augmentor)
|
| 189 |
|
| 190 |
+
metrics, postprocessors = self.prepare_metrics_and_postprocessors()
|
| 191 |
|
| 192 |
self.steps.append(
|
| 193 |
ToUnitxtGroup(
|
| 194 |
group="unitxt",
|
| 195 |
+
metrics=metrics,
|
| 196 |
postprocessors=postprocessors,
|
| 197 |
)
|
| 198 |
)
|
|
|
|
| 236 |
system_prompt (SystemPrompt, optional): SystemPrompt object to be used for the recipe.
|
| 237 |
loader_limit (int, optional): Specifies the maximum number of instances per stream to be returned from the loader (used to reduce loading time in large datasets)
|
| 238 |
format (SystemFormat, optional): SystemFormat object to be used for the recipe.
|
| 239 |
+
metrics (List[str]): list of catalog metrics to use with this recipe.
|
| 240 |
+
postprocessors (List[str]): list of catalog processors to apply at post processing. (Not recommended to use from here)
|
| 241 |
train_refiner (StreamRefiner, optional): Train refiner to be used in the recipe.
|
| 242 |
max_train_instances (int, optional): Maximum training instances for the refiner.
|
| 243 |
validation_refiner (StreamRefiner, optional): Validation refiner to be used in the recipe.
|