Spaces:
Running
Running
upd
Browse files
tasks.py
CHANGED
@@ -94,6 +94,10 @@ class Task:
|
|
94 |
def samples(self):
|
95 |
return self.dataset[self.input_column]
|
96 |
|
|
|
|
|
|
|
|
|
97 |
@cached_property
|
98 |
def dataset(self):
|
99 |
ds = (
|
@@ -149,12 +153,10 @@ class Task:
|
|
149 |
)
|
150 |
return metric._compute
|
151 |
|
152 |
-
@
|
153 |
def result(self) -> dict:
|
154 |
assert self.outputs, "Please run the task first."
|
155 |
-
results = self.metric(
|
156 |
-
responses=self.outputs, references=self.dataset[self.label_column]
|
157 |
-
)
|
158 |
# logging.info(f"{self.name}:{results}")
|
159 |
return results
|
160 |
|
|
|
94 |
def samples(self):
|
95 |
return self.dataset[self.input_column]
|
96 |
|
97 |
+
@cached_property
|
98 |
+
def labels(self):
|
99 |
+
return self.dataset[self.label_column]
|
100 |
+
|
101 |
@cached_property
|
102 |
def dataset(self):
|
103 |
ds = (
|
|
|
153 |
)
|
154 |
return metric._compute
|
155 |
|
156 |
+
@property
|
157 |
def result(self) -> dict:
|
158 |
assert self.outputs, "Please run the task first."
|
159 |
+
results = self.metric(self.outputs, self.labels)
|
|
|
|
|
160 |
# logging.info(f"{self.name}:{results}")
|
161 |
return results
|
162 |
|
tlem.py
CHANGED
@@ -82,6 +82,7 @@ class Suite(EvaluationSuite):
|
|
82 |
"ceval-chat",
|
83 |
"bbh",
|
84 |
"drop",
|
|
|
85 |
]
|
86 |
|
87 |
def __getitem__(self, key) -> Task:
|
@@ -215,6 +216,7 @@ class Suite(EvaluationSuite):
|
|
215 |
except ValueError:
|
216 |
logging.debug(f"add {task.name} to suite.")
|
217 |
self.tasks.append(task)
|
|
|
218 |
return self.tasks[-1]
|
219 |
|
220 |
def drop_duplicates(self, suite):
|
|
|
82 |
"ceval-chat",
|
83 |
"bbh",
|
84 |
"drop",
|
85 |
+
"MATH",
|
86 |
]
|
87 |
|
88 |
def __getitem__(self, key) -> Task:
|
|
|
216 |
except ValueError:
|
217 |
logging.debug(f"add {task.name} to suite.")
|
218 |
self.tasks.append(task)
|
219 |
+
logging.debug(self.tasks)
|
220 |
return self.tasks[-1]
|
221 |
|
222 |
def drop_duplicates(self, suite):
|