Spaces:
Build error
Build error
Commit
·
906ccdd
1
Parent(s):
083e7b4
code cleaning
Browse files- dataloader/__init__.py +10 -0
- dataloader/commongen.py +21 -0
- dataloader/dataloader.py +19 -0
- dataloader/gsm8k.py +22 -0
- dataloader/humaneval.py +21 -0
- dataloader/logic_grid.py +22 -0
- dataloader/mgsm.py +23 -0
- dataloader/responsegen.py +21 -0
dataloader/__init__.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from agentverse.registry import Registry
|
| 2 |
+
|
| 3 |
+
# Registry used by the loader modules to register their classes under task
# names via ``@dataloader_registry.register(...)``. It must be created before
# the submodule imports below, because each of those modules does
# ``from . import dataloader_registry`` at import time.
dataloader_registry = Registry(name="dataloader")
|
| 4 |
+
|
| 5 |
+
from .gsm8k import GSM8KLoader
|
| 6 |
+
from .responsegen import ResponseGenLoader
|
| 7 |
+
from .humaneval import HumanevalLoader
|
| 8 |
+
from .commongen import CommongenLoader
|
| 9 |
+
from .mgsm import MGSMLoader
|
| 10 |
+
from .logic_grid import LogicGridLoader
|
dataloader/commongen.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .dataloader import DataLoader
|
| 2 |
+
from . import dataloader_registry
|
| 3 |
+
import json
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
@dataloader_registry.register("tasksolving/commongen/gpt-4")
@dataloader_registry.register("tasksolving/commongen/gpt-3.5")
class CommongenLoader(DataLoader):
    """Loader registered for the ``tasksolving/commongen`` tasks.

    Reads a JSON Lines file and keeps the ``concepts`` field of every record
    as the example input. No reference answer is stored.
    """

    def __init__(self, path: str):
        """Store ``path`` and load the examples through the base constructor."""
        super().__init__(path)

    def load(self):
        """Append one ``{"input": ..., "answer": None}`` dict per record.

        Raises:
            OSError: if ``self.path`` cannot be opened.
            json.JSONDecodeError: if a non-blank line is not valid JSON.
            KeyError: if a record has no ``"concepts"`` field.
        """
        # Explicit UTF-8 so decoding does not depend on the platform locale.
        with open(self.path, encoding="utf-8") as f:
            for raw in f:
                # Tolerate blank lines (e.g. a trailing newline at EOF),
                # which json.loads would otherwise reject.
                if not raw.strip():
                    continue
                record = json.loads(raw)
                self.examples.append(
                    {
                        "input": record["concepts"],
                        "answer": None,
                    }
                )
|
dataloader/dataloader.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
from abc import abstractmethod
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class DataLoader:
    """Base loader that reads a JSON Lines file into ``self.examples``.

    The constructor stores ``path``, creates an empty ``examples`` list and
    immediately calls :meth:`load`. A subclass whose ``load`` needs extra
    attributes must therefore set them *before* calling
    ``super().__init__(path)``.

    Iterating over a loader yields the loaded examples in file order.
    """

    def __init__(self, path: str):
        """Remember ``path`` and eagerly load every example from it."""
        self.path = path
        self.examples = []
        self.load()

    @abstractmethod
    def load(self):
        """Make sure that each example is formatted as {"input": ..., "answer": ...}

        This default implementation appends every decoded JSON line as-is,
        without reshaping it; subclasses override it to map their own field
        names onto ``input`` / ``answer``.

        Raises:
            OSError: if ``self.path`` cannot be opened.
            json.JSONDecodeError: if a non-blank line is not valid JSON.
        """
        # NOTE: this class does not use ABCMeta, so @abstractmethod is only a
        # marker here; the class stays instantiable and this body is callable.
        # Explicit UTF-8 so decoding does not depend on the platform locale.
        with open(self.path, encoding="utf-8") as f:
            for raw in f:
                # Tolerate blank lines (e.g. a trailing newline at EOF),
                # which json.loads would otherwise reject.
                if not raw.strip():
                    continue
                self.examples.append(json.loads(raw))

    def __iter__(self):
        """Return an iterator over the loaded examples."""
        return iter(self.examples)
|
dataloader/gsm8k.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .dataloader import DataLoader
|
| 2 |
+
from . import dataloader_registry
|
| 3 |
+
import json
|
| 4 |
+
import re
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
@dataloader_registry.register("tasksolving/gsm8k")
class GSM8KLoader(DataLoader):
    """Loader registered for the ``tasksolving/gsm8k`` task.

    Reads a JSON Lines file whose records carry ``question`` and ``answer``
    fields. The stored answer is whatever follows the last ``"#### "``
    marker in the ``answer`` text.
    """

    def __init__(self, path: str):
        """Compile ``answer_pat``, then load through the base constructor."""
        # Assigned before super().__init__(path), which invokes load().
        # NOTE(review): load() below does not use this pattern; it is kept
        # as an attribute in case external code reads it.
        self.answer_pat = re.compile(r"#### (-?\d+)")
        super().__init__(path)

    def load(self):
        """Append one ``{"input": question, "answer": final_answer}`` per record.

        If the ``answer`` text contains no ``"#### "`` marker, the whole
        text is stored as the answer (``split`` returns a single element).

        Raises:
            OSError: if ``self.path`` cannot be opened.
            json.JSONDecodeError: if a non-blank line is not valid JSON.
            KeyError: if a record lacks ``"question"`` or ``"answer"``.
        """
        # Explicit UTF-8 so decoding does not depend on the platform locale.
        with open(self.path, encoding="utf-8") as f:
            for raw in f:
                # Tolerate blank lines (e.g. a trailing newline at EOF),
                # which json.loads would otherwise reject.
                if not raw.strip():
                    continue
                record = json.loads(raw)
                self.examples.append(
                    {
                        "input": record["question"],
                        "answer": record["answer"].split('#### ')[-1],
                    }
                )
|
dataloader/humaneval.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .dataloader import DataLoader
|
| 2 |
+
from . import dataloader_registry
|
| 3 |
+
import json
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
@dataloader_registry.register("tasksolving/humaneval/gpt-4")
@dataloader_registry.register("tasksolving/humaneval/gpt-3.5")
class HumanevalLoader(DataLoader):
    """Loader registered for the ``tasksolving/humaneval`` tasks.

    Reads a JSON Lines file and maps each record's ``prompt`` field to the
    example input and its ``test`` field to the example answer.
    """

    def __init__(self, path: str):
        """Store ``path`` and load the examples through the base constructor."""
        super().__init__(path)

    def load(self):
        """Append one ``{"input": prompt, "answer": test}`` dict per record.

        Raises:
            OSError: if ``self.path`` cannot be opened.
            json.JSONDecodeError: if a non-blank line is not valid JSON.
            KeyError: if a record lacks ``"prompt"`` or ``"test"``.
        """
        # Explicit UTF-8 so decoding does not depend on the platform locale.
        with open(self.path, encoding="utf-8") as f:
            for raw in f:
                # Tolerate blank lines (e.g. a trailing newline at EOF),
                # which json.loads would otherwise reject.
                if not raw.strip():
                    continue
                record = json.loads(raw)
                self.examples.append(
                    {
                        "input": record["prompt"],
                        "answer": record["test"],
                    }
                )
|
dataloader/logic_grid.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .dataloader import DataLoader
|
| 2 |
+
from . import dataloader_registry
|
| 3 |
+
import json
|
| 4 |
+
import re
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
@dataloader_registry.register("tasksolving/logic_grid/gpt-4")
class LogicGridLoader(DataLoader):
    """Loader registered for the ``tasksolving/logic_grid/gpt-4`` task.

    Reads a JSON Lines file and maps each record's ``inputs`` field to the
    example input and the first element of ``targets`` to the answer.
    """

    def __init__(self, path: str):
        """Compile ``answer_pat``, then load through the base constructor."""
        # Assigned before super().__init__(path), which invokes load().
        # NOTE(review): load() below does not use this pattern; it is kept
        # as an attribute in case external code reads it.
        self.answer_pat = re.compile(r"#### (-?\d+)")
        super().__init__(path)

    def load(self):
        """Append one ``{"input": inputs, "answer": targets[0]}`` per record.

        Raises:
            OSError: if ``self.path`` cannot be opened.
            json.JSONDecodeError: if a non-blank line is not valid JSON.
            KeyError: if a record lacks ``"inputs"`` or ``"targets"``.
            IndexError: if ``targets`` is an empty sequence.
        """
        # Explicit UTF-8 so decoding does not depend on the platform locale.
        with open(self.path, encoding="utf-8") as f:
            for raw in f:
                # Tolerate blank lines (e.g. a trailing newline at EOF),
                # which json.loads would otherwise reject.
                if not raw.strip():
                    continue
                record = json.loads(raw)
                self.examples.append(
                    {
                        "input": record["inputs"],
                        "answer": record["targets"][0],
                    }
                )
|
dataloader/mgsm.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .dataloader import DataLoader
|
| 2 |
+
from . import dataloader_registry
|
| 3 |
+
import json
|
| 4 |
+
import re
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
@dataloader_registry.register("tasksolving/mgsm/gpt-4")
@dataloader_registry.register("tasksolving/mgsm/gpt-3.5")
class MGSMLoader(DataLoader):
    """Loader registered for the ``tasksolving/mgsm`` tasks.

    Reads a JSON Lines file and maps each record's ``question`` field to the
    example input and its ``answer_number`` field to the example answer.
    """

    def __init__(self, path: str):
        """Compile ``answer_pat``, then load through the base constructor."""
        # Assigned before super().__init__(path), which invokes load().
        # NOTE(review): load() below does not use this pattern; it is kept
        # as an attribute in case external code reads it.
        self.answer_pat = re.compile(r"#### (-?\d+)")
        super().__init__(path)

    def load(self):
        """Append one ``{"input": question, "answer": answer_number}`` per record.

        Raises:
            OSError: if ``self.path`` cannot be opened.
            json.JSONDecodeError: if a non-blank line is not valid JSON.
            KeyError: if a record lacks ``"question"`` or ``"answer_number"``.
        """
        # Explicit UTF-8 so that non-ASCII question text decodes the same way
        # on every platform instead of depending on the locale default.
        with open(self.path, encoding="utf-8") as f:
            for raw in f:
                # Tolerate blank lines (e.g. a trailing newline at EOF),
                # which json.loads would otherwise reject.
                if not raw.strip():
                    continue
                record = json.loads(raw)
                self.examples.append(
                    {
                        "input": record["question"],
                        "answer": record["answer_number"],
                    }
                )
|
dataloader/responsegen.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .dataloader import DataLoader
|
| 2 |
+
from . import dataloader_registry
|
| 3 |
+
import json
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
@dataloader_registry.register("tasksolving/responsegen/gpt-3.5")
@dataloader_registry.register("tasksolving/responsegen/gpt-4")
class ResponseGenLoader(DataLoader):
    """Loader registered for the ``tasksolving/responsegen`` tasks.

    Reads a JSON Lines file and keeps only the ``input`` and ``answer``
    fields of each record.
    """

    def __init__(self, path: str):
        """Store ``path`` and load the examples through the base constructor."""
        super().__init__(path)

    def load(self):
        """Append one ``{"input": ..., "answer": ...}`` dict per record.

        Raises:
            OSError: if ``self.path`` cannot be opened.
            json.JSONDecodeError: if a non-blank line is not valid JSON.
            KeyError: if a record lacks ``"input"`` or ``"answer"``.
        """
        # Explicit UTF-8 so decoding does not depend on the platform locale.
        with open(self.path, encoding="utf-8") as f:
            for raw in f:
                # Tolerate blank lines (e.g. a trailing newline at EOF),
                # which json.loads would otherwise reject.
                if not raw.strip():
                    continue
                record = json.loads(raw)
                self.examples.append(
                    {
                        "input": record["input"],
                        "answer": record["answer"],
                    }
                )
|