Upload stream.py with huggingface_hub
Browse files
stream.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
from typing import Dict, Iterable
|
| 2 |
|
| 3 |
from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
|
|
@@ -86,6 +87,10 @@ def is_stream(obj):
|
|
| 86 |
return isinstance(obj, IterableDataset) or isinstance(obj, Stream) or isinstance(obj, Dataset)
|
| 87 |
|
| 88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
class MultiStream(dict):
|
| 90 |
"""A class for handling multiple streams of data in a dictionary-like format.
|
| 91 |
|
|
@@ -179,7 +184,7 @@ class MultiStream(dict):
|
|
| 179 |
|
| 180 |
return cls(
|
| 181 |
{
|
| 182 |
-
key: Stream(
|
| 183 |
for key, iterable in iterables.items()
|
| 184 |
}
|
| 185 |
)
|
|
|
|
| 1 |
+
from copy import deepcopy
|
| 2 |
from typing import Dict, Iterable
|
| 3 |
|
| 4 |
from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
|
|
|
|
| 87 |
return isinstance(obj, IterableDataset) or isinstance(obj, Stream) or isinstance(obj, Dataset)
|
| 88 |
|
| 89 |
|
| 90 |
+
def iterable_starter(iterable):
    """Return a fresh iterator over an independent deep copy of ``iterable``.

    The argument is deep-copied on every call, so the returned iterator
    walks the copy rather than the object that was passed in. Each call
    therefore starts again from the beginning of the data.

    Args:
        iterable: Any object that ``copy.deepcopy`` can copy and ``iter``
            can iterate.

    Returns:
        An iterator over the deep copy of ``iterable``.
    """
    # Copy first, then iterate: the iterator is bound to the copy, not to
    # the caller's object.
    snapshot = deepcopy(iterable)
    return iter(snapshot)
|
| 92 |
+
|
| 93 |
+
|
| 94 |
class MultiStream(dict):
|
| 95 |
"""A class for handling multiple streams of data in a dictionary-like format.
|
| 96 |
|
|
|
|
| 184 |
|
| 185 |
return cls(
|
| 186 |
{
|
| 187 |
+
key: Stream(iterable_starter, gen_kwargs={"iterable": iterable}, streaming=streaming, caching=caching)
|
| 188 |
for key, iterable in iterables.items()
|
| 189 |
}
|
| 190 |
)
|