|
from collections import defaultdict, deque |
|
from itertools import filterfalse |
|
|
|
|
|
def unique_everseen(iterable, key=None):
    """List unique elements, preserving order. Remember all elements ever seen.

    Yield each element of *iterable* the first time it is encountered and
    skip every later duplicate.  If *key* is given, two elements count as
    duplicates when ``key(element)`` compares equal; the original element
    (not the key result) is what gets yielded.

        >>> list(unique_everseen('AAAABBBCCDAABBB'))
        ['A', 'B', 'C', 'D']
        >>> list(unique_everseen('ABBCcAD', str.lower))
        ['A', 'B', 'C', 'D']

    Markers that are hashable are remembered in a set.  Unhashable markers
    (lists, dicts, ...) are remembered in a list instead, so they are
    supported too, at the cost of a linear scan per unhashable item:

        >>> list(unique_everseen([[1], [2], [1]]))
        [[1], [2]]
    """
    seen_hashable = set()
    seen_unhashable = []

    for element in iterable:
        marker = element if key is None else key(element)
        try:
            is_new = marker not in seen_hashable
            if is_new:
                seen_hashable.add(marker)
        except TypeError:
            # The marker cannot be hashed, so it can never live in the set;
            # track it in the list and compare by equality instead.
            is_new = marker not in seen_unhashable
            if is_new:
                seen_unhashable.append(marker)
        # The yield stays outside the try block so that a TypeError thrown
        # into the generator by its consumer is never mistaken for an
        # unhashable marker.
        if is_new:
            yield element
|
|
|
|
|
|
|
def always_iterable(obj, base_type=(str, bytes)):
    """Return an iterator for *obj*, whether or not *obj* is itself iterable.

    An iterable *obj* is iterated over directly::

        >>> obj = (1, 2, 3)
        >>> list(always_iterable(obj))
        [1, 2, 3]

    A non-iterable *obj* is wrapped so that it becomes the single item::

        >>> obj = 1
        >>> list(always_iterable(obj))
        [1]

    ``None`` produces an empty iterator::

        >>> obj = None
        >>> list(always_iterable(None))
        []

    Text and binary strings are treated as single items by default::

        >>> obj = 'foo'
        >>> list(always_iterable(obj))
        ['foo']

    More generally, any object for which ``isinstance(obj, base_type)`` is
    ``True`` is treated as a single item rather than iterated::

        >>> obj = {'a': 1}
        >>> list(always_iterable(obj))  # Iterate over the dict's keys
        ['a']
        >>> list(always_iterable(obj, base_type=dict))  # Treat dicts as a unit
        [{'a': 1}]

    Pass ``base_type=None`` to disable that special handling, so everything
    Python considers iterable is iterated::

        >>> obj = 'foo'
        >>> list(always_iterable(obj, base_type=None))
        ['f', 'o', 'o']
    """
    if obj is None:
        return iter(())

    # Objects matching *base_type* are deliberately kept whole.
    keep_whole = base_type is not None and isinstance(obj, base_type)
    if not keep_whole:
        try:
            return iter(obj)
        except TypeError:
            # Not iterable after all: fall through and wrap it below.
            pass

    return iter((obj,))
|
|
|
|
|
|
|
class bucket:
    """Wrap *iterable* and return an object that buckets the iterable into
    child iterables based on a *key* function.

        >>> iterable = ['a1', 'b1', 'c1', 'a2', 'b2', 'c2', 'b3']
        >>> s = bucket(iterable, key=lambda x: x[0])  # Bucket by 1st character
        >>> sorted(list(s))  # Get the keys
        ['a', 'b', 'c']
        >>> a_iterable = s['a']
        >>> next(a_iterable)
        'a1'
        >>> next(a_iterable)
        'a2'
        >>> list(s['b'])
        ['b1', 'b2', 'b3']

    The original iterable will be advanced and its items will be cached until
    they are used by the child iterables. This may require significant storage.

    Iterating over the bucket object exhausts the original iterable and
    yields the key of every bucket that still holds unconsumed items, so the
    result agrees with the ``in`` operator.  Probing for a key that never
    occurs does not make it show up as a key:

        >>> s = bucket(['a1', 'b1'], key=lambda x: x[0])
        >>> 'z' in s
        False
        >>> list(s)
        ['a', 'b']

    By default, attempting to select a bucket to which no items belong will
    exhaust the iterable and cache all values.
    If you specify a *validator* function, selected buckets will instead be
    checked against it.

        >>> from itertools import count
        >>> it = count(1, 2)  # Infinite sequence of odd numbers
        >>> key = lambda x: x % 10  # Bucket by last digit
        >>> validator = lambda x: x in {1, 3, 5, 7, 9}  # Odd digits only
        >>> s = bucket(it, key=key, validator=validator)
        >>> 2 in s
        False
        >>> list(s[2])
        []

    """

    def __init__(self, iterable, key, validator=None):
        """Store the source iterator, the *key* function and the *validator*.

        When *validator* is omitted (or falsy) every key is accepted.
        """
        self._it = iter(iterable)
        self._key = key
        # Maps a bucket key to the deque of items read from the source but
        # not yet handed out to that bucket's child iterator.
        self._cache = defaultdict(deque)
        self._validator = validator or (lambda x: True)

    def __contains__(self, value):
        """Return ``True`` if the bucket for *value* has an item remaining.

        The check peeks at the bucket's next item and pushes it back onto the
        front of the cache, so it does not consume anything from the bucket.
        """
        if not self._validator(value):
            return False

        try:
            item = next(self[value])
        except StopIteration:
            return False

        # Put the peeked item back so the next reader still sees it first.
        self._cache[value].appendleft(item)
        return True

    def _get_values(self, value):
        """
        Helper to yield items from the parent iterator that match *value*.
        Items that don't match are stored in the local cache as they
        are encountered.
        """
        while True:
            # Serve already-cached items first.  ``.get()`` is used instead
            # of indexing so that merely asking for a bucket never inserts an
            # empty deque (a phantom key) into the defaultdict.
            pending = self._cache.get(value)
            if pending:
                yield pending.popleft()
                continue

            # Nothing cached for this bucket: advance the source until an
            # item for it turns up, caching valid items for other buckets.
            for item in self._it:
                item_value = self._key(item)
                if item_value == value:
                    yield item
                    # Re-check the cache before reading the source again.
                    break
                if self._validator(item_value):
                    self._cache[item_value].append(item)
            else:
                # The source is exhausted, so this bucket is finished.
                return

    def __iter__(self):
        """Exhaust the source and yield the keys of the non-empty buckets."""
        for item in self._it:
            item_value = self._key(item)
            if self._validator(item_value):
                self._cache[item_value].append(item)

        # Snapshot the keys so the cache may be touched while the caller
        # iterates, and skip buckets with nothing left so that the keys
        # reported here are exactly those for which ``key in self`` holds.
        yield from [k for k, pending in self._cache.items() if pending]

    def __getitem__(self, value):
        """Return an iterator over the items whose key equals *value*.

        A *value* rejected by the validator yields an empty iterator without
        touching the source iterable.
        """
        if not self._validator(value):
            return iter(())

        return self._get_values(value)
|
|