Upload split_utils.py with huggingface_hub
Browse files
- split_utils.py +4 -1
split_utils.py
CHANGED
|
@@ -30,7 +30,7 @@ def parse_random_mix_string(input_str):
|
|
| 30 |
"""
|
| 31 |
|
| 32 |
if not re.fullmatch(r"(([a-zA-Z]+\[\d*\.?\d*%?\]|[a-zA-Z]+)\+)*([a-zA-Z]+\[\d*\.?\d*%?\]|[a-zA-Z]+)", input_str):
|
| 33 |
-
raise ValueError("Invalid input format")
|
| 34 |
|
| 35 |
pattern = re.compile(r"([a-zA-Z]+)(\[\d*\.?\d*%?\])?")
|
| 36 |
matches = pattern.findall(input_str)
|
|
@@ -227,6 +227,9 @@ def random_mix_generator(new_stream_name, new_stream_sources, stream_routing, in
|
|
| 227 |
for old_stream_name in new_stream_sources:
|
| 228 |
optinal_streams, weights = stream_routing[old_stream_name]
|
| 229 |
with nested_seed(old_stream_name) as rand:
|
|
|
|
|
|
|
|
|
|
| 230 |
for item in input_streams[old_stream_name]:
|
| 231 |
choice = rand.choices(optinal_streams, weights=weights, k=1)[0]
|
| 232 |
if choice == new_stream_name:
|
|
|
|
| 30 |
"""
|
| 31 |
|
| 32 |
if not re.fullmatch(r"(([a-zA-Z]+\[\d*\.?\d*%?\]|[a-zA-Z]+)\+)*([a-zA-Z]+\[\d*\.?\d*%?\]|[a-zA-Z]+)", input_str):
|
| 33 |
+
raise ValueError(f"Invalid input format for split '{input_str}'")
|
| 34 |
|
| 35 |
pattern = re.compile(r"([a-zA-Z]+)(\[\d*\.?\d*%?\])?")
|
| 36 |
matches = pattern.findall(input_str)
|
|
|
|
| 227 |
for old_stream_name in new_stream_sources:
|
| 228 |
optinal_streams, weights = stream_routing[old_stream_name]
|
| 229 |
with nested_seed(old_stream_name) as rand:
|
| 230 |
+
assert (
|
| 231 |
+
old_stream_name in input_streams
|
| 232 |
+
), f"'{old_stream_name}' split not found. Possibles options: {input_streams.keys()}"
|
| 233 |
for item in input_streams[old_stream_name]:
|
| 234 |
choice = rand.choices(optinal_streams, weights=weights, k=1)[0]
|
| 235 |
if choice == new_stream_name:
|