Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -99,8 +99,7 @@ def _sorted_split_key(split: str) -> str:
|
|
| 99 |
@lru_cache(maxsize=128)
|
| 100 |
def get_parquet_splits(dataset: str, config: str) -> List[str]:
|
| 101 |
fs = get_parquet_fs(dataset)
|
| 102 |
-
|
| 103 |
-
return sorted(set(parts[-4] if len(parts) > 3 and parts[-2] == "of" else parts[-1] for parts in all_parts), key=_sorted_split_key)
|
| 104 |
|
| 105 |
|
| 106 |
#####################################################
|
|
@@ -114,7 +113,7 @@ RowGroupReaders = List[Callable[[], pa.Table]]
|
|
| 114 |
@lru_cache(maxsize=128)
|
| 115 |
def index(dataset: str, config: str, split: str) -> Tuple[np.ndarray, RowGroupReaders, int, Features]:
|
| 116 |
fs = get_parquet_fs(dataset)
|
| 117 |
-
sources = fs.glob(f"{config}
|
| 118 |
if not sources:
|
| 119 |
if config not in get_parquet_configs(dataset):
|
| 120 |
raise AppError(f"Invalid config {config}. Available configs are: {', '.join(get_parquet_configs(dataset))}.")
|
|
|
|
| 99 |
@lru_cache(maxsize=128)
|
| 100 |
def get_parquet_splits(dataset: str, config: str) -> List[str]:
|
| 101 |
fs = get_parquet_fs(dataset)
|
| 102 |
+
return [path for path in fs.ls(config) if fs.isdir(path)]
|
|
|
|
| 103 |
|
| 104 |
|
| 105 |
#####################################################
|
|
|
|
| 113 |
@lru_cache(maxsize=128)
|
| 114 |
def index(dataset: str, config: str, split: str) -> Tuple[np.ndarray, RowGroupReaders, int, Features]:
|
| 115 |
fs = get_parquet_fs(dataset)
|
| 116 |
+
sources = fs.glob(f"{config}/{split}/*.parquet")
|
| 117 |
if not sources:
|
| 118 |
if config not in get_parquet_configs(dataset):
|
| 119 |
raise AppError(f"Invalid config {config}. Available configs are: {', '.join(get_parquet_configs(dataset))}.")
|