Upload catalog.py with huggingface_hub
Browse files- catalog.py +39 -1
catalog.py
CHANGED
|
@@ -1,12 +1,14 @@
|
|
| 1 |
import os
|
| 2 |
import re
|
|
|
|
| 3 |
from pathlib import Path
|
| 4 |
from typing import Optional
|
| 5 |
|
| 6 |
import requests
|
| 7 |
|
| 8 |
-
from .artifact import Artifact, Artifactory, reset_artifacts_cache
|
| 9 |
from .logging_utils import get_logger
|
|
|
|
| 10 |
from .version import version
|
| 11 |
|
| 12 |
logger = get_logger()
|
|
@@ -35,6 +37,7 @@ default_catalog_path = os.path.join(lib_dir, "catalog")
|
|
| 35 |
class LocalCatalog(Catalog):
|
| 36 |
name: str = "local"
|
| 37 |
location: str = default_catalog_path
|
|
|
|
| 38 |
|
| 39 |
def path(self, artifact_identifier: str):
|
| 40 |
assert (
|
|
@@ -92,6 +95,7 @@ class GithubCatalog(LocalCatalog):
|
|
| 92 |
repo = "unitxt"
|
| 93 |
repo_dir = "src/unitxt/catalog"
|
| 94 |
user = "IBM"
|
|
|
|
| 95 |
|
| 96 |
def prepare(self):
|
| 97 |
tag = version
|
|
@@ -133,3 +137,37 @@ def add_to_catalog(
|
|
| 133 |
artifact, name, overwrite=overwrite, verbose=verbose
|
| 134 |
) # remove collection (its actually the dir).
|
| 135 |
# verify name
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
import re
|
| 3 |
+
from collections import Counter
|
| 4 |
from pathlib import Path
|
| 5 |
from typing import Optional
|
| 6 |
|
| 7 |
import requests
|
| 8 |
|
| 9 |
+
from .artifact import Artifact, Artifactories, Artifactory, reset_artifacts_cache
|
| 10 |
from .logging_utils import get_logger
|
| 11 |
+
from .text_utils import print_dict
|
| 12 |
from .version import version
|
| 13 |
|
| 14 |
logger = get_logger()
|
|
|
|
| 37 |
class LocalCatalog(Catalog):
|
| 38 |
name: str = "local"
|
| 39 |
location: str = default_catalog_path
|
| 40 |
+
is_local: bool = True
|
| 41 |
|
| 42 |
def path(self, artifact_identifier: str):
|
| 43 |
assert (
|
|
|
|
| 95 |
repo = "unitxt"
|
| 96 |
repo_dir = "src/unitxt/catalog"
|
| 97 |
user = "IBM"
|
| 98 |
+
is_local: bool = False
|
| 99 |
|
| 100 |
def prepare(self):
|
| 101 |
tag = version
|
|
|
|
| 137 |
artifact, name, overwrite=overwrite, verbose=verbose
|
| 138 |
) # remove collection (its actually the dir).
|
| 139 |
# verify name
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
def get_local_catalogs_paths():
    """Collect the locations of the registered catalogs flagged as local.

    Iterates over ``Artifactories()`` and keeps the ``location`` of every
    ``LocalCatalog`` instance whose ``is_local`` attribute is truthy,
    preserving iteration order.

    Returns:
        list: The ``location`` values of the matching catalogs.
    """
    return [
        catalog.location
        for catalog in Artifactories()
        # isinstance is checked first so is_local is only read on LocalCatalog
        # instances.
        if isinstance(catalog, LocalCatalog) and catalog.is_local
    ]
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
def count_files_recursively(folder):
    """Return the number of files found under ``folder`` at any depth.

    Only the file entries reported by ``os.walk`` are counted; directories
    themselves do not contribute to the total.

    Args:
        folder: Path of the directory tree to scan.

    Returns:
        int: Total number of files in the tree.
    """
    return sum(len(filenames) for _root, _subdirs, filenames in os.walk(folder))
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
def local_catalog_summary(catalog_path):
    """Count the files under each top-level sub-directory of a catalog.

    Args:
        catalog_path: Path of the catalog directory to inspect.

    Returns:
        dict: Maps the name of each immediate sub-directory of
        ``catalog_path`` to the number of files below it (counted
        recursively). Entries of ``catalog_path`` that are not directories
        are ignored.
    """
    result = {}

    # The loop variable is deliberately not called `dir`, which would shadow
    # the builtin of the same name.
    for entry_name in os.listdir(catalog_path):
        entry_path = os.path.join(catalog_path, entry_name)
        if os.path.isdir(entry_path):
            result[entry_name] = count_files_recursively(entry_path)

    return result
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
def summary():
    """Print and return file counts aggregated over all local catalogs.

    For every path returned by ``get_local_catalogs_paths()``, the
    per-directory counts from ``local_catalog_summary`` are added together
    key by key. The aggregate is printed with ``print_dict`` and returned.

    Returns:
        collections.Counter: Maps each top-level catalog sub-directory name
        to the total number of files found under it across the local
        catalogs. Sub-directories that contain no files are kept with a
        count of 0.
    """
    result = Counter()
    for local_catalog_path in get_local_catalogs_paths():
        # Counter.update() adds counts key by key. In-place `+=` would also
        # discard every key whose total is zero, hiding empty directories.
        result.update(local_catalog_summary(local_catalog_path))
    print_dict(result)
    return result
|