# Source: Hugging Face Space file view (Space status: Build error).
# File size: 6,729 bytes; ref 0821095.
"""
Utilities for interacting with the Hugging Face Hub.
"""
import os
from huggingface_hub import HfApi, login, hf_hub_download
def _upload_json_file(api, local_path, path_in_repo, repo_id, label):
    """
    Uploads a single file to a dataset repository on the Hugging Face Hub.

    Args:
        api: HfApi client used to perform the upload
        local_path: Path to the local file to upload
        path_in_repo: Destination file name inside the repository
        repo_id: Hub repository ID
        label: Short name of the file ("categories" or "results"); used in
            the log messages and in the commit message

    Raises:
        Exception: Any upload error whose message does not say that nothing
            changed since the last commit is re-raised to the caller
    """
    print(f"\n--- UPLOADING {label.upper()} FILE ---")
    print(f"Local file: {local_path}")
    print(f"Destination: {repo_id}/{path_in_repo}")
    print("Uploading...")
    try:
        api.upload_file(
            path_or_fileobj=local_path,
            path_in_repo=path_in_repo,
            repo_id=repo_id,
            repo_type="dataset",
            commit_message=f"Update leaderboard {label}",
        )
        print(f"Upload of {local_path} successful!")
    except Exception as e:
        print(f"Note when uploading {local_path}: {e}")
        # NOTE(review): the leading marker character in the two messages
        # below looks mis-encoded; confirm the intended symbol.
        if "No files have been modified since last commit" in str(e):
            # An unchanged file is not a failure: report it and carry on.
            print(f"β The {label} file is identical to the one already on the Hub. No changes needed.")
        else:
            print("β ERROR: Upload failed for another reason.")
            # Bare raise keeps the original traceback intact.
            raise


def upload_to_hub(to_parse_file, results_file, repo_id=None) -> bool:
    """
    Uploads files to the Hugging Face Hub.

    The categories file is stored as "best_model_for_category_list.json" and
    the results file as "best_model_for_results.json" in a dataset repository.

    Args:
        to_parse_file: Path to the categories file
        results_file: Path to the results file
        repo_id: Hub repository ID. When None, the HUGGING_FACE_STORAGE_REPO
            environment variable is used, falling back to
            "leaderboard-explorer/leaderboard_explorer" if it is unset or empty

    Returns:
        True if upload succeeded, False otherwise (including when the
        HUGGING_FACE_HUB_TOKEN environment variable is not defined)
    """
    try:
        if repo_id is None:
            env_repo = os.getenv("HUGGING_FACE_STORAGE_REPO")
            if env_repo:
                repo_id = env_repo
                print(f"Using target dataset specified in HUGGING_FACE_STORAGE_REPO: {repo_id}")
            else:
                # Covers both "unset" and "set but empty": an empty variable
                # must not turn into an empty repository ID.
                repo_id = "leaderboard-explorer/leaderboard_explorer"
                print(f"No target dataset specified, using default value: {repo_id}")

        # Check if token is available
        token = os.getenv("HUGGING_FACE_HUB_TOKEN")
        if not token:
            print("ERROR: Environment variable HUGGING_FACE_HUB_TOKEN is not defined.")
            return False

        # Connect to Hub
        print("Connecting to Hugging Face Hub...")
        login(token=token)
        api = HfApi()

        # Upload JSON files; a failure on either one aborts the whole operation
        _upload_json_file(
            api, to_parse_file, "best_model_for_category_list.json", repo_id, "categories"
        )
        _upload_json_file(
            api, results_file, "best_model_for_results.json", repo_id, "results"
        )

        print("\nUpload operation completed: files have been processed!")
        return True
    except Exception as e:
        # Top-level boundary: callers only get a boolean, so report and swallow.
        print(f"GENERAL ERROR during file upload to Hub: {e}")
        return False
def _download_dataset_file(repo_id, filename, data_dir):
    """
    Downloads one file from a dataset repository into the data directory.

    Args:
        repo_id: Hub repository ID
        filename: Name of the file inside the repository
        data_dir: Local directory passed to hf_hub_download as local_dir

    Raises:
        Exception: Whatever hf_hub_download raises is propagated; the caller
            decides whether the failure is fatal
    """
    print(f"Downloading {filename} from {repo_id}...")
    hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        repo_type="dataset",
        local_dir=data_dir,
        local_dir_use_symlinks=False,
    )
    local_path = os.path.join(data_dir, filename)
    print(f"File {filename} successfully downloaded to {local_path}")


def download_from_hub(repo_id=None) -> bool:
    """
    Downloads files from the Hugging Face Hub.

    Files are written to a "data" directory located one level above the
    directory containing this module; the directory is created if missing.

    Args:
        repo_id: Hub repository ID. When None, the HUGGING_FACE_STORAGE_REPO
            environment variable is used, falling back to
            "leaderboard-explorer/leaderboard_explorer" if it is unset or empty

    Returns:
        True if every required file was downloaded (optional files may be
        missing), False otherwise (including when the HUGGING_FACE_HUB_TOKEN
        environment variable is not defined)
    """
    try:
        if repo_id is None:
            env_repo = os.getenv("HUGGING_FACE_STORAGE_REPO")
            if env_repo:
                repo_id = env_repo
                print(f"Using source dataset specified in HUGGING_FACE_STORAGE_REPO: {repo_id}")
            else:
                # Covers both "unset" and "set but empty": an empty variable
                # must not turn into an empty repository ID.
                repo_id = "leaderboard-explorer/leaderboard_explorer"
                print(f"No source dataset specified, using default value: {repo_id}")

        # Check if token is available
        token = os.getenv("HUGGING_FACE_HUB_TOKEN")
        if not token:
            print("ERROR: Environment variable HUGGING_FACE_HUB_TOKEN is not defined.")
            return False

        # Connect to Hub
        login(token=token)

        # Create data directory if it doesn't exist
        script_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        data_dir = os.path.join(script_dir, "data")
        os.makedirs(data_dir, exist_ok=True)

        required_files = (
            "final_leaderboards.json",
            "best_model_for_category_list.json",
        )
        optional_files = (
            "best_model_for_results.json",
        )

        # Required files first: the first failure aborts the download
        for filename in required_files:
            try:
                _download_dataset_file(repo_id, filename, data_dir)
            except Exception as e:
                print(f"ERROR: Unable to download required file {filename}: {e}")
                return False

        # Optional files next: failures are reported but tolerated
        for filename in optional_files:
            try:
                _download_dataset_file(repo_id, filename, data_dir)
            except Exception as e:
                print(f"WARNING: Unable to download optional file {filename}: {e}")
                print("This is not a problem, a new file will be created if necessary.")

        return True
    except Exception as e:
        # Top-level boundary: callers only get a boolean, so report and swallow.
        print(f"ERROR during file download from Hub: {e}")
        return False