tfrere's picture
first commit
0821095
raw
history blame
6.73 kB
"""
Utilities for interacting with the Hugging Face Hub.
"""
import os
from huggingface_hub import HfApi, login, hf_hub_download
def _upload_json_to_hub(api, local_path, path_in_repo, repo_id, label, commit_message):
    """
    Uploads a single file to a dataset repository on the Hub.

    A failure whose message contains "No files have been modified since last
    commit" is treated as a benign no-op; any other failure is re-raised.

    Args:
        api: Client object exposing ``upload_file`` (an ``HfApi`` instance)
        local_path: Path to the local file to upload
        path_in_repo: Destination file name inside the repository
        repo_id: Hub repository ID
        label: Short human-readable name of the file, used in log messages
        commit_message: Commit message recorded on the Hub
    """
    print(f"\n--- UPLOADING {label.upper()} FILE ---")
    print(f"Local file: {local_path}")
    print(f"Destination: {repo_id}/{path_in_repo}")
    print("Uploading...")
    try:
        api.upload_file(
            path_or_fileobj=local_path,
            path_in_repo=path_in_repo,
            repo_id=repo_id,
            repo_type="dataset",
            commit_message=commit_message,
        )
    except Exception as e:
        print(f"Note when uploading {local_path}: {e}")
        if "No files have been modified since last commit" not in str(e):
            print("→ ERROR: Upload failed for another reason.")
            # Bare raise keeps the original traceback for the caller.
            raise
        print(f"→ The {label} file is identical to the one already on the Hub. No changes needed.")
    else:
        print(f"Upload of {local_path} successful!")


def upload_to_hub(to_parse_file, results_file, repo_id=None):
    """
    Uploads files to the Hugging Face Hub.

    The categories file is stored as ``best_model_for_category_list.json`` and
    the results file as ``best_model_for_results.json`` in a dataset repository.

    Args:
        to_parse_file: Path to the categories file
        results_file: Path to the results file
        repo_id: Hub repository ID. When None, the HUGGING_FACE_STORAGE_REPO
            environment variable is used, falling back to
            "leaderboard-explorer/leaderboard_explorer" if it is unset or empty.

    Returns:
        True if both files were uploaded (or were already up to date),
        False if the token is missing or any other error occurred
    """
    try:
        if repo_id is None:
            # Read the variable once; an empty value counts as "not specified"
            # so we never end up with an empty repository ID.
            env_repo = os.getenv("HUGGING_FACE_STORAGE_REPO")
            if env_repo:
                repo_id = env_repo
                print(f"Using target dataset specified in HUGGING_FACE_STORAGE_REPO: {repo_id}")
            else:
                repo_id = "leaderboard-explorer/leaderboard_explorer"
                print(f"No target dataset specified, using default value: {repo_id}")

        # Check if token is available
        token = os.getenv("HUGGING_FACE_HUB_TOKEN")
        if not token:
            print("ERROR: Environment variable HUGGING_FACE_HUB_TOKEN is not defined.")
            return False

        # Connect to Hub
        print("Connecting to Hugging Face Hub...")
        login(token=token)
        api = HfApi()

        # Upload JSON files; a real failure in either propagates to the
        # handler below and aborts the remaining uploads.
        _upload_json_to_hub(
            api,
            to_parse_file,
            "best_model_for_category_list.json",
            repo_id,
            "categories",
            "Update leaderboard categories",
        )
        _upload_json_to_hub(
            api,
            results_file,
            "best_model_for_results.json",
            repo_id,
            "results",
            "Update leaderboard results",
        )

        print("\nUpload operation completed: files have been processed!")
        return True
    except Exception as e:
        print(f"GENERAL ERROR during file upload to Hub: {e}")
        return False
def download_from_hub(repo_id=None):
    """
    Downloads files from the Hugging Face Hub.

    Files are written to a ``data`` directory located one level above the
    directory containing this module. ``final_leaderboards.json`` and
    ``best_model_for_category_list.json`` are required;
    ``best_model_for_results.json`` is optional.

    Args:
        repo_id: Hub repository ID. When None, the HUGGING_FACE_STORAGE_REPO
            environment variable is used, falling back to
            "leaderboard-explorer/leaderboard_explorer" if it is unset or empty.

    Returns:
        True if every required file was downloaded, False if the token is
        missing, a required file could not be downloaded, or any other error
        occurred
    """
    try:
        if repo_id is None:
            # Read the variable once; an empty value counts as "not specified"
            # so we never end up with an empty repository ID.
            env_repo = os.getenv("HUGGING_FACE_STORAGE_REPO")
            if env_repo:
                repo_id = env_repo
                print(f"Using source dataset specified in HUGGING_FACE_STORAGE_REPO: {repo_id}")
            else:
                repo_id = "leaderboard-explorer/leaderboard_explorer"
                print(f"No source dataset specified, using default value: {repo_id}")

        # Check if token is available
        token = os.getenv("HUGGING_FACE_HUB_TOKEN")
        if not token:
            print("ERROR: Environment variable HUGGING_FACE_HUB_TOKEN is not defined.")
            return False

        # Connect to Hub
        login(token=token)

        # Create data directory if it doesn't exist
        script_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        data_dir = os.path.join(script_dir, "data")
        os.makedirs(data_dir, exist_ok=True)

        # (filename, required) pairs; required files are listed first so a
        # missing one aborts before any optional download is attempted.
        files_to_fetch = [
            ("final_leaderboards.json", True),
            ("best_model_for_category_list.json", True),
            ("best_model_for_results.json", False),
        ]

        for filename, required in files_to_fetch:
            local_path = os.path.join(data_dir, filename)
            try:
                print(f"Downloading {filename} from {repo_id}...")
                # NOTE(review): local_dir_use_symlinks appears to be deprecated in
                # recent huggingface_hub releases — confirm against the pinned version.
                hf_hub_download(
                    repo_id=repo_id,
                    filename=filename,
                    repo_type="dataset",
                    local_dir=data_dir,
                    local_dir_use_symlinks=False,
                )
                print(f"File {filename} successfully downloaded to {local_path}")
            except Exception as e:
                if required:
                    print(f"ERROR: Unable to download required file {filename}: {e}")
                    return False
                # Optional files are best-effort: report and carry on.
                print(f"WARNING: Unable to download optional file {filename}: {e}")
                print("This is not a problem, a new file will be created if necessary.")

        return True
    except Exception as e:
        print(f"ERROR during file download from Hub: {e}")
        return False