""" Utilities for interacting with the Hugging Face Hub. """ import os from huggingface_hub import HfApi, login, hf_hub_download def upload_to_hub(to_parse_file, results_file, repo_id=None): """ Uploads files to the Hugging Face Hub. Args: to_parse_file: Path to the categories file results_file: Path to the results file repo_id: Hub repository ID Returns: True if upload succeeded, False otherwise """ try: # Use environment variable HUGGING_FACE_STORAGE_REPO if available # Otherwise, use default value if repo_id is None: repo_id = os.getenv("HUGGING_FACE_STORAGE_REPO", "leaderboard-explorer/leaderboard_explorer") if os.getenv("HUGGING_FACE_STORAGE_REPO"): print(f"Using target dataset specified in HUGGING_FACE_STORAGE_REPO: {repo_id}") else: print(f"No target dataset specified, using default value: {repo_id}") # Check if token is available token = os.getenv("HUGGING_FACE_HUB_TOKEN") if not token: print("ERROR: Environment variable HUGGING_FACE_HUB_TOKEN is not defined.") return False # Connect to Hub print("Connecting to Hugging Face Hub...") login(token=token) api = HfApi() # Upload JSON files print(f"\n--- UPLOADING CATEGORIES FILE ---") print(f"Local file: {to_parse_file}") print(f"Destination: {repo_id}/best_model_for_category_list.json") print(f"Uploading...") try: api.upload_file( path_or_fileobj=to_parse_file, path_in_repo="best_model_for_category_list.json", repo_id=repo_id, repo_type="dataset", commit_message="Update leaderboard categories" ) print(f"Upload of {to_parse_file} successful!") except Exception as e: print(f"Note when uploading {to_parse_file}: {e}") if "No files have been modified since last commit" in str(e): print("→ The categories file is identical to the one already on the Hub. No changes needed.") else: print(f"→ ERROR: Upload failed for another reason.") raise e print(f"\n--- UPLOADING RESULTS FILE ---") print(f"Local file: {results_file}") print(f"Destination: {repo_id}/best_model_for_results.json") print(f"Uploading...") try: api.upload_file( path_or_fileobj=results_file, path_in_repo="best_model_for_results.json", repo_id=repo_id, repo_type="dataset", commit_message="Update leaderboard results" ) print(f"Upload of {results_file} successful!") except Exception as e: print(f"Note when uploading {results_file}: {e}") if "No files have been modified since last commit" in str(e): print("→ The results file is identical to the one already on the Hub. No changes needed.") else: print(f"→ ERROR: Upload failed for another reason.") raise e print(f"\nUpload operation completed: files have been processed!") return True except Exception as e: print(f"GENERAL ERROR during file upload to Hub: {e}") return False def download_from_hub(repo_id=None): """ Downloads files from the Hugging Face Hub. Args: repo_id: Hub repository ID Returns: True if download succeeded, False otherwise """ try: # Use environment variable HUGGING_FACE_STORAGE_REPO if available # Otherwise, use default value if repo_id is None: repo_id = os.getenv("HUGGING_FACE_STORAGE_REPO", "leaderboard-explorer/leaderboard_explorer") if os.getenv("HUGGING_FACE_STORAGE_REPO"): print(f"Using source dataset specified in HUGGING_FACE_STORAGE_REPO: {repo_id}") else: print(f"No source dataset specified, using default value: {repo_id}") # Check if token is available token = os.getenv("HUGGING_FACE_HUB_TOKEN") if not token: print("ERROR: Environment variable HUGGING_FACE_HUB_TOKEN is not defined.") return False # Connect to Hub login(token=token) # Create data directory if it doesn't exist script_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) data_dir = os.path.join(script_dir, "data") os.makedirs(data_dir, exist_ok=True) # List of required and optional files required_files = [ "final_leaderboards.json", "best_model_for_category_list.json" ] optional_files = [ "best_model_for_results.json" ] # Download required files first for filename in required_files: local_path = os.path.join(data_dir, filename) try: # Download file print(f"Downloading {filename} from {repo_id}...") hf_hub_download( repo_id=repo_id, filename=filename, repo_type="dataset", local_dir=data_dir, local_dir_use_symlinks=False ) print(f"File {filename} successfully downloaded to {local_path}") except Exception as e: print(f"ERROR: Unable to download required file {filename}: {e}") return False # Download optional files next for filename in optional_files: local_path = os.path.join(data_dir, filename) try: print(f"Downloading {filename} from {repo_id}...") hf_hub_download( repo_id=repo_id, filename=filename, repo_type="dataset", local_dir=data_dir, local_dir_use_symlinks=False ) print(f"File {filename} successfully downloaded to {local_path}") except Exception as e: print(f"WARNING: Unable to download optional file {filename}: {e}") print(f"This is not a problem, a new file will be created if necessary.") return True except Exception as e: print(f"ERROR during file download from Hub: {e}") return False