#!/usr/bin/env python
"""
Main script for the leaderboard parser.

This script processes leaderboards specified in the
data/best_model_for_category_list.json file by matching their UIDs with hosts
in data/final_leaderboards.json.

Environment variables:
    HUGGING_FACE_HUB_TOKEN: Authentication token for Hugging Face Hub (required)
    HUGGING_FACE_STORAGE_REPO: Target dataset name on the Hub
        (optional, default: leaderboard-explorer/leaderboard_explorer)
    LEADERBOARD_REPROCESS_INTERVAL_HOURS: Interval in hours between leaderboard
        processing runs (default: 24)
    PORT: Port the web server listens on in --server mode (default: 7860)
"""
import argparse
import logging
import os
import sys

from dotenv import load_dotenv
import uvicorn

# Import from src modules
from src.processor import process_leaderboards
from src.server import app, initialize_server
from src.scheduler import initialize_scheduler, start_scheduler

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger("leaderboard-parser")

# Port used when the PORT environment variable is not set (HF Spaces default).
DEFAULT_PORT = 7860


def main() -> int:
    """
    Parse the command line and either process the leaderboards once or start
    the web server with scheduled processing.

    Returns:
        Process exit status: 0 on success, 1 on failure. In ``--server`` mode
        the status reported by ``run_server_mode`` is returned.
    """
    # Parse command line arguments
    parser = argparse.ArgumentParser(description="Leaderboard Parser")
    parser.add_argument("--clean", action="store_true", help="Clean the results file before starting")
    parser.add_argument("--force-retry-uid", help="Force retry for a specific leaderboard UID")
    parser.add_argument("--force-retry-category", help="Force retry for all leaderboards of a specific category")
    parser.add_argument("--upload-only", action="store_true", help="Only upload local files to the Hub without processing leaderboards")
    parser.add_argument("--local-only", action="store_true", help="Local mode only: do not download from the Hub and do not upload to the Hub")
    parser.add_argument("--ignore-cooldown", action="store_true", help="Force reprocessing of rejected leaderboards even if it's been less than 24h")
    parser.add_argument("--server", action="store_true", help="Run as a web server with scheduled processing")
    args = parser.parse_args()

    # Load environment variables
    load_dotenv()

    # Server mode: propagate its status so a failed server start does not
    # make the process exit with 0.
    if args.server:
        return run_server_mode(args)

    # process_leaderboards takes the options as a plain dict and returns a
    # (success, message) pair.
    success, message = process_leaderboards(vars(args))

    if success:
        logger.info(message)
        return 0

    logger.error(message)
    return 1


def run_server_mode(args: argparse.Namespace) -> int:
    """
    Run the application in server mode with periodic processing.

    Args:
        args: Parsed command line arguments; converted to a dict and handed to
            the scheduler together with ``process_leaderboards``.

    Returns:
        0 when the server returned normally or was interrupted by the user,
        1 when starting or running the server raised an exception.
    """
    # Convert command line arguments to dictionary
    args_dict = vars(args)

    # Initialize server and scheduler with the process_leaderboards function
    initialize_server(process_leaderboards)
    initialize_scheduler(process_leaderboards, args_dict)

    # Start the scheduler. The return value is kept referenced while the
    # server runs, exactly as before; its type is not visible from this file.
    _scheduler = start_scheduler()

    try:
        # Log startup information
        logger.info("Running in server mode with periodic processing")

        # Get port from environment variable or use default HF Spaces port.
        # A non-numeric PORT raises ValueError, which is reported below.
        port = int(os.environ.get("PORT", DEFAULT_PORT))
        logger.info(f"Starting server on port {port}")

        # Run the FastAPI server
        uvicorn.run(app, host="0.0.0.0", port=port)
    except KeyboardInterrupt:
        logger.info("Server stopped by user")
    except Exception as e:
        # Same message as before, now with the traceback attached, and the
        # failure is surfaced through the exit status instead of being lost.
        logger.exception("Error running server: %s", e)
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main())