tfrere's picture
first commit
0821095
raw
history blame
3.34 kB
#!/usr/bin/env python
"""
Main script for the leaderboard parser.
This script processes the leaderboards listed in the data/best_model_for_category_list.json file
by matching their UIDs with the hosts found in data/final_leaderboards.json.
Environment variables:
HUGGING_FACE_HUB_TOKEN: Authentication token for Hugging Face Hub (required)
HUGGING_FACE_STORAGE_REPO: Target dataset name on the Hub (optional, default: leaderboard-explorer/leaderboard_explorer)
LEADERBOARD_REPROCESS_INTERVAL_HOURS: Interval in hours between leaderboard processing runs (default: 24)
"""
import argparse
import logging
from dotenv import load_dotenv
import uvicorn
import sys
# Import from src modules
from src.processor import process_leaderboards
from src.server import app, initialize_server
from src.scheduler import initialize_scheduler, start_scheduler
# Root logging setup: timestamp, logger name, severity and message on every
# record, with INFO as the minimum level.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Logger used by this script for all of its own messages.
logger = logging.getLogger("leaderboard-parser")
def main(argv=None):
    """
    Main function to process leaderboards specified in best_model_for_category_list.json.

    Parses the command line, loads environment variables through
    load_dotenv(), then either starts server mode (--server) or runs a
    single processing pass through process_leaderboards.

    Args:
        argv: Optional list of command line arguments to parse instead of
            the process arguments. Defaults to None, in which case argparse
            reads sys.argv[1:] exactly as before.

    Returns:
        int: Exit code for the process. 0 when server mode returns or when
        process_leaderboards reports success, 1 when it reports failure.
    """
    # Parse command line arguments
    parser = argparse.ArgumentParser(description="Leaderboard Parser")
    parser.add_argument("--clean", action="store_true", help="Clean the results file before starting")
    parser.add_argument("--force-retry-uid", help="Force retry for a specific leaderboard UID")
    parser.add_argument("--force-retry-category", help="Force retry for all leaderboards of a specific category")
    parser.add_argument("--upload-only", action="store_true", help="Only upload local files to the Hub without processing leaderboards")
    parser.add_argument("--local-only", action="store_true", help="Local mode only: do not download from the Hub and do not upload to the Hub")
    parser.add_argument("--retry-rejected", action="store_true", help="Force reprocessing of rejected leaderboards even if it's been less than 24h")
    parser.add_argument("--server", action="store_true", help="Run as a web server with scheduled processing")
    args = parser.parse_args(argv)

    # Load environment variables
    load_dotenv()

    # Server mode blocks until the web server stops; the other flags are not
    # forwarded to it.
    if args.server:
        run_server_mode()
        # Explicit status so every path returns an int; this is the same exit
        # status the previous bare `return` produced through sys.exit(None).
        return 0

    # process_leaderboards receives the parsed options as a plain dict and
    # answers with a (success, message) pair.
    success, message = process_leaderboards(vars(args))
    if not success:
        logger.error(message)
        return 1

    logger.info(message)
    return 0
def run_server_mode(host="0.0.0.0", port=8000):
    """Run the application in server mode with periodic processing.

    Hands process_leaderboards to both the server and the scheduler, starts
    the scheduler, then blocks inside uvicorn.run serving the app. A
    KeyboardInterrupt or any other exception raised while serving is logged
    and not re-raised.

    Args:
        host: Address passed to uvicorn.run. Defaults to "0.0.0.0".
        port: Port passed to uvicorn.run. Defaults to 8000.
    """
    # Initialize server and scheduler with the process_leaderboards function
    initialize_server(process_leaderboards)
    initialize_scheduler(process_leaderboards)

    # Start the scheduler thread. The returned handle is not used below; it is
    # kept bound so it stays referenced while the server runs.
    # NOTE(review): confirm in src.scheduler whether this reference is needed.
    scheduler = start_scheduler()

    try:
        # Log startup information
        logger.info("Running in server mode with periodic processing")
        # Run the FastAPI server
        uvicorn.run(app, host=host, port=port)
    except KeyboardInterrupt:
        logger.info("Server stopped by user")
    except Exception as e:
        # logger.exception records the traceback together with the message,
        # which a plain logger.error call would drop.
        logger.exception("Error running server: %s", e)
if __name__ == "__main__":
sys.exit(main())