"""
Leaderboard processing.
"""
import datetime
import os
from src.agents.parser_agent import process_leaderboard
from src.file_utils import create_category_slug, split_combined_id
def normalize_category(category_name):
"""
Normalizes a category name by replacing spaces and underscores with hyphens and converting to lowercase.
Args:
category_name: The category name to normalize
Returns:
The normalized category
"""
# Use the create_category_slug function from file_utils.py
return create_category_slug(category_name)
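
# Usage sketch (hypothetical values; the exact slug rules live in
# src.file_utils.create_category_slug): both "Text Generation" and
# "text_generation" are expected to map to "text-generation".
# assert normalize_category("Text Generation") == "text-generation"
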
def process_single_leaderboard(uid, host, model, index, all_results, additional_rules=None, category=None):
"""
Process a single leaderboard and update the results.
Args:
uid: The UID of the leaderboard to process
host: The URL of the leaderboard
model: The model to use
index: The index of the leaderboard
all_results: The list of all results
additional_rules: Additional specific rules for this leaderboard
category: The category of the leaderboard (for combined identifier)
Returns:
The updated list of results
"""
print(f"\n\nProcessing leaderboard: {uid} - {host}")
if additional_rules:
print(f"Additional rules for this leaderboard: {additional_rules}")
if category:
normalized_category = normalize_category(category)
print(f"Category: {category} (normalized: {normalized_category})")
else:
normalized_category = None
    # Read the retry budget from environment variables; clamp to at least one
    # attempt so that `result` is always bound after the loop below
    max_retries = max(1, int(os.getenv("LEADERBOARD_MAX_RETRIES", "3")))
    print(f"Maximum number of retries configured: {max_retries}")
attempt = 0
last_error = None
# Try to process the leaderboard multiple times
while attempt < max_retries:
attempt += 1
if attempt > 1:
print(f"Retry attempt {attempt}/{max_retries} for leaderboard {uid} - {host}")
# Process the leaderboard
result = process_leaderboard(host, model, index, uid, additional_rules)
        # If there was an error, record it before deciding whether to retry,
        # so the message from the final attempt is not lost
        if result.get("parsing_status") == "error":
            last_error = result.get("parsing_message", "Unknown error")
            print(f"Error during attempt {attempt}: {last_error}")

        # Stop once parsing succeeds or the retry budget is exhausted
        if result.get("parsing_status") == "success" or attempt >= max_retries:
            break
# Get parsing date from result or generate a new one if not available
if result and "parsed_at" in result:
parsed_at = result["parsed_at"]
else:
# Fallback to current time if not provided by process_leaderboard
now = datetime.datetime.now()
parsed_at = now.isoformat()
# Create combined ID if category is provided
result_uid = uid
if normalized_category:
# Format of the combined UID: category_uid
# The category is already normalized (slugified) by normalize_category
# The underscore "_" is the ONLY separator between the category and the UID
result_uid = f"{normalized_category}_{uid}"
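    # Example (hypothetical values): category "LLM Safety" with uid "abc123"
    # produces the combined UID "llm-safety_abc123"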
# Create base result object with uid, host, and thumbnail
leaderboard_result = {
"uid": result_uid,
"original_uid": uid,
"category": normalized_category,
"host": host,
"parsing_status": "rejected", # Default to rejected
"parsed_at": parsed_at
}
# Check if we have valid results
valid_result = False
if result and result.get("results"):
if isinstance(result["results"], dict):
# Check if we have top models with required fields
if "top_models" in result["results"] and len(result["results"]["top_models"]) > 0:
valid_models = True
for model_info in result["results"]["top_models"]:
# Each model must have at least rank and name
if not model_info.get("rank") or not model_info.get("name"):
valid_models = False
break
# Check if we have evaluation criteria
if valid_models and "evaluation_criteria" in result["results"] and result["results"]["evaluation_criteria"]:
valid_result = True
else:
print(f"Invalid results format: {type(result['results']).__name__}, expected dict")
    else:
        print("Missing or empty results in agent response")
# If we have valid results, extract the data
if valid_result:
leaderboard_result["parsing_status"] = "approved"
leaderboard_result["top_models"] = []
leaderboard_result["evaluation_criteria"] = result["results"]["evaluation_criteria"]
        # Extract the top models, keeping only the fields we expose
        for model_info in result["results"]["top_models"]:
            model_entry = {
                "rank": model_info.get("rank"),
                "name": model_info.get("name"),
                "url": model_info.get("url"),  # optional; None when absent
            }
            leaderboard_result["top_models"].append(model_entry)
    else:
        reason = f" (last error: {last_error})" if last_error else ""
        print(f"Leaderboard rejected: {uid} - Incomplete or invalid information{reason}")
# Check if this UID already exists in the results
for i, existing_result in enumerate(all_results):
if existing_result["uid"] == result_uid:
# Replace the existing result
all_results[i] = leaderboard_result
print(f"Result updated for UID: {result_uid}")
return all_results
# ADDITIONAL CHECK: Make sure there's no confusion with other categories
# for the same original_uid
for existing_result in all_results:
if existing_result["original_uid"] == uid and existing_result["category"] != normalized_category:
print(f"WARNING: A result already exists for original_uid {uid} but with a different category:")
print(f" - Existing category: {existing_result['category']}, UID: {existing_result['uid']}")
print(f" - New category: {normalized_category}, UID: {result_uid}")
# We continue anyway, as it's a valid case to have the same leaderboard in different categories
# If we get here, this is a new result
all_results.append(leaderboard_result)
print(f"New result added for UID: {result_uid}")
return all_results
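

if __name__ == "__main__":
    # Minimal usage sketch with hypothetical values throughout; the real
    # entry point builds `model` via the project's model factory and loops
    # over many leaderboards
    demo_results = []
    demo_results = process_single_leaderboard(
        uid="example-board",                     # hypothetical UID
        host="https://example.com/leaderboard",  # hypothetical URL
        model=None,                              # placeholder model object
        index=0,
        all_results=demo_results,
        category="Text Generation",
    )
    print(demo_results[-1]["parsing_status"])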