# Spaces: tfrere / leaderboard-parser-agent
# Build error
# File size: 17,489 Bytes
"""
Tools for the leaderboard agent.
"""
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
import re
import time
import helium

from smolagents import tool


@tool
def search_item_ctrl_f(text: str, nth_result: int = 1) -> str:
    """
    Searches for text on the current page (Ctrl + F style) and scrolls to the nth occurrence.
    Args:
        text: The text to search for
        nth_result: Which occurrence to jump to, 1-based (default: 1)
    Returns:
        A message giving the number of matches and which match was scrolled into view.
    Raises:
        ValueError: If nth_result is smaller than 1.
        Exception: If nth_result is larger than the number of matches found.
    """
    from src.agents.browser import driver

    def xpath_literal(value: str) -> str:
        # XPath 1.0 has no escape character inside string literals, so use the
        # quote style the value does not contain, or stitch pieces with concat().
        if "'" not in value:
            return f"'{value}'"
        if '"' not in value:
            return f'"{value}"'
        pieces = [f"'{piece}'" for piece in value.split("'")]
        return "concat(" + ", \"'\", ".join(pieces) + ")"

    # Without this guard 0 or a negative value would silently index from the
    # end of the match list and focus an unrelated element.
    if nth_result < 1:
        raise ValueError(f"nth_result must be 1 or greater (got {nth_result})")

    elements = driver.find_elements(
        By.XPATH, f"//*[contains(text(), {xpath_literal(text)})]"
    )
    match_count = len(elements)
    if nth_result > match_count:
        raise Exception(f"Match n°{nth_result} not found (only {match_count} matches found)")

    elem = elements[nth_result - 1]
    driver.execute_script("arguments[0].scrollIntoView(true);", elem)
    return (
        f"Found {match_count} matches for '{text}'. "
        f"Focused on element {nth_result} of {match_count}"
    )


@tool
def go_back() -> str:
    """
    Go back one step in the browser history.
    Returns:
        A fixed confirmation message.
    """
    from src.agents.browser import driver as browser

    settle_seconds = 2  # fixed pause so the previous page has time to load
    browser.back()
    time.sleep(settle_seconds)

    confirmation = "Navigated back to previous page"
    return confirmation


@tool
def close_popups() -> str:
    """
    Closes any popup/modal dialogs that might be open on the page.
    Useful when pop-ups appear (cookies, login prompts, etc.) that block interaction.
    Returns:
        "Closed popup dialogs" if at least one matching button was clicked, otherwise "No popup dialogs found".
    """
    from src.agents.browser import driver

    # XPath selectors for buttons that commonly dismiss a popup.
    popup_selectors = [
        "//button[contains(text(), 'Accept')]",
        "//button[contains(text(), 'Close')]",
        "//button[contains(text(), 'Fermer')]",
        "//button[contains(text(), 'OK')]",
        "//button[contains(text(), 'Got it')]",
        "//button[contains(@class, 'close')]",
        "//div[contains(@class, 'popup')]//button",
        "//div[contains(@class, 'modal')]//button",
        "//div[contains(@class, 'dialog')]//button",
    ]

    found = False
    for selector in popup_selectors:
        try:
            candidates = driver.find_elements(By.XPATH, selector)
        except Exception:
            # Deliberately best-effort: a failing lookup must not stop the
            # remaining selectors from being tried.
            continue

        for elem in candidates:
            # Guard each element on its own: closing one popup often makes the
            # other matched buttons stale, and that should only skip the
            # affected element rather than every remaining match.
            try:
                if elem.is_displayed():
                    elem.click()
                    found = True
                    time.sleep(0.5)  # Wait for popup to disappear
            except Exception:
                continue

    return "Closed popup dialogs" if found else "No popup dialogs found"


@tool
def extract_table_data(table_caption: str = None, table_index: int = 1) -> str:
    """
    Extracts data from a table on the page. Can find a table by caption/title or by index.
    Args:
        table_caption: Text in or near the table to find (default: None - will use index)
        table_index: The index of the table if caption is not provided (1-based)
    Returns:
        A text summary with the number of tables on the page and, for each selected table, its headers, its row count and a sample taken from its first 5 rows.
    """
    from src.agents.browser import driver

    tables = driver.find_elements(By.TAG_NAME, "table")
    if not tables:
        return "No tables found on the page."

    parts = [f"Found {len(tables)} table(s) on the page.\n"]

    if table_caption:
        needle = table_caption.lower()

        def mentions_caption(table) -> bool:
            # "In the table": the table's own text, which includes any caption.
            if needle in table.text.lower():
                return True
            # "Near the table": the element placed directly before it.
            neighbours = table.find_elements(By.XPATH, "./preceding-sibling::*[1]")
            return any(needle in neighbour.text.lower() for neighbour in neighbours)

        selected = [
            (position, table)
            for position, table in enumerate(tables, start=1)
            if mentions_caption(table)
        ]
        if not selected:
            parts.append(f"No table matching '{table_caption}' was found.\n")
            return "".join(parts)
    else:
        if not 1 <= table_index <= len(tables):
            parts.append(
                f"Table index {table_index} is out of range (valid: 1 to {len(tables)}).\n"
            )
            return "".join(parts)
        selected = [(table_index, tables[table_index - 1])]

    for position, table in selected:
        # Tables keep their page-wide number so a later call can use table_index.
        parts.append(f"\nTable {position}:\n")

        # Try to get headers
        headers = table.find_elements(By.TAG_NAME, "th")
        if headers:
            header_texts = [header.text for header in headers]
            parts.append(f"Headers: {', '.join(header_texts)}\n")

        # Get rows
        rows = table.find_elements(By.TAG_NAME, "tr")
        parts.append(f"Found {len(rows)} rows.\n")

        # Sample the first 5 rows; rows without <td> cells (e.g. a header row)
        # are skipped but still count towards the 5.
        for row_number, row in enumerate(rows[:5], start=1):
            cells = row.find_elements(By.TAG_NAME, "td")
            if cells:
                cell_texts = [cell.text for cell in cells]
                parts.append(f"Row {row_number}: {' | '.join(cell_texts)}\n")

    return "".join(parts)


@tool
def find_leaderboard_elements() -> str:
    """
    Look for page structures that might hold leaderboard data: tables, ordered lists,
    divs with grid/flex/table/rank/leaderboard classes, and elements with rank/position/standing classes.
    Returns:
        One line per kind of structure that was found, with its count, or a hint to scroll or navigate when nothing was found.
    """
    from src.agents.browser import driver

    # Each probe is (locator strategy, locator, message for a non-zero count).
    # The probes run in this order and their messages are concatenated.
    probes = (
        (
            By.TAG_NAME,
            "table",
            "Found {count} table(s) that might contain leaderboard data.\n",
        ),
        (
            By.TAG_NAME,
            "ol",
            "Found {count} ordered list(s) that might contain rankings.\n",
        ),
        (
            By.XPATH,
            "//div[contains(@class, 'grid') or contains(@class, 'flex') or contains(@class, 'table') or contains(@class, 'rank') or contains(@class, 'leaderboard')]",
            "Found {count} div elements with grid/flex/table classes that might be custom leaderboards.\n",
        ),
        (
            By.XPATH,
            "//*[contains(@class, 'rank') or contains(@class, 'position') or contains(@class, 'standing')]",
            "Found {count} elements with rank/position classes.\n",
        ),
    )

    findings = []
    for strategy, locator, template in probes:
        count = len(driver.find_elements(strategy, locator))
        if count:
            findings.append(template.format(count=count))

    if findings:
        return "".join(findings)
    return "Could not find any obvious leaderboard elements. Try scrolling or navigating to the correct section."

@tool
def map_clickable_elements(keyword: str = None) -> str:
    """
    Displays a list of all clickable elements on the page with their coordinates.

    Args:
        keyword: Optional keyword to filter elements. If specified, only elements containing this keyword will be displayed.

    Returns:
        A string listing all clickable elements with their coordinates.
    """
    from src.agents.browser import driver

    clickable_selectors = [
        "a", "button", "input[type='button']", "input[type='submit']",
        ".clickable", "[role='button']", "[onclick]"
    ]

    parts = ["Éléments cliquables détectés:\n"]
    needle = keyword.lower() if keyword else None
    seen_ids = set()
    total = 0

    for selector in clickable_selectors:
        for element in driver.find_elements(By.CSS_SELECTOR, selector):
            try:
                # An element can match several selectors (e.g. a <button> that
                # also has an onclick handler); list and count it only once.
                if element.id in seen_ids:
                    continue
                seen_ids.add(element.id)

                # Fall back to the value attribute for inputs without text.
                text = element.text.strip() or element.get_attribute("value")

                # Skip empty or non-visible elements
                if not text or not element.is_displayed():
                    continue

                # Filter by keyword when one was given
                if needle and needle not in text.lower():
                    continue

                # Report the centre of the element's rectangle
                rect = element.rect
                x = int(rect['x'] + rect['width']/2)
                y = int(rect['y'] + rect['height']/2)

                total += 1
                parts.append(f"{total}. '{text}' ({selector}) - coords: x={x}, y={y}\n")
            except Exception:
                # Best effort: an element that went stale or cannot be read is
                # skipped. Unlike a bare except, KeyboardInterrupt and
                # SystemExit still propagate.
                continue

    parts.append(
        f"\nTotal: {total} éléments cliquables"
        + (" contenant '" + keyword + "'" if keyword else "")
    )
    return "".join(parts)

@tool
def copy_link_from_element(text_to_find: str, link_position: int = 1) -> str:
    """
    Find elements with specified text and return the URL if it's a link or has a parent link.
    Args:
        text_to_find: Text to search for
        link_position: If multiple matches, which one to use (1-based)
    Returns:
        A message containing the URL that was found, or a message explaining why no URL could be obtained.
    """
    from src.agents.browser import driver

    def xpath_literal(value: str) -> str:
        # XPath 1.0 has no escape character inside string literals, so use the
        # quote style the value does not contain, or stitch pieces with concat().
        if "'" not in value:
            return f"'{value}'"
        if '"' not in value:
            return f'"{value}"'
        pieces = [f"'{piece}'" for piece in value.split("'")]
        return "concat(" + ", \"'\", ".join(pieces) + ")"

    try:
        # find_elements(By.XPATH, ...) replaces find_element_by_xpath, which
        # Selenium 4.3 removed, and returns an empty list instead of raising.
        matches = driver.find_elements(
            By.XPATH, f"//*[contains(text(), {xpath_literal(text_to_find)})]"
        )
        if not matches:
            return f"No element containing the text '{text_to_find}' was found."
        if not 1 <= link_position <= len(matches):
            return f"Match n°{link_position} not found (only {len(matches)} matches found)"
        element = matches[link_position - 1]

        # Method 1: the element itself carries the URL
        href = element.get_attribute("href")
        if href:
            return f"URL found: {href}"

        # Method 2: an enclosing link. An empty list here simply lets the
        # search continue with the child links below.
        for parent in element.find_elements(By.XPATH, "./ancestor::a"):
            href = parent.get_attribute("href")
            if href:
                return f"URL found in parent element: {href}"

        # Method 3: a link nested inside the element
        for child in element.find_elements(By.XPATH, ".//a"):
            href = child.get_attribute("href")
            if href:
                return f"URL found in child element: {href}"

        # Method 4: right-click and pick a "copy link address" entry.
        # NOTE(review): this can only work when the page renders its own
        # context menu as <div> elements; a browser's native menu is not part
        # of the DOM - confirm whether this fallback is still wanted.
        ActionChains(driver).context_click(element).perform()

        # Give the context menu a moment to appear
        time.sleep(1)

        # The labels depend on the browser and on its language
        copy_link_texts = ["Copy link address", "Copier l'adresse du lien", "Copy Link", "Copier le lien"]

        for label in copy_link_texts:
            try:
                options = driver.find_elements(
                    By.XPATH, f"//div[contains(text(), {xpath_literal(label)})]"
                )
                if not options:
                    continue
                options[0].click()
                return f"Action 'Copier l'adresse du lien' effectuée pour '{text_to_find}'"
            except Exception:
                # Best effort: try the next label if this entry cannot be used
                continue

        # Dismiss the context menu
        ActionChains(driver).send_keys(Keys.ESCAPE).perform()

        return f"Impossible de trouver un lien pour l'élément '{text_to_find}' avec les méthodes disponibles."

    except Exception as e:
        return f"Erreur lors de la recherche du lien: {str(e)}"

@tool
def validate_json_results(result: dict) -> tuple[bool, str]:
    """
    Checks that the results do not contain generic placeholders.
    Args:
        result: The result to validate
    Returns:
        A tuple containing a boolean indicating if the result is valid and a message
        explaining why the result is invalid if it is not valid.
    """
    if not result or not isinstance(result, dict):
        return False, "Invalid result"

    models = result.get("top_models")
    if not isinstance(models, (list, tuple)) or len(models) < 3:
        return False, "Less than 3 models found"
    if not all(isinstance(model, dict) for model in models):
        return False, "Invalid result"

    # A missing, None or non-string name is treated as an empty name instead of
    # crashing on .lower().
    names = [
        model.get("name") if isinstance(model.get("name"), str) else ""
        for model in models
    ]

    # Check for duplicate models (case-insensitive)
    seen_models = set()
    for model, name in zip(models, names):
        key = name.lower()
        if key in seen_models:
            return False, f"Duplicate model '{model.get('name')}' found. Please ensure each model is unique."
        seen_models.add(key)

    # Check for generic names
    generic_names = {"model a", "model b", "model c", "model 1", "model 2", "model 3", "model name", "unavailable"}
    if any(name.lower() in generic_names for name in names):
        return False, "Generic model names detected"

    # Check for unwanted suffixes in model names: a trailing "(...)" group
    unwanted_suffix_pattern = r"\(.*\)$"
    for model, name in zip(models, names):
        if re.search(unwanted_suffix_pattern, name):
            return False, f"Model name '{model.get('name')}' contains unwanted suffixes. Please remove them if you think they are not part of the model name. If it's a version number or a date, keep it."

    # Check for generic URLs
    generic_urls = ["example.com", "example.org"]
    model_urls = [str(model.get("url")).lower() for model in models if model.get("url") is not None]
    if any(generic in url for url in model_urls for generic in generic_urls):
        return False, "Generic URLs detected"

    # Check for submatch between model name and URL
    for model, name in zip(models, names):
        url = model.get("url")

        # Skip validation if URL is None or empty - this is acceptable, so no warning
        if not url:
            continue

        url = str(url).lower()
        lowered = name.lower()
        if len(lowered) >= 4:
            # Any 4-character window of the name must appear in the URL.
            windows = [lowered[i:i + 4] for i in range(len(lowered) - 3)]
        else:
            # Names shorter than 4 characters have no 4-character window, so
            # compare the whole name; an empty name never matches.
            windows = [lowered] if lowered else []
        if not any(window in url for window in windows):
            return False, f"URL for model '{model.get('name')}' does not have a valid submatch with the name. This is probably a wrong URL. Please check the URL and try again."

    # Check the evaluation criterion (a None value counts as missing)
    criteria = result.get("evaluation_criteria") or ""
    if len(criteria) < 10:
        return False, "Evaluation criterion missing or too short"

    return True, "Valid results"

@tool
def find_model_links(model_name: str) -> str:
    """
    Search for links that might point to a model based on their URL
    and their match with the model name.
    Args:
        model_name: The name of the model to search for

    Returns:
        A list of potential links to the model
    """
    from src.agents.browser import driver

    # Length of the name fragment that must appear in both the URL and the
    # link text for a link to count as a candidate.
    fragment_length = 5

    try:
        # 1. Retrieve all links on the page
        all_links = driver.find_elements(By.TAG_NAME, "a")
        if not all_links:
            return "No links were found on the page."

        # 2. Known patterns for model repositories
        model_url_patterns = [
            re.compile(r'huggingface\.co/[^/]+/[^/]+', re.IGNORECASE),  # Hugging Face model repo
            re.compile(r'github\.com/[^/]+/[^/]+', re.IGNORECASE),      # GitHub repo
        ]
        ignored_suffixes = ('.png', '.jpg', '.jpeg', '.gif', '.svg', '.webp', '.ico', '.css', '.js')

        model_name_lower = model_name.lower()
        if len(model_name_lower) >= fragment_length:
            fragments = {
                model_name_lower[i:i + fragment_length]
                for i in range(len(model_name_lower) - fragment_length + 1)
            }
        elif model_name_lower:
            # Names shorter than one fragment would otherwise never match, so
            # the whole name is used as the single fragment.
            fragments = {model_name_lower}
        else:
            fragments = set()

        model_links = []
        for link in all_links:
            try:
                # Only visible links that have an href are considered
                if not link.is_displayed():
                    continue
                link_url = link.get_attribute('href')
                if not link_url:
                    continue

                link_text = link.text.strip()
                url_lower = link_url.lower()
                text_lower = link_text.lower()

                # Ignore links to non-relevant resources (images, styles, scripts)
                if url_lower.endswith(ignored_suffixes):
                    continue

                # The URL must look like a model repository
                if not any(pattern.search(link_url) for pattern in model_url_patterns):
                    continue

                # The same name fragment must occur in the URL and in the link text
                if not any(
                    fragment in url_lower and fragment in text_lower
                    for fragment in fragments
                ):
                    continue

                # Confidence: how many characters of the model name occur
                # somewhere in the link text
                confidence = sum(1 for char in model_name_lower if char in text_lower)
                model_links.append({
                    'url': link_url,
                    'text': link_text,
                    'confidence': confidence
                })
            except Exception:
                continue  # Best effort: skip links that cannot be inspected

        # 3. Format the result
        if not model_links:
            return f"No potential links to the model '{model_name}' were found."

        result = f"Found {len(model_links)} potential links for the model '{model_name}':\n\n"

        for position, link in enumerate(model_links, start=1):
            result += f"Candidate {position}:\n"
            result += f"URL: {link['url']}\n"
            result += f"Text: {link['text']}\n"
            result += f"Confidence: {link['confidence']}\n\n"

        # 4. Suggest the best candidate (the first one with the highest confidence)
        best_candidate = max(model_links, key=lambda candidate: candidate['confidence'])
        result += f"Best candidate for '{model_name}':\nURL: {best_candidate['url']}\nText: {best_candidate['text']} "

        return result
    except Exception as e:
        return f"Error while searching for links for the model '{model_name}': {str(e)}"

@tool
def click_at_coordinates(x: int, y: int) -> str:
    """
    Clicks at the specified x,y coordinates on the page.
    This is useful when other targeting methods fail or when dealing with complex UI elements.

    Args:
        x: The x-coordinate to click at, in pixels from the left edge of the viewport
        y: The y-coordinate to click at, in pixels from the top edge of the viewport

    Returns:
        A message confirming the click action
    """
    from selenium.webdriver.common.actions.action_builder import ActionBuilder
    from src.agents.browser import driver

    try:
        # Move to an absolute viewport position. move_by_offset() is relative
        # to wherever the pointer currently is, so after any earlier pointer
        # action the click would land at the wrong place.
        pointer = ActionBuilder(driver)
        pointer.pointer_action.move_to_location(x, y)
        pointer.pointer_action.click()
        pointer.perform()

        time.sleep(1)  # Wait a moment for any reactions to the click
        return f"Successfully clicked at coordinates ({x}, {y})"
    except Exception as e:
        return f"Failed to click at coordinates ({x}, {y}): {str(e)}"