# NOTE(review): the three lines below were scraped repo-viewer page residue
# ("tfrere's picture" / "first commit" / commit hash 0821095), kept here as a
# comment so the file parses as valid Python.
"""
Tools for the leaderboard agent.
"""
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
import re
import time
import helium
from smolagents import tool
@tool
def search_item_ctrl_f(text: str, nth_result: int = 1) -> str:
    """
    Searches for text on the current page via Ctrl + F and jumps to the nth occurrence.
    Args:
        text: The text to search for
        nth_result: Which occurrence to jump to (1-based, default: 1)
    Raises:
        ValueError: If nth_result is not a positive index.
        Exception: If fewer than nth_result matches are found.
    """
    from src.agents.browser import driver
    if nth_result < 1:
        # Fix: a non-positive value previously indexed silently from the end of the list.
        raise ValueError(f"nth_result must be >= 1, got {nth_result}")
    # NOTE(review): a single quote inside `text` would break this XPath literal — confirm inputs are sanitized upstream.
    elements = driver.find_elements(By.XPATH, f"//*[contains(text(), '{text}')]")
    if nth_result > len(elements):
        raise Exception(f"Match n°{nth_result} not found (only {len(elements)} matches found)")
    result = f"Found {len(elements)} matches for '{text}'."
    elem = elements[nth_result - 1]
    driver.execute_script("arguments[0].scrollIntoView(true);", elem)
    # Fix: add the missing separator between the two sentences of the report.
    result += f" Focused on element {nth_result} of {len(elements)}"
    return result
@tool
def go_back() -> str:
    """
    Navigate back to the previous page.
    Returns:
        A confirmation message.
    """
    from src.agents.browser import driver
    driver.back()
    # Give the previous page a moment to render before the agent continues.
    time.sleep(2)
    return "Navigated back to previous page"
@tool
def close_popups() -> str:
    """
    Closes any popup/modal dialogs that might be open on the page.
    Useful when pop-ups appear (cookies, login prompts, etc.) that block interaction.
    Returns:
        A message indicating whether any popup was dismissed.
    """
    from src.agents.browser import driver
    # XPath patterns for the dismiss buttons of common popups/modals
    # (cookie banners, login prompts, generic close buttons).
    popup_selectors = [
        "//button[contains(text(), 'Accept')]",
        "//button[contains(text(), 'Close')]",
        "//button[contains(text(), 'Fermer')]",
        "//button[contains(text(), 'OK')]",
        "//button[contains(text(), 'Got it')]",
        "//button[contains(@class, 'close')]",
        "//div[contains(@class, 'popup')]//button",
        "//div[contains(@class, 'modal')]//button",
        "//div[contains(@class, 'dialog')]//button"
    ]
    found = False
    for selector in popup_selectors:
        try:
            popup_elements = driver.find_elements(By.XPATH, selector)
        except Exception:
            continue  # Driver hiccup: move on to the next pattern.
        for elem in popup_elements:
            # Fix: the original wrapped the whole inner loop in one try, so the
            # first stale/unclickable element aborted all remaining candidates
            # for this selector. Handle each element independently instead.
            try:
                if elem.is_displayed():
                    elem.click()
                    found = True
                    time.sleep(0.5)  # Wait for the popup to disappear.
            except Exception:
                continue  # Stale or obscured element: try the next candidate.
    return "Closed popup dialogs" if found else "No popup dialogs found"
@tool
def extract_table_data(table_caption: str = None, table_index: int = 1) -> str:
    """
    Extracts data from a table on the page. Can find a table by caption/title or by index.
    Args:
        table_caption: Text contained in the table to find (default: None - will use index)
        table_index: The index of the table if caption is not provided (1-based)
    Returns:
        A textual summary (headers, row count, first 5 rows) of the selected table(s).
    """
    from src.agents.browser import driver
    tables = driver.find_elements(By.TAG_NAME, "table")
    if not tables:
        return "No tables found on the page."
    result = f"Found {len(tables)} table(s) on the page.\n"
    # Fix: the original ignored both parameters and always dumped every table.
    # Keep the original 1-based numbering when reporting a selected subset.
    selected = list(enumerate(tables))
    if table_caption:
        matches = [(i, t) for i, t in selected if table_caption.lower() in t.text.lower()]
        if matches:
            selected = matches
        else:
            result += f"No table matched caption '{table_caption}'; showing all tables.\n"
    elif 1 <= table_index <= len(tables):
        selected = [(table_index - 1, tables[table_index - 1])]
    for i, table in selected:
        result += f"\nTable {i+1}:\n"
        # Column headers, if the table declares any <th> cells.
        headers = table.find_elements(By.TAG_NAME, "th")
        if headers:
            header_texts = [header.text for header in headers]
            result += f"Headers: {', '.join(header_texts)}\n"
        rows = table.find_elements(By.TAG_NAME, "tr")
        result += f"Found {len(rows)} rows.\n"
        # Show only the first 5 rows as a sample to keep the output short.
        for j, row in enumerate(rows[:5]):
            cells = row.find_elements(By.TAG_NAME, "td")
            if cells:
                cell_texts = [cell.text for cell in cells]
                result += f"Row {j+1}: {' | '.join(cell_texts)}\n"
    return result
@tool
def find_leaderboard_elements() -> str:
    """
    Find key elements of a leaderboard: title, evaluation criteria, and model rankings.
    Returns a structured description of what was found.
    """
    from src.agents.browser import driver
    findings = []
    # Plain HTML tables are the most common leaderboard layout.
    tables = driver.find_elements(By.TAG_NAME, "table")
    if tables:
        findings.append(f"Found {len(tables)} table(s) that might contain leaderboard data.\n")
    # Ordered lists sometimes encode rankings directly.
    ordered_lists = driver.find_elements(By.TAG_NAME, "ol")
    if ordered_lists:
        findings.append(f"Found {len(ordered_lists)} ordered list(s) that might contain rankings.\n")
    # Custom leaderboards are often styled divs (grid/flex layouts).
    styled_divs = driver.find_elements(By.XPATH, "//div[contains(@class, 'grid') or contains(@class, 'flex') or contains(@class, 'table') or contains(@class, 'rank') or contains(@class, 'leaderboard')]")
    if styled_divs:
        findings.append(f"Found {len(styled_divs)} div elements with grid/flex/table classes that might be custom leaderboards.\n")
    # Any element whose class name hints at a rank or position marker.
    ranked = driver.find_elements(By.XPATH, "//*[contains(@class, 'rank') or contains(@class, 'position') or contains(@class, 'standing')]")
    if ranked:
        findings.append(f"Found {len(ranked)} elements with rank/position classes.\n")
    if not findings:
        return "Could not find any obvious leaderboard elements. Try scrolling or navigating to the correct section."
    return "".join(findings)
@tool
def map_clickable_elements(keyword: str = None) -> str:
    """
    Displays a list of all clickable elements on the page with their coordinates.
    Args:
        keyword: Optional keyword to filter elements. If specified, only elements containing this keyword will be displayed.
    Returns:
        A string listing all clickable elements with their coordinates.
    """
    from src.agents.browser import driver
    clickable_selectors = [
        "a", "button", "input[type='button']", "input[type='submit']",
        ".clickable", "[role='button']", "[onclick]"
    ]
    result = "Éléments cliquables détectés:\n"
    total = 0
    # Fix: the selectors overlap (e.g. <a onclick=...> matches both "a" and
    # "[onclick]"), so the same element was listed and counted several times.
    # Track WebElement ids to report each element exactly once.
    seen_ids = set()
    for selector in clickable_selectors:
        elements = driver.find_elements(By.CSS_SELECTOR, selector)
        for element in elements:
            try:
                if element.id in seen_ids:
                    continue
                text = element.text.strip()
                if not text and element.get_attribute("value"):
                    text = element.get_attribute("value")
                # Skip empty or invisible elements.
                if not text or not element.is_displayed():
                    continue
                # Filter by keyword when one is given.
                if keyword and keyword.lower() not in text.lower():
                    continue
                # Click target = center of the element's bounding box.
                rect = element.rect
                x = int(rect['x'] + rect['width']/2)
                y = int(rect['y'] + rect['height']/2)
                result += f"{total+1}. '{text}' ({selector}) - coords: x={x}, y={y}\n"
                total += 1
                seen_ids.add(element.id)
            except Exception:
                # Fix: was a bare `except:` — keep the intent (skip stale/detached
                # elements) without swallowing KeyboardInterrupt/SystemExit.
                continue
    result += f"\nTotal: {total} éléments cliquables" + (" contenant '" + keyword + "'" if keyword else "")
    return result
@tool
def copy_link_from_element(text_to_find: str, link_position: int = 1) -> str:
    """
    Find elements with specified text and return the URL if it's a link or has a parent link.
    Args:
        text_to_find: Text to search for
        link_position: If multiple matches, which one to use (1-based)
    Returns:
        A message containing the URL if one was found, or an explanation otherwise.
    """
    from src.agents.browser import driver
    try:
        # Fix: driver.find_element_by_xpath was removed in Selenium 4; use
        # find_elements + By.XPATH (which also makes the empty-result check real —
        # find_element raises instead of returning None, so the original
        # `if not element` branch was dead code).
        elements = driver.find_elements(By.XPATH, f"//*[contains(text(), '{text_to_find}')]")
        if not elements:
            return f"No element containing the text '{text_to_find}' was found."
        # Fix: link_position was accepted but never used; honor it, falling back
        # to the first match when it is out of range.
        if not 1 <= link_position <= len(elements):
            link_position = 1
        element = elements[link_position - 1]
        # Method 1: the element itself carries the URL.
        href = element.get_attribute("href")
        if href:
            return f"URL found: {href}"
        # Method 2: an ancestor <a> carries the URL.
        # Fix: these lookups raise NoSuchElementException on a miss, which the
        # original let escape to the outer handler, making the later fallback
        # methods unreachable. Handle each lookup independently.
        try:
            parent = element.find_element(By.XPATH, "./ancestor::a")
            href = parent.get_attribute("href")
            if href:
                return f"URL found in parent element: {href}"
        except Exception:
            pass  # No ancestor link; try the next method.
        # Method 3: a descendant <a> carries the URL.
        try:
            child = element.find_element(By.XPATH, ".//a")
            href = child.get_attribute("href")
            if href:
                return f"URL found in child element: {href}"
        except Exception:
            pass  # No child link either.
        # Method 4: right-click and use the browser's "copy link address" entry.
        actions = ActionChains(driver)
        actions.context_click(element).perform()
        time.sleep(1)  # Let the context menu appear.
        # Very browser- and locale-dependent; try a few known menu labels.
        copy_link_texts = ["Copy link address", "Copier l'adresse du lien", "Copy Link", "Copier le lien"]
        for text in copy_link_texts:
            try:
                link_option = driver.find_element(By.XPATH, f"//div[contains(text(), '{text}')]")
                link_option.click()
                return f"Action 'Copier l'adresse du lien' effectuée pour '{text_to_find}'"
            except Exception:
                continue
        # Dismiss the context menu before giving up.
        ActionChains(driver).send_keys(Keys.ESCAPE).perform()
        return f"Impossible de trouver un lien pour l'élément '{text_to_find}' avec les méthodes disponibles."
    except Exception as e:
        return f"Erreur lors de la recherche du lien: {str(e)}"
@tool
def validate_json_results(result: dict) -> tuple[bool, str]:
    """
    Checks that the results do not contain generic placeholders.
    Args:
        result: The result to validate
    Returns:
        A tuple containing a boolean indicating if the result is valid and a message
        explaining why the result is invalid if it is not valid.
    """
    if not result or not isinstance(result, dict):
        return False, "Invalid result"
    top_models = result.get("top_models", [])
    if "top_models" not in result or len(top_models) < 3:
        return False, "Less than 3 models found"
    # Check for duplicate models (case-insensitive).
    seen_models = set()
    for model in top_models:
        model_name = model.get("name", "").lower()
        if model_name in seen_models:
            return False, f"Duplicate model '{model.get('name')}' found. Please ensure each model is unique."
        seen_models.add(model_name)
    # Check for generic placeholder names.
    generic_names = ["model a", "model b", "model c", "model 1", "model 2", "model 3", "model name", "unavailable"]
    model_names = [m.get("name", "").lower() for m in top_models]
    if any(name in generic_names for name in model_names):
        return False, "Generic model names detected"
    # Check for unwanted parenthesized suffixes in model names.
    unwanted_suffix_pattern = r"\(.*\)$"
    for model in top_models:
        if re.search(unwanted_suffix_pattern, model.get("name", "")):
            return False, f"Model name '{model.get('name')}' contains unwanted suffixes. Please remove them if you think they are not part of the model name. If it's a version number or a date, keep it."
    # Check for placeholder URLs.
    generic_urls = ["example.com", "example.org"]
    model_urls = [m.get("url", "").lower() for m in top_models if m.get("url") is not None]
    if any(generic in url for url in model_urls for generic in generic_urls):
        return False, "Generic URLs detected"
    # Check for a 4-character submatch between model name and URL.
    for model in top_models:
        name = model.get("name", "").lower()
        url = model.get("url")
        # A missing/empty URL is acceptable; skip the submatch check.
        if not url:
            continue
        url = url.lower()
        # Fix: for names shorter than 4 characters the sliding-window range was
        # empty, so `any()` was vacuously False and the model was ALWAYS
        # rejected. Fall back to matching the whole short name instead.
        if len(name) < 4:
            has_submatch = bool(name) and name in url
        else:
            has_submatch = any(name[i:i + 4] in url for i in range(len(name) - 3))
        if not has_submatch:
            return False, f"URL for model '{model.get('name')}' does not have a valid submatch with the name. This is probably a wrong URL. Please check the URL and try again."
    # Check the evaluation criterion.
    if "evaluation_criteria" not in result or len(result.get("evaluation_criteria", "")) < 10:
        return False, "Evaluation criterion missing or too short"
    return True, "Valid results"
@tool
def find_model_links(model_name: str) -> str:
    """
    Search for links that might point to a model based on their URL
    and their match with the model name.
    Args:
        model_name: The name of the model to search for
    Returns:
        A list of potential links to the model
    """
    from src.agents.browser import driver
    try:
        # 1. Retrieve all links on the page.
        all_links = driver.find_elements(By.TAG_NAME, "a")
        if not all_links:
            return "No links were found on the page."
        # 2. Known URL patterns for model repositories.
        model_url_patterns = [
            r'huggingface\.co/[^/]+/[^/]+',  # Hugging Face model repo
            r'github\.com/[^/]+/[^/]+',  # GitHub repo
        ]
        model_links = []
        model_name_lower = model_name.lower()
        for link in all_links:
            try:
                # Only consider visible links that actually carry an href.
                if not link.is_displayed() or not link.get_attribute('href'):
                    continue
                link_url = link.get_attribute('href')
                link_text = link.text.strip()
                # Ignore links to non-relevant static resources.
                if link_url.endswith(('.png', '.jpg', '.jpeg', '.gif', '.svg', '.webp', '.ico', '.css', '.js')):
                    continue
                # Keep only URLs that match a known repository pattern.
                matches_pattern = any(re.search(pattern, link_url, re.IGNORECASE) for pattern in model_url_patterns)
                if matches_pattern:
                    url_lower = link_url.lower()
                    text_lower = link_text.lower()
                    # Require a 5-character substring of the model name to appear
                    # in BOTH the URL and the visible link text.
                    # (Fix: the original comments claimed 3 characters but the
                    # code used 5; and names shorter than 5 characters produced
                    # an empty window range and could NEVER match — fall back to
                    # matching the whole short name.)
                    has_submatch = False
                    if len(model_name_lower) < 5:
                        has_submatch = bool(model_name_lower) and model_name_lower in url_lower and model_name_lower in text_lower
                    else:
                        for i in range(len(model_name_lower) - 4):
                            if model_name_lower[i:i+5] in url_lower and model_name_lower[i:i+5] in text_lower:
                                has_submatch = True
                                break
                    if has_submatch:
                        # Crude confidence score: count name characters present in the link text.
                        confidence = sum(1 for c in model_name_lower if c in text_lower)
                        model_links.append({
                            'url': link_url,
                            'text': link_text,
                            'confidence': confidence
                        })
            except Exception:
                continue  # Skip stale/odd elements and keep scanning.
        # 3. Format the result.
        if not model_links:
            return f"No potential links to the model '{model_name}' were found."
        result = f"Found {len(model_links)} potential links for the model '{model_name}':\n\n"
        for i, link in enumerate(model_links):
            result += f"Candidate {i+1}:\n"
            result += f"URL: {link['url']}\n"
            result += f"Text: {link['text']}\n"
            result += f"Confidence: {link['confidence']}\n\n"
        # 4. Suggest the best candidate (highest confidence). The original
        # re-checked `if model_links:` here, but that branch was unreachable.
        best_candidate = max(model_links, key=lambda x: x['confidence'])
        result += f"Best candidate for '{model_name}':\nURL: {best_candidate['url']}\nText: {best_candidate['text']} "
        return result
    except Exception as e:
        return f"Error while searching for links for the model '{model_name}': {str(e)}"
@tool
def click_at_coordinates(x: int, y: int) -> str:
    """
    Clicks at the specified x,y coordinates on the page.
    This is useful when other targeting methods fail or when dealing with complex UI elements.
    Args:
        x: The x-coordinate to click at (viewport pixels)
        y: The y-coordinate to click at (viewport pixels)
    Returns:
        A message confirming the click action
    """
    from src.agents.browser import driver
    try:
        # Fix: ActionChains.move_by_offset moves RELATIVE to the current pointer
        # position, so after any earlier pointer action the click landed away
        # from the requested coordinates (and could raise "move target out of
        # bounds"). Resolve the element rendered at the absolute viewport point
        # and click it directly instead.
        element = driver.execute_script(
            "return document.elementFromPoint(arguments[0], arguments[1]);", x, y
        )
        if element is not None:
            driver.execute_script("arguments[0].click();", element)
        else:
            # Nothing renders at that point (e.g. outside the viewport);
            # fall back to a best-effort relative pointer move.
            actions = ActionChains(driver)
            actions.move_by_offset(x, y).click().perform()
            actions.reset_actions()
        time.sleep(1)  # Wait a moment for any reactions to the click.
        return f"Successfully clicked at coordinates ({x}, {y})"
    except Exception as e:
        return f"Failed to click at coordinates ({x}, {y}): {str(e)}"