Spaces:
Build error
Build error
add model factory
Browse files- .env.example +4 -0
- .gitignore +7 -4
- endpoint-scripts/mount-endpoint.py +22 -0
- poetry.lock +3 -3
- src/__pycache__/agent.cpython-310.pyc +0 -0
- src/__pycache__/browser.cpython-310.pyc +0 -0
- src/__pycache__/browser_utils.cpython-310.pyc +0 -0
- src/__pycache__/file_utils.cpython-310.pyc +0 -0
- src/__pycache__/hub_utils.cpython-310.pyc +0 -0
- src/__pycache__/leaderboard_processor.cpython-310.pyc +0 -0
- src/__pycache__/processor.cpython-310.pyc +0 -0
- src/__pycache__/scheduler.cpython-310.pyc +0 -0
- src/__pycache__/server.cpython-310.pyc +0 -0
- src/__pycache__/tools.cpython-310.pyc +0 -0
- src/agents/fact_checker/fact_checker_agent.py +0 -3
- src/agents/model_factory.py +114 -0
- src/agents/{parser/parser_agent.py → parser_agent.py} +38 -17
- src/leaderboard_processor.py +1 -1
- src/processor.py +1 -1
.env.example
CHANGED
@@ -1,6 +1,10 @@
|
|
|
|
|
|
1 |
# Hugging Face Hub token (required)
|
2 |
# Create one at https://huggingface.co/settings/tokens
|
3 |
HUGGING_FACE_HUB_TOKEN=your_token_here
|
|
|
|
|
4 |
|
5 |
OPENAI_API_KEY=sk-proj-xxxx
|
6 |
|
|
|
1 |
+
MODEL_PROVIDER=huggingface
|
2 |
+
|
3 |
# Hugging Face Hub token (required)
|
4 |
# Create one at https://huggingface.co/settings/tokens
|
5 |
HUGGING_FACE_HUB_TOKEN=your_token_here
|
6 |
+
HUGGING_FACE_INFERENCE_ENDPOINT_URL=https://xxxxxxxxxxx.us-east-1.aws.endpoints.huggingface.cloud
|
7 |
+
HUGGING_FACE_MODEL_NAME=google/gemma-3-12b-it
|
8 |
|
9 |
OPENAI_API_KEY=sk-proj-xxxx
|
10 |
|
.gitignore
CHANGED
@@ -1,3 +1,7 @@
|
|
|
|
|
|
|
|
|
|
1 |
# Logs
|
2 |
logs
|
3 |
*.log
|
@@ -8,6 +12,8 @@ pnpm-debug.log*
|
|
8 |
lerna-debug.log*
|
9 |
|
10 |
__pycache__
|
|
|
|
|
11 |
*.pyc
|
12 |
|
13 |
client/node_modules
|
@@ -37,7 +43,4 @@ server/data/leaderboards_results.json
|
|
37 |
server/data/leaderboards_runtime.json
|
38 |
|
39 |
cache/
|
40 |
-
server/cache
|
41 |
-
.env
|
42 |
-
data/best_model_for_results.json
|
43 |
-
data/final_leaderboards.json
|
|
|
1 |
+
.env
|
2 |
+
data/best_model_for_results.json
|
3 |
+
data/final_leaderboards.json
|
4 |
+
|
5 |
# Logs
|
6 |
logs
|
7 |
*.log
|
|
|
12 |
lerna-debug.log*
|
13 |
|
14 |
__pycache__
|
15 |
+
**/*/__pycache__
|
16 |
+
src/__pycache__
|
17 |
*.pyc
|
18 |
|
19 |
client/node_modules
|
|
|
43 |
server/data/leaderboards_runtime.json
|
44 |
|
45 |
cache/
|
46 |
+
server/cache
|
|
|
|
|
|
endpoint-scripts/mount-endpoint.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from huggingface_hub import create_inference_endpoint
|
2 |
+
|
3 |
+
def main():
    """
    Create the vLLM inference endpoint for Meta-Llama-3-8B-Instruct and report its status.

    The call is kept behind an entry-point guard so that merely importing or
    inspecting this file never provisions a GPU endpoint by accident.
    """
    endpoint = create_inference_endpoint(
        "vllm-meta-llama-3-8b-instruct",
        repository="meta-llama/Meta-Llama-3-8B-Instruct",
        framework="pytorch",
        task="custom",
        accelerator="gpu",
        vendor="aws",
        region="us-east-1",
        type="protected",
        instance_type="g5.2xlarge",
        instance_size="medium",
        # Custom container image serving the model; MAX_MODEL_LEN is passed to it as an env var.
        custom_image={
            "health_route": "/health",
            "env": {"MAX_MODEL_LEN": "8192"},
            "url": "philschmi/vllm-hf-inference-endpoints",
        },
    )

    # Block until the endpoint reports back, then show the resulting status.
    endpoint.wait()
    print(endpoint.status)


if __name__ == "__main__":
    main()
|
poetry.lock
CHANGED
@@ -1495,13 +1495,13 @@ files = [
|
|
1495 |
|
1496 |
[[package]]
|
1497 |
name = "openai"
|
1498 |
-
version = "1.66.
|
1499 |
description = "The official Python library for the openai API"
|
1500 |
optional = false
|
1501 |
python-versions = ">=3.8"
|
1502 |
files = [
|
1503 |
-
{file = "openai-1.66.
|
1504 |
-
{file = "openai-1.66.
|
1505 |
]
|
1506 |
|
1507 |
[package.dependencies]
|
|
|
1495 |
|
1496 |
[[package]]
|
1497 |
name = "openai"
|
1498 |
+
version = "1.66.5"
|
1499 |
description = "The official Python library for the openai API"
|
1500 |
optional = false
|
1501 |
python-versions = ">=3.8"
|
1502 |
files = [
|
1503 |
+
{file = "openai-1.66.5-py3-none-any.whl", hash = "sha256:74be528175f8389f67675830c51a15bd51e874425c86d3de6153bf70ed6c2884"},
|
1504 |
+
{file = "openai-1.66.5.tar.gz", hash = "sha256:f61b8fac29490ca8fdc6d996aa6926c18dbe5639536f8c40219c40db05511b11"},
|
1505 |
]
|
1506 |
|
1507 |
[package.dependencies]
|
src/__pycache__/agent.cpython-310.pyc
DELETED
Binary file (13 kB)
|
|
src/__pycache__/browser.cpython-310.pyc
DELETED
Binary file (2.27 kB)
|
|
src/__pycache__/browser_utils.cpython-310.pyc
DELETED
Binary file (3.56 kB)
|
|
src/__pycache__/file_utils.cpython-310.pyc
DELETED
Binary file (8.2 kB)
|
|
src/__pycache__/hub_utils.cpython-310.pyc
DELETED
Binary file (4.29 kB)
|
|
src/__pycache__/leaderboard_processor.cpython-310.pyc
DELETED
Binary file (3.65 kB)
|
|
src/__pycache__/processor.cpython-310.pyc
DELETED
Binary file (9.87 kB)
|
|
src/__pycache__/scheduler.cpython-310.pyc
DELETED
Binary file (2.52 kB)
|
|
src/__pycache__/server.cpython-310.pyc
DELETED
Binary file (2.44 kB)
|
|
src/__pycache__/tools.cpython-310.pyc
DELETED
Binary file (11.3 kB)
|
|
src/agents/fact_checker/fact_checker_agent.py
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
"""
|
2 |
-
Agent management for the agent leaderboard parser fact checker.
|
3 |
-
"""
|
|
|
|
|
|
|
|
src/agents/model_factory.py
ADDED
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import traceback
|
3 |
+
|
4 |
+
def get_temperature():
    """
    Read the model temperature from the MODEL_TEMPERATURE environment variable.

    Falls back to 0.2 when the variable is unset, is not parseable as a float,
    or parses to NaN. Any other value is clamped to the range [0.0, 1.0].

    Returns:
        The temperature to use for the models (float between 0 and 1)
    """
    import math  # local import so this function does not depend on the file's import list

    default_temperature = 0.2
    temp_str = os.environ.get("MODEL_TEMPERATURE", "0.2")

    try:
        temp = float(temp_str)
        # "nan" parses successfully, but every comparison against NaN is False,
        # so the clamp below would silently turn it into 1.0. Treat it as invalid.
        if math.isnan(temp):
            raise ValueError("MODEL_TEMPERATURE is NaN")
    except ValueError:
        print(f"AVERTISSEMENT: La valeur MODEL_TEMPERATURE '{temp_str}' n'est pas un nombre valide. Utilisation de la valeur par défaut 0.2.")
        return default_temperature

    # Clamp the temperature to [0, 1]
    return max(0.0, min(1.0, temp))
|
20 |
+
|
21 |
+
def get_hf_model():
    """
    Build a model served through a custom Hugging Face inference endpoint.

    Configuration is read from HUGGING_FACE_INFERENCE_ENDPOINT_URL,
    HUGGING_FACE_HUB_TOKEN and HUGGING_FACE_MODEL_NAME (default
    "google/gemma-3-12b-it"); the temperature comes from get_temperature().

    Returns:
        The configured model, or None when the endpoint URL or token is
        missing or when the model cannot be initialized
    """
    env = os.environ
    endpoint_url = env.get("HUGGING_FACE_INFERENCE_ENDPOINT_URL")
    token = env.get("HUGGING_FACE_HUB_TOKEN")
    model_name = env.get("HUGGING_FACE_MODEL_NAME", "google/gemma-3-12b-it")
    temperature = get_temperature()

    # Both the endpoint URL and the token are mandatory
    if not (endpoint_url and token):
        print("ERREUR: HUGGING_FACE_INFERENCE_ENDPOINT_URL ou HUGGING_FACE_HUB_TOKEN non configuré.")
        return None

    print(f"Utilisation du modèle {model_name} via endpoint personnalisé: {endpoint_url}")
    print(f"Température configurée: {temperature}")

    try:
        # The endpoint is addressed through smolagents' OpenAIServerModel client
        from smolagents import OpenAIServerModel

        hf_model = OpenAIServerModel(
            model_id=model_name,
            api_base=endpoint_url,
            api_key=token,
            temperature=temperature,
        )
        print("Modèle OpenAIServerModel initialisé avec succès!")
        return hf_model
    except Exception as exc:
        # Best effort: report the failure and let the caller decide on a fallback
        full_trace = traceback.format_exc()
        print(f"ERREUR lors de l'initialisation de OpenAIServerModel: {exc}")
        print(f"Traceback complet: {full_trace}")
        return None
|
59 |
+
|
60 |
+
def get_openai_model():
    """
    Build the OpenAI GPT-4o model through smolagents' LiteLLMModel.

    The API key is read from OPENAI_API_KEY and the temperature from
    get_temperature().

    Returns:
        The configured model, or None when the key is missing or the model
        cannot be initialized
    """
    api_key = os.environ.get("OPENAI_API_KEY")
    temperature = get_temperature()

    if not api_key:
        print("ERREUR: OPENAI_API_KEY non configuré.")
        return None

    try:
        # OpenAI configuration
        from smolagents import LiteLLMModel

        print("Utilisation du modèle OpenAI GPT-4o")
        print(f"Température configurée: {temperature}")

        openai_model = LiteLLMModel(
            "gpt-4o",
            temperature=temperature,
            api_key=api_key,
        )
        return openai_model
    except Exception as exc:
        # Best effort: report the failure and signal it with None
        full_trace = traceback.format_exc()
        print(f"ERREUR lors de l'initialisation de LiteLLMModel: {exc}")
        print(f"Traceback complet: {full_trace}")
        return None
|
91 |
+
|
92 |
+
def get_model():
    """
    Select and build the model according to the MODEL_PROVIDER environment variable.

    MODEL_PROVIDER defaults to "openai" and is compared case-insensitively.
    With "huggingface", a failed Hugging Face initialization falls back to
    the OpenAI model.

    Returns:
        A configured model, or None when initialization fails or the
        provider is not recognized
    """
    provider = os.environ.get("MODEL_PROVIDER", "openai").lower()

    if provider == "huggingface":
        hf_model = get_hf_model()
        if hf_model:
            return hf_model
        # Hugging Face could not be set up: fall back to OpenAI
        print("Passage à OpenAI suite à l'erreur.")
        return get_openai_model()

    if provider == "openai":
        return get_openai_model()

    # Reaching this point means MODEL_PROVIDER is neither huggingface nor openai
    print(f"ERREUR: MODEL_PROVIDER '{provider}' non reconnu. Utiliser 'huggingface' ou 'openai'")
    return None
|
src/agents/{parser/parser_agent.py → parser_agent.py}
RENAMED
@@ -1,9 +1,13 @@
|
|
1 |
"""
|
2 |
Agent management for the leaderboard parser.
|
3 |
"""
|
|
|
4 |
import datetime
|
|
|
|
|
5 |
from smolagents import CodeAgent
|
6 |
-
from smolagents.
|
|
|
7 |
|
8 |
from src.agents.browser import save_screenshot
|
9 |
from src.agents.tools import (
|
@@ -20,38 +24,46 @@ from src.agents.tools import (
|
|
20 |
)
|
21 |
|
22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
def initialize_agent(model):
|
24 |
"""
|
25 |
-
Initialize
|
26 |
|
27 |
Args:
|
28 |
-
model: The
|
29 |
|
30 |
Returns:
|
31 |
-
|
32 |
"""
|
33 |
return CodeAgent(
|
34 |
tools=[go_back, map_clickable_elements, validate_json_results, close_popups, search_item_ctrl_f, extract_table_data, find_leaderboard_elements, copy_link_from_element, find_model_links, click_at_coordinates],
|
35 |
model=model,
|
36 |
-
additional_authorized_imports=["selenium", "helium", "time", "json", "re"],
|
37 |
step_callbacks=[save_screenshot],
|
38 |
max_steps=25,
|
39 |
verbosity_level=2,
|
40 |
)
|
41 |
|
42 |
|
43 |
-
def get_default_model():
|
44 |
-
"""
|
45 |
-
Get the default model for the agent.
|
46 |
-
|
47 |
-
Returns:
|
48 |
-
A configured model instance
|
49 |
-
"""
|
50 |
-
model_type = "LiteLLMModel"
|
51 |
-
model_id = "gpt-4o"
|
52 |
-
return load_model(model_type, model_id)
|
53 |
-
|
54 |
-
|
55 |
# Instructions for the agent
|
56 |
leaderboard_instructions = """
|
57 |
Your task is to extract the three BEST models from the leaderboard. It is crucial that you identify the models that are at the top of the ranking, not just any three models present on the page.
|
@@ -285,6 +297,15 @@ def process_leaderboard(url, model, index, uid=None, additional_rules=None):
|
|
285 |
now = datetime.datetime.now()
|
286 |
parsed_at = now.isoformat()
|
287 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
288 |
initialize_driver()
|
289 |
|
290 |
agent = initialize_agent(model)
|
|
|
1 |
"""
|
2 |
Agent management for the leaderboard parser.
|
3 |
"""
|
4 |
+
import os
|
5 |
import datetime
|
6 |
+
import json
|
7 |
+
import requests
|
8 |
from smolagents import CodeAgent
|
9 |
+
from smolagents.models import HfApiModel
|
10 |
+
from src.agents.model_factory import get_model as get_default_model
|
11 |
|
12 |
from src.agents.browser import save_screenshot
|
13 |
from src.agents.tools import (
|
|
|
24 |
)
|
25 |
|
26 |
|
27 |
+
def load_model(model_type, model_id):
    """
    Instantiate a model from its type name and model ID.

    Args:
        model_type: Name of the model class, "HfApiModel" or "LiteLLMModel"
        model_id: Identifier handed to the model constructor

    Returns:
        The instantiated model

    Raises:
        ValueError: If model_type is not one of the supported names
    """
    if model_type == "HfApiModel":
        return HfApiModel(model_id=model_id)

    if model_type == "LiteLLMModel":
        # Imported on demand, only when this model type is requested
        from smolagents import LiteLLMModel

        return LiteLLMModel(model_id)

    raise ValueError(f"Unknown model type: {model_type}")
|
45 |
+
|
46 |
+
|
47 |
def initialize_agent(model):
    """
    Create the CodeAgent used to parse leaderboards.

    Args:
        model: The model the agent should run on

    Returns:
        A CodeAgent configured with the browser tools, the screenshot step
        callback, a 25-step limit and verbosity level 2
    """
    agent_tools = [
        go_back,
        map_clickable_elements,
        validate_json_results,
        close_popups,
        search_item_ctrl_f,
        extract_table_data,
        find_leaderboard_elements,
        copy_link_from_element,
        find_model_links,
        click_at_coordinates,
    ]
    # Modules the agent's generated code is allowed to import
    authorized_imports = ["selenium", "helium", "time", "json", "re", "src.agents.browser"]

    return CodeAgent(
        tools=agent_tools,
        model=model,
        additional_authorized_imports=authorized_imports,
        step_callbacks=[save_screenshot],
        max_steps=25,
        verbosity_level=2,
    )
|
65 |
|
66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
# Instructions for the agent
|
68 |
leaderboard_instructions = """
|
69 |
Your task is to extract the three BEST models from the leaderboard. It is crucial that you identify the models that are at the top of the ranking, not just any three models present on the page.
|
|
|
297 |
now = datetime.datetime.now()
|
298 |
parsed_at = now.isoformat()
|
299 |
|
300 |
+
# Vérifier si le modèle est None
|
301 |
+
if model is None:
|
302 |
+
return {
|
303 |
+
"results": None,
|
304 |
+
"parsing_status": "error",
|
305 |
+
"parsing_message": "Model initialization failed - check HUGGING_FACE_INFERENCE_ENDPOINT_URL and HUGGING_FACE_HUB_TOKEN",
|
306 |
+
"parsed_at": parsed_at
|
307 |
+
}
|
308 |
+
|
309 |
initialize_driver()
|
310 |
|
311 |
agent = initialize_agent(model)
|
src/leaderboard_processor.py
CHANGED
@@ -3,7 +3,7 @@ Leaderboard processing.
|
|
3 |
"""
|
4 |
import datetime
|
5 |
import os
|
6 |
-
from src.agents.
|
7 |
from src.file_utils import create_category_slug, split_combined_id
|
8 |
|
9 |
|
|
|
3 |
"""
|
4 |
import datetime
|
5 |
import os
|
6 |
+
from src.agents.parser_agent import process_leaderboard
|
7 |
from src.file_utils import create_category_slug, split_combined_id
|
8 |
|
9 |
|
src/processor.py
CHANGED
@@ -16,7 +16,7 @@ from src.file_utils import create_category_slug, split_combined_id, create_combi
|
|
16 |
from src.file_utils import load_and_validate_results, validate_leaderboard_result
|
17 |
from src.hub_utils import upload_to_hub, download_from_hub
|
18 |
from src.leaderboard_processor import process_single_leaderboard
|
19 |
-
from src.agents.
|
20 |
from src.agents.browser import cleanup_browser
|
21 |
|
22 |
# Configure logger
|
|
|
16 |
from src.file_utils import load_and_validate_results, validate_leaderboard_result
|
17 |
from src.hub_utils import upload_to_hub, download_from_hub
|
18 |
from src.leaderboard_processor import process_single_leaderboard
|
19 |
+
from src.agents.parser_agent import get_default_model
|
20 |
from src.agents.browser import cleanup_browser
|
21 |
|
22 |
# Configure logger
|