# axel-darmouni's picture
# update
# 2508004
# raw
# history blame
# 3.99 kB
import os
from tools.webpage_tools import (
visit_webpage,
get_all_links,
read_file_from_url,
)
from tools.exploration_tools import (
get_dataset_description,
)
from tools.drawing_tools import (
plot_departments_data,
)
from tools.libreoffice_tools import (
convert_to_pdf_with_libreoffice,
check_libreoffice_availability,
)
from smolagents import (
CodeAgent,
DuckDuckGoSearchTool,
LiteLLMModel,
)
def create_web_agent(step_callback):
    """Build and return the CodeAgent that drives the dataset-report workflow.

    The agent is backed by a Gemini model reached through LiteLLM; the API key
    is read from the ``GEMINI_API_KEY`` environment variable.

    Args:
        step_callback: Callable registered as the single entry of the agent's
            ``step_callbacks`` list. (Its exact call signature is defined by
            smolagents, not by this module.)

    Returns:
        The configured ``CodeAgent`` instance.
    """
    # Instantiate the search tool before the model, as the original code did.
    web_search = DuckDuckGoSearchTool()
    llm = LiteLLMModel(
        model_id="gemini/gemini-2.5-flash-preview-05-20",
        api_key=os.getenv("GEMINI_API_KEY"),
    )

    # Every tool the agent may call: web search/browsing, dataset inspection,
    # map drawing and the LibreOffice-based PDF conversion helpers.
    agent_tools = [
        web_search,
        visit_webpage,
        get_all_links,
        read_file_from_url,
        get_dataset_description,
        plot_departments_data,
        convert_to_pdf_with_libreoffice,
        check_libreoffice_availability,
    ]

    # Modules the agent-generated code is allowed to import.
    # NOTE(review): "os" and "subprocess" are authorized here although the
    # prompt built by generate_prompt tells the agent not to use the 'os'
    # module nor direct subprocess calls -- confirm this is intentional.
    allowed_imports = [
        "subprocess",
        "docx",
        "docx.*",
        "os",
        "bs4",
        "io",
        "requests",
        "json",
        "pandas",
        "matplotlib",
        "matplotlib.pyplot",
        "matplotlib.*",
        "numpy",
        "seaborn",
    ]

    return CodeAgent(
        tools=agent_tools,
        model=llm,
        max_steps=30,
        verbosity_level=1,  # Reduced verbosity for cleaner output
        planning_interval=3,
        step_callbacks=[step_callback],  # Use the built-in callback system
        additional_authorized_imports=allowed_imports,
    )
def generate_prompt(data_gouv_page):
    """Return the task prompt asking the agent to build a PDF report.

    Args:
        data_gouv_page: Page reference interpolated into the second line of
            the prompt ("from ..."); it is formatted with normal f-string
            rules, so any object with a string representation works.

    Returns:
        The full multi-line instruction text as a single string.
    """
    # Only the opening two lines depend on the argument.
    opening = (
        "Fetch me a dataset that can be just read by using the read_file_from_url tool\n"
        f"from {data_gouv_page}\n"
    )

    # The remaining instructions are static text (no interpolation needed).
    instructions = """Follow the steps below to generate a pdf report from the dataset.
The steps should be as follows:
1. Examine the page
2. Get all links
3. Get the dataset from the link
4. Get information about the dataset using the get_dataset_description tool
5. Decide on what you can draw based on either department or region data
5.1 if no data department or region level, look for another file!
6. Draw a map of France using your idea
7. Save the map in png file
8. Make as well 3 additional visualizations, not maps, that you can save in png files
9. Write an interesting analysis text for each of your visualizations. Be smart and think cleverly about the data and what it can state
10. Think of next step analysis to look at the data
11. Generate a comprehensive PDF report using the python-docx library that includes:
- A title page with the dataset name and analysis overview
- All your visualizations (PNG files) embedded in the report
- Your analysis text for each visualization
- Conclusions and next steps
Make the visualizations appropriately sized so they fit well in the PDF report.
Convert then that docx file to pdf using the convert_to_pdf_with_libreoffice tool.
Do not overcommit, just do the steps one by one and it should go fine! Do not, under any circumstance, use the 'os' module!
Do not generate a lot of code every step, go slowly but surely and it will work out. Save everything within the generated_data folder.
If question is in english, report is in english.
If question is in french, report is in french.
IMPORTANT LIBREOFFICE NOTES:
- If you need to use LibreOffice, first call check_libreoffice_availability() to verify it's available
- If LibreOffice is available, "LibreOffice found" is returned by "check_libreoffice_availability()"
- Use convert_to_pdf_with_libreoffice() tool instead of subprocess calls
- Do NOT use subprocess.run(['libreoffice', ...]) or subprocess.run(['soffice', ...]) directly
- The LibreOffice tools handle macOS, Linux, and Windows path differences automatically
"""
    return opening + instructions