|
|
import os |
|
|
from tools.webpage_tools import ( |
|
|
visit_webpage, |
|
|
get_all_links, |
|
|
read_file_from_url, |
|
|
) |
|
|
from tools.exploration_tools import ( |
|
|
get_dataset_description, |
|
|
) |
|
|
from tools.drawing_tools import ( |
|
|
plot_departments_data, |
|
|
) |
|
|
from tools.libreoffice_tools import ( |
|
|
convert_to_pdf_with_libreoffice, |
|
|
check_libreoffice_availability, |
|
|
) |
|
|
from smolagents import ( |
|
|
CodeAgent, |
|
|
DuckDuckGoSearchTool, |
|
|
LiteLLMModel, |
|
|
) |
|
|
|
|
|
def create_web_agent(step_callback):
    """Build the ``CodeAgent`` used to explore a page and produce a report.

    The agent is backed by a ``LiteLLMModel`` pointing at
    ``gemini/gemini-2.5-flash-preview-05-20``; its API key is read from the
    ``GEMINI_API_KEY`` environment variable (``os.getenv`` yields ``None``
    when the variable is unset).

    Args:
        step_callback: Object registered as the single entry of the agent's
            ``step_callbacks`` list.

    Returns:
        The configured ``CodeAgent`` instance.
    """
    web_search = DuckDuckGoSearchTool()
    llm = LiteLLMModel(
        model_id="gemini/gemini-2.5-flash-preview-05-20",
        api_key=os.getenv("GEMINI_API_KEY"),
    )

    # Tools exposed to the agent: search, web page access, dataset
    # description, plotting and LibreOffice helpers.
    agent_tools = [
        web_search,
        visit_webpage,
        get_all_links,
        read_file_from_url,
        get_dataset_description,
        plot_departments_data,
        convert_to_pdf_with_libreoffice,
        check_libreoffice_availability,
    ]

    # Modules passed as ``additional_authorized_imports``.
    # NOTE(review): generate_prompt tells the agent never to use 'os' and not
    # to call subprocess directly, yet both are authorized here — confirm
    # this is intentional.
    allowed_imports = [
        "subprocess",
        "docx",
        "docx.*",
        "os",
        "bs4",
        "io",
        "requests",
        "json",
        "pandas",
        "matplotlib",
        "matplotlib.pyplot",
        "matplotlib.*",
        "numpy",
        "seaborn",
    ]

    return CodeAgent(
        tools=agent_tools,
        model=llm,
        max_steps=30,
        verbosity_level=1,
        planning_interval=3,
        step_callbacks=[step_callback],
        additional_authorized_imports=allowed_imports,
    )
|
|
|
|
|
def generate_prompt(data_gouv_page):
    """Return the task prompt for the web agent.

    The prompt asks the agent to fetch a dataset from ``data_gouv_page``,
    analyse it, draw a map plus three further visualizations, and assemble
    a PDF report, followed by usage notes for the LibreOffice tools.

    Args:
        data_gouv_page: Value interpolated into the prompt as the page the
            dataset should be fetched from.

    Returns:
        The full prompt text as a single string.
    """
    # The prompt is assembled from four sections; the text inside each
    # literal starts at column 0 because it is emitted verbatim at runtime.
    intro = f"""Fetch me a dataset that can be just read by using the read_file_from_url tool
from {data_gouv_page}
Follow the steps below to generate a pdf report from the dataset.

"""
    steps = """The steps should be as follows:
1. Examine the page
2. Get all links
3. Get the dataset from the link
4. Get information about the dataset using the get_dataset_description tool
5. Decide on what you can draw based on either department or region data
5.1 if no data department or region level, look for another file!
6. Draw a map of France using your idea
7. Save the map in png file
8. Make as well 3 additional visualizations, not maps, that you can save in png files
9. Write an interesting analysis text for each of your visualizations. Be smart and think cleverly about the data and what it can state
10. Think of next step analysis to look at the data
11. Generate a comprehensive PDF report using the python-docx library that includes:
- A title page with the dataset name and analysis overview
- All your visualizations (PNG files) embedded in the report
- Your analysis text for each visualization
- Conclusions and next steps
Make the visualizations appropriately sized so they fit well in the PDF report.
Convert then that docx file to pdf using the convert_to_pdf_with_libreoffice tool.

"""
    guidance = """Do not overcommit, just do the steps one by one and it should go fine! Do not, under any circumstance, use the 'os' module!
Do not generate a lot of code every step, go slowly but surely and it will work out. Save everything within the generated_data folder.
If question is in english, report is in english.
If question is in french, report is in french.

"""
    libreoffice_notes = """IMPORTANT LIBREOFFICE NOTES:
- If you need to use LibreOffice, first call check_libreoffice_availability() to verify it's available
- If LibreOffice is available, "LibreOffice found" is returned by "check_libreoffice_availability()"
- Use convert_to_pdf_with_libreoffice() tool instead of subprocess calls
- Do NOT use subprocess.run(['libreoffice', ...]) or subprocess.run(['soffice', ...]) directly
- The LibreOffice tools handle macOS, Linux, and Windows path differences automatically
"""
    return intro + steps + guidance + libreoffice_notes