datagouv-french-data-analyst / followup_agent.py
axel-darmouni's picture
all gemini
244cc53
import os
from tools.followup_tools import (
load_previous_dataset,
get_dataset_summary,
create_followup_visualization,
get_previous_report_content,
analyze_column_correlation,
create_statistical_summary,
filter_and_visualize_data,
)
from tools.retrieval_tools import (
search_datasets,
get_dataset_info,
)
from smolagents import (
CodeAgent,
DuckDuckGoSearchTool,
LiteLLMModel,
)
def create_followup_agent():
    """Build and return the agent used to answer follow-up questions.

    The agent is a ``CodeAgent`` driven by a Gemini model reached through
    ``LiteLLMModel``; the API key is read from the ``GEMINI_API_KEY``
    environment variable. It is given a DuckDuckGo web-search tool, the
    follow-up dataset/report tools and the dataset retrieval tools.

    Returns:
        A newly constructed ``CodeAgent``; a fresh one is built on each call.
    """
    web_search = DuckDuckGoSearchTool()

    llm = LiteLLMModel(
        api_key=os.getenv("GEMINI_API_KEY"),
        model_id="gemini/gemini-2.5-flash-preview-05-20",
    )

    # Tool order is kept as-is: web search, follow-up tools, retrieval tools.
    toolbox = [
        web_search,
        load_previous_dataset,
        get_dataset_summary,
        create_followup_visualization,
        get_previous_report_content,
        analyze_column_correlation,
        create_statistical_summary,
        filter_and_visualize_data,
        search_datasets,
        get_dataset_info,
    ]

    # Modules the agent's generated code is permitted to import.
    allowed_imports = [
        "pandas",
        "numpy",
        "matplotlib",
        "matplotlib.pyplot",
        "seaborn",
        "os",
        "json",
        "datetime",
        "math",
        "statistics",
    ]

    return CodeAgent(
        tools=toolbox,
        model=llm,
        additional_authorized_imports=allowed_imports,
        max_steps=20,
        planning_interval=2,
        verbosity_level=1,
    )
def generate_followup_prompt(user_question, report_context=None):
    """Build the instruction prompt sent to the follow-up agent.

    The prompt embeds the user's question (twice: once at the top and once
    as the closing instruction), describes how to classify the question,
    lists the available tools and gives response guidelines.

    Args:
        user_question: The follow-up question, interpolated verbatim.
        report_context: Optional text about the previous report. When truthy
            it is appended under an "ADDITIONAL CONTEXT" heading; ``None`` or
            an empty string leaves the base prompt unchanged.

    Returns:
        str: The complete prompt text.
    """
    # NOTE: the arrows below were previously stored as the mojibake sequence
    # "β†’" (UTF-8 bytes of "→" decoded with the wrong code page); they are
    # restored here so the model does not receive garbled text.
    base_prompt = f"""You are a data analysis assistant helping with follow-up questions about a previously generated report.
USER'S FOLLOW-UP QUESTION: "{user_question}"
IMPORTANT: Analyze the user's question carefully to determine what type of response is needed.
QUESTION ANALYSIS:
- **General Information/Web Search**: If the question asks for general information, definitions, or requires web search (e.g., "What is X?", "Tell me about Y", "How does Z work?"), use web search tools and provide a text answer. DO NOT load datasets or create visualizations.
- **Dataset Questions**: If the question is specifically about the dataset, data analysis, statistics, or requests visualizations (e.g., "Show me correlation", "Create a chart", "What's the distribution of X?", "Filter by Y"), then use dataset tools.
- **Report Context**: If the question asks about the previous report content or findings, use get_previous_report_content() and provide a text answer.
AVAILABLE TOOLS:
1. **DuckDuckGoSearchTool** - For general web search and information queries
2. **get_previous_report_content()** - Get context about the previous report
3. **load_previous_dataset()** - Load the dataset (ONLY if question needs data analysis)
4. **get_dataset_summary(df)** - Get dataset structure info
5. **create_followup_visualization()** - Create charts (ONLY if explicitly requested)
6. **analyze_column_correlation()** - Analyze column relationships with plots
7. **create_statistical_summary()** - Generate stats with visualizations
8. **filter_and_visualize_data()** - Filter data and create targeted visualizations
9. **search_datasets()** - Search for additional datasets
10. **get_dataset_info()** - Get info about specific datasets
RESPONSE STRATEGY:
1. **First, determine the question type:**
- Is this a general knowledge/web search question? → Use web search, provide text answer
- Is this about the previous report content? → Use get_previous_report_content(), provide text answer
- Is this a data analysis question? → Use dataset tools as needed
- Does it explicitly ask for a chart/visualization? → Use appropriate visualization tools
2. **For data analysis questions only:**
- Load dataset if you need to analyze the actual data
- Create visualizations ONLY if the user explicitly asks for them or if the question specifically requires visual analysis (correlations, distributions, trends)
- Provide clear text explanations
3. **Be efficient with tool usage:**
- Use the minimum number of tools needed to answer the question
- Don't create visualizations unless requested or clearly needed
- Don't load datasets for general questions
GUIDELINES:
- Match your response type to the question type
- Be direct and efficient - don't over-engineer the response
- Only use data analysis tools when the question is actually about data analysis
- Provide concise, helpful answers focused on what the user asked
Answer the user's question: "{user_question}"
"""
    # Optional trailing section; skipped for None and for empty strings.
    if report_context:
        base_prompt += f"""
ADDITIONAL CONTEXT ABOUT PREVIOUS REPORT:
{report_context}
"""
    return base_prompt
def run_followup_analysis(user_question, report_context=None):
    """Answer a follow-up question by running the follow-up agent.

    Args:
        user_question: The question to answer.
        report_context: Optional context about the previous report, passed
            through to ``generate_followup_prompt``.

    Returns:
        str: The agent's result converted to a string, or a message of the
        form ``"Error in follow-up analysis: <details>"`` if any exception
        is raised while building the agent, building the prompt or running
        the analysis. Exceptions are not propagated to the caller.
    """
    try:
        # Build the agent first, then the prompt, then execute.
        followup_agent = create_followup_agent()
        followup_prompt = generate_followup_prompt(user_question, report_context)
        outcome = followup_agent.run(followup_prompt)
        return str(outcome)
    except Exception as exc:
        # Top-level boundary: report the failure as text instead of raising.
        return f"Error in follow-up analysis: {exc!s}"