|
import os |
|
from tools.followup_tools import ( |
|
load_previous_dataset, |
|
get_dataset_summary, |
|
create_followup_visualization, |
|
get_previous_report_content, |
|
analyze_column_correlation, |
|
create_statistical_summary, |
|
filter_and_visualize_data, |
|
) |
|
from tools.retrieval_tools import ( |
|
search_datasets, |
|
get_dataset_info, |
|
) |
|
from smolagents import ( |
|
CodeAgent, |
|
DuckDuckGoSearchTool, |
|
LiteLLMModel, |
|
) |
|
|
|
def create_followup_agent():
    """Build the CodeAgent that handles follow-up questions.

    The agent is wired with a DuckDuckGo web-search tool, the follow-up
    dataset/report tools and the dataset retrieval tools, and is driven by
    a Gemini model accessed through LiteLLM.

    Returns:
        A freshly constructed ``CodeAgent``; nothing is cached between calls.
    """
    # The search tool is instantiated first, then the model, then the agent.
    web_search = DuckDuckGoSearchTool()

    # The API key is read from the environment at call time; it is passed
    # through as None when GEMINI_API_KEY is not set.
    llm = LiteLLMModel(
        model_id="gemini/gemini-2.5-flash-preview-05-20",
        api_key=os.environ.get("GEMINI_API_KEY"),
    )

    agent_tools = [
        web_search,
        # Follow-up tools operating on the previous dataset/report.
        load_previous_dataset,
        get_dataset_summary,
        create_followup_visualization,
        get_previous_report_content,
        analyze_column_correlation,
        create_statistical_summary,
        filter_and_visualize_data,
        # Retrieval tools for looking up datasets.
        search_datasets,
        get_dataset_info,
    ]

    # Modules the agent's generated code is additionally allowed to import.
    allowed_imports = [
        "pandas",
        "numpy",
        "matplotlib",
        "matplotlib.pyplot",
        "seaborn",
        "os",
        "json",
        "datetime",
        "math",
        "statistics",
    ]

    return CodeAgent(
        tools=agent_tools,
        model=llm,
        max_steps=20,
        verbosity_level=1,
        planning_interval=2,
        additional_authorized_imports=allowed_imports,
    )
|
|
|
def generate_followup_prompt(user_question, report_context=None):
    """Build the instruction prompt for a follow-up question.

    Args:
        user_question: The user's follow-up question. It is interpolated
            verbatim (inside double quotes) at the top and bottom of the
            prompt.
        report_context: Optional text describing the previous report. When
            truthy it is appended under an "ADDITIONAL CONTEXT" heading;
            ``None`` or an empty string adds nothing.

    Returns:
        The complete prompt as a single string.
    """
    # The literal is kept flush-left on purpose: any indentation placed
    # inside a triple-quoted string would become part of the prompt text.
    # The "→" arrows in RESPONSE STRATEGY replace a mis-decoded character
    # ("β") that had garbled those four bullets.
    base_prompt = f"""You are a data analysis assistant helping with follow-up questions about a previously generated report.

USER'S FOLLOW-UP QUESTION: "{user_question}"

IMPORTANT: Analyze the user's question carefully to determine what type of response is needed.

QUESTION ANALYSIS:
- **General Information/Web Search**: If the question asks for general information, definitions, or requires web search (e.g., "What is X?", "Tell me about Y", "How does Z work?"), use web search tools and provide a text answer. DO NOT load datasets or create visualizations.

- **Dataset Questions**: If the question is specifically about the dataset, data analysis, statistics, or requests visualizations (e.g., "Show me correlation", "Create a chart", "What's the distribution of X?", "Filter by Y"), then use dataset tools.

- **Report Context**: If the question asks about the previous report content or findings, use get_previous_report_content() and provide a text answer.

AVAILABLE TOOLS:
1. **DuckDuckGoSearchTool** - For general web search and information queries
2. **get_previous_report_content()** - Get context about the previous report
3. **load_previous_dataset()** - Load the dataset (ONLY if question needs data analysis)
4. **get_dataset_summary(df)** - Get dataset structure info
5. **create_followup_visualization()** - Create charts (ONLY if explicitly requested)
6. **analyze_column_correlation()** - Analyze column relationships with plots
7. **create_statistical_summary()** - Generate stats with visualizations
8. **filter_and_visualize_data()** - Filter data and create targeted visualizations
9. **search_datasets()** - Search for additional datasets
10. **get_dataset_info()** - Get info about specific datasets

RESPONSE STRATEGY:
1. **First, determine the question type:**
- Is this a general knowledge/web search question? → Use web search, provide text answer
- Is this about the previous report content? → Use get_previous_report_content(), provide text answer
- Is this a data analysis question? → Use dataset tools as needed
- Does it explicitly ask for a chart/visualization? → Use appropriate visualization tools

2. **For data analysis questions only:**
- Load dataset if you need to analyze the actual data
- Create visualizations ONLY if the user explicitly asks for them or if the question specifically requires visual analysis (correlations, distributions, trends)
- Provide clear text explanations

3. **Be efficient with tool usage:**
- Use the minimum number of tools needed to answer the question
- Don't create visualizations unless requested or clearly needed
- Don't load datasets for general questions

GUIDELINES:
- Match your response type to the question type
- Be direct and efficient - don't over-engineer the response
- Only use data analysis tools when the question is actually about data analysis
- Provide concise, helpful answers focused on what the user asked

Answer the user's question: "{user_question}"
"""

    # Only append the context section when there is something to show.
    if report_context:
        base_prompt += f"""

ADDITIONAL CONTEXT ABOUT PREVIOUS REPORT:
{report_context}
"""

    return base_prompt
|
|
|
def run_followup_analysis(user_question, report_context=None):
    """Answer a follow-up question with a freshly built follow-up agent.

    Args:
        user_question: The user's follow-up question.
        report_context: Optional context about the previous report; it is
            forwarded to ``generate_followup_prompt``.

    Returns:
        The agent's result converted to ``str``. If any step raises an
        ``Exception``, a string of the form
        ``"Error in follow-up analysis: <message>"`` is returned instead of
        propagating the error.
    """
    # Agent construction, prompt building and the run itself all sit inside
    # the same try so that a failure in any of them yields the error string.
    try:
        followup_agent = create_followup_agent()
        followup_prompt = generate_followup_prompt(user_question, report_context)
        answer = followup_agent.run(followup_prompt)
    except Exception as exc:
        return f"Error in follow-up analysis: {exc}"
    else:
        return str(answer)