# NOTE: Hugging Face Space page-header residue from export, kept as a comment:
# author: girishwangikar — commit message: "Update app.py" — fd1209e (verified) — 8.37 kB
import streamlit as st
import pandas as pd
from smolagents import CodeAgent, tool
from typing import Union, List, Dict
from duckduckgo_search import DDGS
import requests
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
from groq import Groq
import os
import re
class GroqLLM:
    """Callable LLM wrapper compatible with smolagents' CodeAgent.

    smolagents expects ``model`` to be a callable taking a prompt and
    returning a string; this adapts the Groq chat-completions API to that
    contract. Requires the GROQ_API_KEY environment variable.
    """

    def __init__(self, model_name: str = "llama-3.1-8b-instant"):
        # BUG FIX: Groq model IDs are lowercase; the previous default
        # "llama-3.1-8B-Instant" is rejected by the API.
        self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
        self.model_name = model_name

    def __call__(self, prompt: Union[str, dict, List[Dict]]) -> str:
        """Send *prompt* to the Groq API and return the completion text.

        Args:
            prompt: A plain string, or a dict / list of message dicts.
                Structured payloads are stringified and sent as a single
                user message.

        Returns:
            str: The generated text, or an ``"Error ..."`` string on any
            failure — callers rely on always receiving a str, never an
            exception.
        """
        try:
            # smolagents may hand us structured messages; collapse them
            # since we always send exactly one user message.
            prompt_str = str(prompt) if isinstance(prompt, (dict, list)) else prompt
            completion = self.client.chat.completions.create(
                model=self.model_name,
                messages=[{"role": "user", "content": prompt_str}],
                temperature=0.7,
                max_tokens=1024,
                stream=False,
            )
            if completion.choices:
                return completion.choices[0].message.content
            return "Error: No response generated"
        except Exception as e:
            # Deliberate best-effort: surface failures as text, not raises.
            return f"Error generating response: {str(e)}"
class NewsAnalysisAgent(CodeAgent):
    """CodeAgent specialized for news search and analysis workflows.

    Keeps per-agent stores for search results and analyzed articles,
    and wraps every prompt with context describing the available tools.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Populated by the news tools as the session progresses.
        self._articles: List[Dict] = []
        self._search_results: List[Dict] = []

    @property
    def articles(self) -> List[Dict]:
        """Article records accumulated so far."""
        return self._articles

    @property
    def search_results(self) -> List[Dict]:
        """Raw results from the most recent news search."""
        return self._search_results

    def run(self, prompt: str) -> str:
        """Run the agent after prefixing *prompt* with tool-usage context."""
        enhanced_prompt = f"""
        You are a news analysis assistant that can:
        - Search for recent news articles
        - Extract and analyze article content
        - Summarize key points
        - Identify trends and patterns

        Task: {prompt}

        Use the provided tools to search and analyze news content.
        """
        return super().run(enhanced_prompt)
def extract_text_from_url(url: str) -> str:
    """Fetch *url* and return its paragraph text, whitespace-normalized.

    Non-content elements (scripts, styles, navigation chrome) are removed
    before harvesting <p> tags. On any failure (network, HTTP error,
    parse), an ``"Error ..."`` string is returned instead of raising.
    """
    try:
        ua = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
              'AppleWebKit/537.36 (KHTML, like Gecko) '
              'Chrome/91.0.4472.124 Safari/537.36')
        resp = requests.get(url, headers={'User-Agent': ua}, timeout=10)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, 'html.parser')

        # Drop elements that never carry article body text.
        for tag in soup(['script', 'style', 'nav', 'header', 'footer', 'aside']):
            tag.decompose()

        # Collect non-empty paragraph text, then collapse runs of
        # whitespace into single spaces.
        pieces = []
        for p in soup.find_all('p'):
            stripped = p.get_text().strip()
            if stripped:
                pieces.append(stripped)
        return re.sub(r'\s+', ' ', ' '.join(pieces))
    except Exception as e:
        return f"Error extracting text: {str(e)}"
@tool
def search_news(query: str, max_results: int = 5) -> str:
    """Search for recent news articles using DuckDuckGo.

    Args:
        query: Search query string
        max_results: Maximum number of results to return

    Returns:
        str: Formatted string containing search results with titles and
        URLs, or an ``"Error ..."`` string on failure.
    """
    try:
        with DDGS() as ddgs:
            # BUG FIX: DDGS.news takes `timelimit`, not `timeframe`;
            # the old keyword raised TypeError on every call.
            search_results = list(ddgs.news(
                query,
                max_results=max_results,
                timelimit='d',  # restrict to the last 24 hours
            ))

        # NOTE(review): smolagents' @tool decorator does not attach an
        # `agent` attribute, so the previous unguarded `tool.agent`
        # access raised AttributeError and aborted every search. Store
        # results only when an agent reference actually exists.
        agent = getattr(tool, "agent", None)
        if agent is not None:
            agent._search_results = search_results

        formatted_results = []
        for idx, result in enumerate(search_results, 1):
            formatted_results.append(
                f"{idx}. {result['title']}\n URL: {result['link']}\n Date: {result['date']}\n"
            )
        return "\n".join(formatted_results)
    except Exception as e:
        return f"Error searching news: {str(e)}"
@tool
def analyze_article(url: str) -> str:
    """Extract and analyze content from a news article URL.

    Args:
        url: URL of the news article to analyze

    Returns:
        str: Analysis of the article including summary and key points,
        or an ``"Error ..."`` string on failure.
    """
    try:
        # Pull paragraph text from the page (may itself be an error string).
        content = extract_text_from_url(url)

        # BUG FIX: the "# Limit content length..." comment used to live
        # INSIDE the f-string literal and leaked into the LLM prompt.
        # Content is truncated to ~3000 chars for token constraints.
        analysis_prompt = f"""
        Please analyze this article content and provide:
        1. A brief summary (2-3 sentences)
        2. Key points (3-5 main takeaways)
        3. Main topics/themes discussed

        Article content:
        {content[:3000]}
        """

        # NOTE(review): `tool.agent` is not set by smolagents' @tool
        # decorator, so `tool.agent.model` raised AttributeError on every
        # call. Use the agent's model when one is attached, otherwise
        # fall back to a standalone GroqLLM instance.
        agent = getattr(tool, "agent", None)
        model = getattr(agent, "model", None) if agent is not None else None
        if model is None:
            model = GroqLLM()
        analysis = model(analysis_prompt)

        # Record the article (truncated) so identify_trends can reuse it.
        article_data = {
            'url': url,
            'content': content[:1000],
            'analysis': analysis,
            'date': datetime.now().strftime('%Y-%m-%d'),
        }
        if agent is not None:
            agent._articles.append(article_data)

        return analysis
    except Exception as e:
        return f"Error analyzing article: {str(e)}"
@tool
def identify_trends(articles: List[Dict] = None) -> str:
    """Identify common themes and trends across analyzed articles.

    Args:
        articles: List of analyzed article data (optional, uses stored
            articles if None)

    Returns:
        str: Analysis of trends and patterns found across articles, or a
        notice when no articles are available.
    """
    # NOTE(review): smolagents' @tool decorator does not provide a
    # `tool.agent` attribute, so the previous unguarded accesses raised
    # AttributeError on every call. Guard both the fallback article
    # lookup and the model access.
    agent = getattr(tool, "agent", None)
    if not articles:
        articles = agent._articles if agent is not None else []
    if not articles:
        return "No articles available for trend analysis"

    # Concatenate the per-article analyses for cross-article comparison.
    combined_analyses = "\n".join(article['analysis'] for article in articles)
    trend_prompt = f"""
    Based on the analyses of {len(articles)} articles, please identify:
    1. Common themes or topics across articles
    2. Any notable patterns or trends
    3. Different perspectives or viewpoints presented

    Combined analyses:
    {combined_analyses}
    """

    model = getattr(agent, "model", None) if agent is not None else None
    if model is None:
        model = GroqLLM()  # fallback when no agent is attached to the tool
    return model(trend_prompt)
def main():
    """Streamlit entry point: wires the UI panels to the NewsAnalysisAgent."""
    st.title("News Analysis Assistant")
    st.write("Search and analyze recent news articles with natural language interaction.")

    # Build the agent once and keep it for the whole browser session.
    if 'agent' not in st.session_state:
        st.session_state['agent'] = NewsAnalysisAgent(
            tools=[search_news, analyze_article, identify_trends],
            model=GroqLLM(),
            additional_authorized_imports=[
                "requests", "bs4", "duckduckgo_search", "pandas"
            ],
        )
    agent = st.session_state['agent']

    # --- News search ---
    search_query = st.text_input("Enter news search query:")
    if search_query:
        with st.spinner('Searching news...'):
            st.write(agent.run(
                f"Use the search_news tool to find recent articles about: {search_query}"
            ))

    # --- Single-article analysis ---
    st.subheader("Article Analysis")
    article_url = st.text_input("Enter article URL to analyze:")
    if article_url:
        with st.spinner('Analyzing article...'):
            st.write(agent.run(
                f"Use the analyze_article tool to analyze this article: {article_url}"
            ))

    # --- Cross-article trends ---
    if st.button("Analyze Trends"):
        with st.spinner('Identifying trends...'):
            st.write(agent.run(
                "Use the identify_trends tool to analyze patterns across all articles"
            ))

    # --- Free-form questions ---
    st.subheader("Custom Analysis")
    question = st.text_input("What would you like to know about the news?")
    if question:
        with st.spinner('Analyzing...'):
            st.write(agent.run(question))


if __name__ == "__main__":
    main()