|
import streamlit as st |
|
import pandas as pd |
|
from smolagents import CodeAgent, tool |
|
from typing import Union, List, Dict |
|
from duckduckgo_search import DDGS |
|
import requests |
|
from bs4 import BeautifulSoup |
|
from datetime import datetime, timedelta |
|
from groq import Groq |
|
import os |
|
import re |
|
|
|
class GroqLLM:
    """Callable LLM wrapper compatible with the smolagents model interface.

    smolagents expects the model object to be callable with a prompt and to
    return the generated text as a plain string.
    """

    def __init__(self, model_name: str = "llama-3.1-8b-instant"):
        # Groq model IDs are all-lowercase; the previous default
        # "llama-3.1-8B-Instant" is rejected by the API (model not found).
        self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
        self.model_name = model_name

    def __call__(self, prompt: Union[str, dict, List[Dict]]) -> str:
        """Generate a completion for *prompt*.

        Args:
            prompt: Plain string, or a dict / list of message dicts as
                produced by smolagents; non-strings are stringified.

        Returns:
            str: The model's reply, or an "Error ..." string on failure so
            the agent loop keeps running instead of crashing.
        """
        try:
            # smolagents may pass structured message payloads; coerce to text.
            prompt_str = str(prompt) if isinstance(prompt, (dict, list)) else prompt

            completion = self.client.chat.completions.create(
                model=self.model_name,
                messages=[{"role": "user", "content": prompt_str}],
                temperature=0.7,
                max_tokens=1024,
                stream=False,
            )

            if not completion.choices:
                return "Error: No response generated"
            return completion.choices[0].message.content
        except Exception as e:
            # Surface failures as text rather than raising into the agent.
            return f"Error generating response: {str(e)}"
|
|
|
class NewsAnalysisAgent(CodeAgent):
    """CodeAgent specialized for news search and analysis.

    Keeps per-session state (raw search results and analyzed articles) and
    prepends tool-usage context to every task prompt.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Accumulated article analyses (dicts with url/content/analysis/date).
        self._articles = []
        # Raw results from the most recent news search.
        self._search_results = []

    @property
    def articles(self) -> List[Dict]:
        """Access stored article data."""
        return self._articles

    @property
    def search_results(self) -> List[Dict]:
        """Access stored search results."""
        return self._search_results

    def run(self, prompt: str, **kwargs) -> str:
        """Wrap *prompt* with tool context, then delegate to CodeAgent.run.

        Extra keyword arguments are forwarded to the base implementation so
        callers can still use CodeAgent.run options (the previous override
        silently dropped them).
        """
        enhanced_prompt = f"""
        You are a news analysis assistant that can:
        - Search for recent news articles
        - Extract and analyze article content
        - Summarize key points
        - Identify trends and patterns

        Task: {prompt}

        Use the provided tools to search and analyze news content.
        """
        return super().run(enhanced_prompt, **kwargs)
|
|
|
def extract_text_from_url(url: str) -> str:
    """Fetch a web page and return its visible paragraph text.

    Scripts, styles and page chrome are stripped; the remaining ``<p>`` text
    is joined and whitespace-collapsed. Any failure (network, HTTP status,
    parsing) is reported as an "Error ..." string rather than raised.
    """
    try:
        request_headers = {
            'User-Agent': (
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                'AppleWebKit/537.36 (KHTML, like Gecko) '
                'Chrome/91.0.4472.124 Safari/537.36'
            )
        }
        page = requests.get(url, headers=request_headers, timeout=10)
        page.raise_for_status()

        soup = BeautifulSoup(page.text, 'html.parser')

        # Drop non-content elements before extracting text.
        for tag in soup(['script', 'style', 'nav', 'header', 'footer', 'aside']):
            tag.decompose()

        chunks = []
        for paragraph in soup.find_all('p'):
            chunk = paragraph.get_text().strip()
            if chunk:
                chunks.append(chunk)

        # Collapse runs of whitespace into single spaces.
        return re.sub(r'\s+', ' ', ' '.join(chunks))

    except Exception as e:
        return f"Error extracting text: {str(e)}"
|
|
|
@tool
def search_news(query: str, max_results: int = 5) -> str:
    """Search for recent news articles using DuckDuckGo.

    Args:
        query: Search query string
        max_results: Maximum number of results to return

    Returns:
        str: Formatted string containing search results with titles and URLs
    """
    try:
        with DDGS() as ddgs:
            # DDGS.news takes `timelimit` (not `timeframe`, which raised a
            # TypeError); 'd' restricts results to the past day.
            search_results = list(ddgs.news(
                query,
                max_results=max_results,
                timelimit='d'
            ))

        # Best-effort bookkeeping: `tool` is the smolagents decorator and has
        # no `agent` attribute, so the old unconditional assignment raised
        # AttributeError and discarded the results. Store only when an agent
        # is actually attached.
        agent = getattr(tool, 'agent', None)
        if agent is not None:
            agent._search_results = search_results

        formatted_results = []
        for idx, result in enumerate(search_results, 1):
            formatted_results.append(f"{idx}. {result['title']}\n URL: {result['link']}\n Date: {result['date']}\n")

        return "\n".join(formatted_results)
    except Exception as e:
        return f"Error searching news: {str(e)}"
|
|
|
@tool
def analyze_article(url: str) -> str:
    """Extract and analyze content from a news article URL.

    Args:
        url: URL of the news article to analyze

    Returns:
        str: Analysis of the article including summary and key points
    """
    try:
        content = extract_text_from_url(url)

        # Content is truncated to 3000 chars to respect token limits. (The
        # old code put that note *inside* the f-string, sending it verbatim
        # to the LLM.)
        analysis_prompt = f"""
        Please analyze this article content and provide:
        1. A brief summary (2-3 sentences)
        2. Key points (3-5 main takeaways)
        3. Main topics/themes discussed

        Article content:
        {content[:3000]}
        """

        # `tool` (the decorator) has no `agent` attribute, so the old
        # `tool.agent.model(...)` always raised AttributeError and this tool
        # never worked. Use the attached agent's model when present,
        # otherwise fall back to a fresh GroqLLM instance.
        agent = getattr(tool, 'agent', None)
        model = getattr(agent, 'model', None) if agent is not None else None
        if model is None:
            model = GroqLLM()
        analysis = model(analysis_prompt)

        article_data = {
            'url': url,
            'content': content[:1000],
            'analysis': analysis,
            'date': datetime.now().strftime('%Y-%m-%d')
        }
        # Best-effort: store for trend analysis only if an agent is attached.
        if agent is not None:
            agent._articles.append(article_data)

        return analysis
    except Exception as e:
        return f"Error analyzing article: {str(e)}"
|
|
|
@tool
def identify_trends(articles: List[Dict] = None) -> str:
    """Identify common themes and trends across analyzed articles.

    Args:
        articles: List of analyzed article data (optional, uses stored articles if None)

    Returns:
        str: Analysis of trends and patterns found across articles
    """
    # `tool` is the smolagents decorator and has no `agent` attribute, so the
    # old `tool.agent._articles` fallback raised an *uncaught* AttributeError
    # (this tool has no try/except). Guard the lookup instead.
    agent = getattr(tool, 'agent', None)
    if not articles:
        articles = getattr(agent, '_articles', None) if agent is not None else None

    if not articles:
        return "No articles available for trend analysis"

    combined_analyses = "\n".join(article['analysis'] for article in articles)

    trend_prompt = f"""
    Based on the analyses of {len(articles)} articles, please identify:
    1. Common themes or topics across articles
    2. Any notable patterns or trends
    3. Different perspectives or viewpoints presented

    Combined analyses:
    {combined_analyses}
    """

    # Prefer the attached agent's model; fall back to a fresh GroqLLM so the
    # tool still works when no agent reference is available.
    model = getattr(agent, 'model', None) if agent is not None else None
    if model is None:
        model = GroqLLM()
    return model(trend_prompt)
|
|
|
def main():
    """Streamlit entry point: build the agent once and wire up the UI."""
    st.title("News Analysis Assistant")
    st.write("Search and analyze recent news articles with natural language interaction.")

    # Create the agent a single time per session and keep it in session state.
    if 'agent' not in st.session_state:
        st.session_state['agent'] = NewsAnalysisAgent(
            tools=[search_news, analyze_article, identify_trends],
            model=GroqLLM(),
            additional_authorized_imports=[
                "requests", "bs4", "duckduckgo_search", "pandas"
            ]
        )

    agent = st.session_state['agent']

    # --- News search -------------------------------------------------------
    search_query = st.text_input("Enter news search query:")
    if search_query:
        with st.spinner('Searching news...'):
            search_results = agent.run(
                f"Use the search_news tool to find recent articles about: {search_query}"
            )
            st.write(search_results)

    # --- Single-article analysis ------------------------------------------
    st.subheader("Article Analysis")
    article_url = st.text_input("Enter article URL to analyze:")
    if article_url:
        with st.spinner('Analyzing article...'):
            analysis = agent.run(
                f"Use the analyze_article tool to analyze this article: {article_url}"
            )
            st.write(analysis)

    # --- Cross-article trend analysis --------------------------------------
    if st.button("Analyze Trends"):
        with st.spinner('Identifying trends...'):
            trends = agent.run(
                "Use the identify_trends tool to analyze patterns across all articles"
            )
            st.write(trends)

    # --- Free-form question -------------------------------------------------
    st.subheader("Custom Analysis")
    question = st.text_input("What would you like to know about the news?")
    if question:
        with st.spinner('Analyzing...'):
            result = agent.run(question)
            st.write(result)


if __name__ == "__main__":
    main()