|
import streamlit as st |
|
import pandas as pd |
|
from smolagents import CodeAgent, tool |
|
from typing import Union, List, Dict |
|
from duckduckgo_search import DDGS |
|
from newspaper import Article |
|
from datetime import datetime, timedelta |
|
import nltk |
|
from groq import Groq |
|
import os |
|
|
|
|
|
# Download the NLTK data that newspaper's Article.nlp() relies on
# (sentence tokenization, POS tagging, NE chunking, word lists).
# quiet=True suppresses the download log spam that would otherwise be
# re-emitted on every Streamlit script rerun.
for _nltk_resource in ("punkt", "averaged_perceptron_tagger", "maxent_ne_chunker", "words"):
    nltk.download(_nltk_resource, quiet=True)
|
|
|
class GroqLLM:
    """Callable LLM wrapper compatible with smolagents' CodeAgent.

    smolagents expects the model object to be callable with a prompt and to
    return the generated text as a plain string; errors are returned as
    strings so the agent loop keeps running.
    """

    def __init__(self, model_name: str = "llama-3.1-8b-instant"):
        # BUG FIX: Groq model IDs are lowercase ("llama-3.1-8b-instant");
        # the previous default "llama-3.1-8B-Instant" did not match any
        # published model ID.
        # Reads the API key from the GROQ_API_KEY environment variable.
        self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
        self.model_name = model_name

    def __call__(self, prompt: Union[str, dict, List[Dict]]) -> str:
        """Generate a completion for *prompt* and return it as a string.

        Args:
            prompt: Plain string, or a structured prompt (dict / list of
                message dicts) which is flattened via str().

        Returns:
            str: The model's reply, or an "Error ..." message on failure.
        """
        try:
            # smolagents may hand us structured prompts; coerce to text.
            prompt_str = str(prompt) if isinstance(prompt, (dict, list)) else prompt
            completion = self.client.chat.completions.create(
                model=self.model_name,
                messages=[{"role": "user", "content": prompt_str}],
                temperature=0.7,
                max_tokens=1024,
                stream=False,
            )
            if not completion.choices:
                return "Error: No response generated"
            return completion.choices[0].message.content
        except Exception as e:
            # Surface the failure as text instead of raising, so the agent
            # can report it to the user.
            return f"Error generating response: {str(e)}"
|
|
|
class NewsAnalysisAgent(CodeAgent):
    """CodeAgent specialized for news search and analysis.

    Maintains per-instance stores that the tool functions can populate:
    raw search hits and fully analyzed article records.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Internal stores, exposed read-only through the properties below.
        self._articles = []
        self._search_results = []

    @property
    def articles(self) -> List[Dict]:
        """Article records accumulated by the analysis tools."""
        return self._articles

    @property
    def search_results(self) -> List[Dict]:
        """Raw hits from the most recent news search."""
        return self._search_results

    def run(self, prompt: str) -> str:
        """Delegate to CodeAgent.run after wrapping *prompt* with tool context."""
        enhanced_prompt = f"""
        You are a news analysis assistant that can:
        - Search for recent news articles
        - Extract and analyze article content
        - Summarize key points
        - Identify trends and patterns

        Task: {prompt}

        Use the provided tools to search and analyze news content.
        """
        return super().run(enhanced_prompt)
|
|
|
@tool
def search_news(query: str, max_results: int = 5) -> str:
    """Search for recent news articles using DuckDuckGo.

    Args:
        query: Search query string
        max_results: Maximum number of results to return

    Returns:
        str: Formatted string containing search results with titles and URLs
    """
    try:
        with DDGS() as ddgs:
            # BUG FIX: DDGS.news has no `timeframe` kwarg — the parameter is
            # `timelimit` ('d' = past day). The old call raised TypeError on
            # every invocation, swallowed by the broad except below.
            search_results = list(ddgs.news(
                query,
                max_results=max_results,
                timelimit='d'
            ))

        # BUG FIX: `tool.agent` does not exist on smolagents' tool decorator,
        # so the old attribute access raised AttributeError. Store the results
        # on the agent best-effort only if something has attached one.
        agent = getattr(search_news, "agent", None)
        if agent is not None:
            agent._search_results = search_results

        formatted_results = []
        for idx, result in enumerate(search_results, 1):
            formatted_results.append(f"{idx}. {result['title']}\n URL: {result['link']}\n Date: {result['date']}\n")

        return "\n".join(formatted_results)
    except Exception as e:
        return f"Error searching news: {str(e)}"
|
|
|
@tool
def analyze_article(url: str) -> str:
    """Extract and analyze content from a news article URL.

    Args:
        url: URL of the news article to analyze

    Returns:
        str: Analysis of the article including summary, key points, and entities
    """
    try:
        # Fetch and parse the page, then run newspaper's NLP pass
        # (requires the nltk data downloaded at module import).
        article = Article(url)
        article.download()
        article.parse()
        article.nlp()

        article_data = {
            'url': url,
            'title': article.title,
            'summary': article.summary,
            'keywords': article.keywords,
            'publish_date': article.publish_date
        }

        # BUG FIX: `tool.agent` does not exist on smolagents' tool decorator,
        # so the old attribute access raised AttributeError and the tool
        # always returned an error string. Store best-effort only if an
        # agent has been attached to this function.
        agent = getattr(analyze_article, "agent", None)
        if agent is not None:
            agent._articles.append(article_data)

        analysis = f"""
        Title: {article.title}

        Summary: {article.summary}

        Key Points:
        {', '.join(article.keywords)}

        Publication Date: {article.publish_date}
        """
        return analysis
    except Exception as e:
        return f"Error analyzing article: {str(e)}"
|
|
|
@tool
def identify_trends(articles: List[Dict] = None) -> str:
    """Identify common themes and trends across analyzed articles.

    Args:
        articles: List of analyzed article data (optional, uses stored articles if None)

    Returns:
        str: Analysis of trends and patterns found across articles
    """
    if articles is None:
        # BUG FIX: the old `articles or tool.agent._articles` raised
        # AttributeError (`tool` has no `.agent`), and also fell into the
        # broken branch when an explicit empty list was passed. Fall back to
        # agent-stored articles only if an agent has been attached.
        agent = getattr(identify_trends, "agent", None)
        articles = agent._articles if agent is not None else []

    if not articles:
        return "No articles available for trend analysis"

    # Pool keywords from every article for frequency analysis.
    all_keywords = []
    for article in articles:
        all_keywords.extend(article.get('keywords', []))

    keyword_freq = pd.Series(all_keywords).value_counts()

    # BUG FIX: min()/max() over an empty generator raised ValueError when
    # no article carried a publish_date; compute the span defensively.
    dates = [a['publish_date'] for a in articles if a.get('publish_date')]
    timespan = f"{min(dates)} to {max(dates)}" if dates else "unknown"

    trends = f"""
    Common Themes:
    {', '.join(keyword_freq.head().index)}

    Articles Analyzed: {len(articles)}
    Timespan: {timespan}
    """
    return trends
|
|
|
def main():
    """Streamlit entry point: wires the UI widgets to the NewsAnalysisAgent."""
    st.title("News Analysis Assistant")
    st.write("Search and analyze recent news articles with natural language interaction.")

    # Construct the agent once per browser session; Streamlit reruns this
    # script on every interaction, so it must live in session_state.
    if 'agent' not in st.session_state:
        st.session_state['agent'] = NewsAnalysisAgent(
            tools=[search_news, analyze_article, identify_trends],
            model=GroqLLM(),
            additional_authorized_imports=[
                "newspaper", "nltk", "duckduckgo_search", "pandas"
            ]
        )
    agent = st.session_state['agent']

    # --- News search ---
    query_text = st.text_input("Enter news search query:")
    if query_text:
        with st.spinner('Searching news...'):
            st.write(agent.run(
                f"Use the search_news tool to find recent articles about: {query_text}"
            ))

    # --- Single-article analysis ---
    st.subheader("Article Analysis")
    url_input = st.text_input("Enter article URL to analyze:")
    if url_input:
        with st.spinner('Analyzing article...'):
            st.write(agent.run(
                f"Use the analyze_article tool to analyze this article: {url_input}"
            ))

    # --- Cross-article trends ---
    if st.button("Analyze Trends"):
        with st.spinner('Identifying trends...'):
            st.write(agent.run(
                "Use the identify_trends tool to analyze patterns across all articles"
            ))

    # --- Free-form question ---
    st.subheader("Custom Analysis")
    user_question = st.text_input("What would you like to know about the news?")
    if user_question:
        with st.spinner('Analyzing...'):
            st.write(agent.run(user_question))
|
|
|
# Script entry point (run via `streamlit run <file>` or `python <file>`).
if __name__ == "__main__":
    main()