# Hugging Face Spaces page residue (author/commit metadata), preserved as
# comments so the file remains valid Python:
# girishwangikar's picture
# Create app.py
# 98ffa93 verified
# raw | history blame | 7.5 kB
import streamlit as st
import pandas as pd
from smolagents import CodeAgent, tool
from typing import Union, List, Dict
from duckduckgo_search import DDGS
from newspaper import Article
from datetime import datetime, timedelta
import nltk
from groq import Groq
import os
# Fetch the NLTK corpora/models that newspaper's Article.nlp() relies on
# (tokenization, POS tagging, and named-entity chunking).
for _nltk_resource in ("punkt", "averaged_perceptron_tagger", "maxent_ne_chunker", "words"):
    nltk.download(_nltk_resource)
class GroqLLM:
    """Minimal callable LLM wrapper compatible with smolagents' CodeAgent.

    smolagents only requires the model object to be callable with a prompt
    and to return a string; this class provides that via ``__call__``.
    """

    def __init__(self, model_name: str = "llama-3.1-8b-instant"):
        # Groq model IDs are all-lowercase; the previous default
        # "llama-3.1-8B-Instant" is not a valid ID and the API rejects it.
        # Reads GROQ_API_KEY from the environment (None if unset; the Groq
        # client will then fail on first request, surfaced as an error string).
        self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
        self.model_name = model_name

    def __call__(self, prompt: Union[str, dict, List[Dict]]) -> str:
        """Send *prompt* to the Groq chat API and return the reply text.

        Args:
            prompt: A plain string, or a dict / list of message dicts; the
                structured forms are stringified and sent as one user message.

        Returns:
            The model's reply text, or an ``"Error ..."`` string on failure
            (the agent loop expects a string, so exceptions are not raised).
        """
        try:
            # smolagents may pass structured prompts; flatten them to text.
            prompt_str = str(prompt) if isinstance(prompt, (dict, list)) else prompt
            completion = self.client.chat.completions.create(
                model=self.model_name,
                messages=[{"role": "user", "content": prompt_str}],
                temperature=0.7,
                max_tokens=1024,
                stream=False,
            )
            return completion.choices[0].message.content if completion.choices else "Error: No response generated"
        except Exception as e:
            return f"Error generating response: {str(e)}"
class NewsAnalysisAgent(CodeAgent):
    """CodeAgent specialized for news retrieval and analysis.

    Keeps two pieces of state across tool invocations: the raw search hits
    and the parsed article records produced by the analysis tools.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Internal stores exposed read-only through the properties below.
        self._articles = []
        self._search_results = []

    @property
    def articles(self) -> List[Dict]:
        """Article records accumulated so far."""
        return self._articles

    @property
    def search_results(self) -> List[Dict]:
        """Raw news-search hits accumulated so far."""
        return self._search_results

    def run(self, prompt: str) -> str:
        """Run the agent after prefixing *prompt* with tool-usage context."""
        enhanced_prompt = f"""
You are a news analysis assistant that can:
- Search for recent news articles
- Extract and analyze article content
- Summarize key points
- Identify trends and patterns
Task: {prompt}
Use the provided tools to search and analyze news content.
"""
        return super().run(enhanced_prompt)
@tool
def search_news(query: str, max_results: int = 5) -> str:
    """Search for recent news articles using DuckDuckGo.

    Args:
        query: Search query string
        max_results: Maximum number of results to return

    Returns:
        str: Formatted string containing search results with titles and URLs
    """
    try:
        with DDGS() as ddgs:
            # DDGS.news takes `timelimit` (not `timeframe`, which raised a
            # TypeError on every call); 'd' restricts to the last 24 hours.
            search_results = list(ddgs.news(
                query,
                max_results=max_results,
                timelimit='d'
            ))

        # Best-effort bookkeeping: the plain `tool` decorator has no `.agent`
        # attribute (the old `tool.agent._search_results = ...` crashed every
        # call), so never let the store break the search itself.
        try:
            search_news.agent._search_results = search_results
        except AttributeError:
            pass

        # Format results; .get() guards hits that lack optional fields.
        formatted_results = []
        for idx, result in enumerate(search_results, 1):
            formatted_results.append(
                f"{idx}. {result.get('title', 'Untitled')}\n"
                f"   URL: {result.get('link', '')}\n"
                f"   Date: {result.get('date', 'unknown')}\n"
            )
        if not formatted_results:
            return "No news results found."
        return "\n".join(formatted_results)
    except Exception as e:
        return f"Error searching news: {str(e)}"
@tool
def analyze_article(url: str) -> str:
    """Extract and analyze content from a news article URL.

    Args:
        url: URL of the news article to analyze

    Returns:
        str: Analysis of the article including summary, key points, and entities
    """
    try:
        # Download and parse the article; .nlp() fills summary/keywords and
        # requires the NLTK data downloaded at module import.
        article = Article(url)
        article.download()
        article.parse()
        article.nlp()

        article_data = {
            'url': url,
            'title': article.title,
            'summary': article.summary,
            'keywords': article.keywords,
            'publish_date': article.publish_date
        }
        # Best-effort bookkeeping: the plain `tool` decorator has no `.agent`
        # attribute (the old `tool.agent._articles.append(...)` threw an
        # AttributeError that diverted every success into the error path),
        # so guard the store instead of crashing the analysis.
        try:
            analyze_article.agent._articles.append(article_data)
        except AttributeError:
            pass

        # Format analysis
        analysis = f"""
Title: {article.title}
Summary: {article.summary}
Key Points:
{', '.join(article.keywords)}
Publication Date: {article.publish_date}
"""
        return analysis
    except Exception as e:
        return f"Error analyzing article: {str(e)}"
@tool
def identify_trends(articles: List[Dict] = None) -> str:
    """Identify common themes and trends across analyzed articles.

    Args:
        articles: List of analyzed article data (optional, uses stored articles if None)

    Returns:
        str: Analysis of trends and patterns found across articles
    """
    if articles is None:
        # The plain `tool` decorator has no `.agent` attribute (the old
        # `tool.agent._articles` raised AttributeError); fall back to the
        # function's own attribute when a wrapper provides one, else empty.
        agent = getattr(identify_trends, 'agent', None)
        articles = agent._articles if agent is not None else []
    if not articles:
        return "No articles available for trend analysis"

    # Collect every keyword across articles (missing 'keywords' tolerated).
    all_keywords = []
    for article in articles:
        all_keywords.extend(article.get('keywords', []))

    # Count keyword frequencies; top five become the "common themes".
    keyword_freq = pd.Series(all_keywords).value_counts()
    common_themes = ', '.join(keyword_freq.head().index) if all_keywords else 'none'

    # min()/max() raise ValueError on an empty sequence, so guard the case
    # where no article carries a publish_date.
    dates = [a['publish_date'] for a in articles if a.get('publish_date')]
    timespan = f"{min(dates)} to {max(dates)}" if dates else "unknown"

    trends = f"""
Common Themes:
{common_themes}
Articles Analyzed: {len(articles)}
Timespan: {timespan}
"""
    return trends
def main():
    """Streamlit entry point: wires the agent to four interaction panels."""
    st.title("News Analysis Assistant")
    st.write("Search and analyze recent news articles with natural language interaction.")

    # Build the agent once per browser session so article/search state
    # survives Streamlit's script reruns.
    if 'agent' not in st.session_state:
        st.session_state['agent'] = NewsAnalysisAgent(
            tools=[search_news, analyze_article, identify_trends],
            model=GroqLLM(),
            additional_authorized_imports=[
                "newspaper", "nltk", "duckduckgo_search", "pandas"
            ]
        )

    agent = st.session_state['agent']

    def run_task(task, spinner_text):
        # Shared run-and-render helper for every panel below.
        with st.spinner(spinner_text):
            st.write(agent.run(task))

    # News search panel
    search_query = st.text_input("Enter news search query:")
    if search_query:
        run_task(
            f"Use the search_news tool to find recent articles about: {search_query}",
            'Searching news...'
        )

    # Article analysis panel
    st.subheader("Article Analysis")
    article_url = st.text_input("Enter article URL to analyze:")
    if article_url:
        run_task(
            f"Use the analyze_article tool to analyze this article: {article_url}",
            'Analyzing article...'
        )

    # Trend analysis panel
    if st.button("Analyze Trends"):
        run_task(
            "Use the identify_trends tool to analyze patterns across all articles",
            'Identifying trends...'
        )

    # Free-form question panel
    st.subheader("Custom Analysis")
    question = st.text_input("What would you like to know about the news?")
    if question:
        run_task(question, 'Analyzing...')


if __name__ == "__main__":
    main()