File size: 8,374 Bytes
98ffa93 fd1209e 98ffa93 fd1209e 98ffa93 fd1209e 98ffa93 fd1209e 98ffa93 fd1209e 98ffa93 fd1209e 98ffa93 fd1209e 98ffa93 fd1209e 98ffa93 fd1209e 98ffa93 fd1209e 98ffa93 fd1209e 98ffa93 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 |
import streamlit as st
import pandas as pd
from smolagents import CodeAgent, tool
from typing import Union, List, Dict
from duckduckgo_search import DDGS
import requests
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
from groq import Groq
import os
import re
class GroqLLM:
    """Compatible LLM interface for smolagents CodeAgent.

    Wraps the Groq chat-completions client behind a simple callable so the
    agent can invoke it as ``model(prompt)``.
    """

    def __init__(self, model_name: str = "llama-3.1-8b-instant"):
        # Groq model IDs are lowercase ("llama-3.1-8b-instant"); the original
        # default "llama-3.1-8B-Instant" is rejected by the API as an unknown
        # model. API key is read from the GROQ_API_KEY environment variable.
        self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
        self.model_name = model_name

    def __call__(self, prompt: Union[str, dict, List[Dict]]) -> str:
        """Make the class callable as required by smolagents.

        Args:
            prompt: Plain string, or a dict / list of message dicts, which is
                stringified before being sent as a single user message.

        Returns:
            str: The model's reply, or an ``Error: ...`` string on failure
            (errors are reported in-band rather than raised so the agent loop
            keeps running).
        """
        try:
            prompt_str = str(prompt) if isinstance(prompt, (dict, list)) else prompt
            completion = self.client.chat.completions.create(
                model=self.model_name,
                messages=[{"role": "user", "content": prompt_str}],
                temperature=0.7,
                max_tokens=1024,
                stream=False,
            )
            if completion.choices:
                return completion.choices[0].message.content
            return "Error: No response generated"
        except Exception as e:
            return f"Error generating response: {str(e)}"
class NewsAnalysisAgent(CodeAgent):
    """Extended CodeAgent with news search and analysis capabilities"""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Accumulated state across runs: the news tools append to these so
        # later calls (e.g. trend analysis) can look back over prior results.
        self._articles = []
        self._search_results = []

    @property
    def articles(self) -> List[Dict]:
        """Access stored article data"""
        return self._articles

    @property
    def search_results(self) -> List[Dict]:
        """Access stored search results"""
        return self._search_results

    def run(self, prompt: str) -> str:
        """Override run method to include context about available tools"""
        # Prepend a fixed preamble describing the tool set, then delegate the
        # wrapped task to CodeAgent.run unchanged.
        enhanced_prompt = f"""
        You are a news analysis assistant that can:
        - Search for recent news articles
        - Extract and analyze article content
        - Summarize key points
        - Identify trends and patterns
        Task: {prompt}
        Use the provided tools to search and analyze news content.
        """
        return super().run(enhanced_prompt)
def extract_text_from_url(url: str) -> str:
    """Helper function to extract text content from a URL using BeautifulSoup"""
    try:
        request_headers = {
            'User-Agent': (
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                'AppleWebKit/537.36 (KHTML, like Gecko) '
                'Chrome/91.0.4472.124 Safari/537.36'
            )
        }
        resp = requests.get(url, headers=request_headers, timeout=10)
        resp.raise_for_status()

        page = BeautifulSoup(resp.text, 'html.parser')
        # Drop boilerplate elements that carry no article body text.
        for tag in page(['script', 'style', 'nav', 'header', 'footer', 'aside']):
            tag.decompose()

        # Keep only non-empty paragraph texts.
        fragments = []
        for paragraph in page.find_all('p'):
            stripped = paragraph.get_text().strip()
            if stripped:
                fragments.append(stripped)

        # Collapse all runs of whitespace to single spaces.
        return re.sub(r'\s+', ' ', ' '.join(fragments))
    except Exception as e:
        return f"Error extracting text: {str(e)}"
@tool
def search_news(query: str, max_results: int = 5) -> str:
    """Search for recent news articles using DuckDuckGo.

    Args:
        query: Search query string
        max_results: Maximum number of results to return

    Returns:
        str: Formatted string containing search results with titles and URLs
    """
    try:
        with DDGS() as ddgs:
            # DDGS.news takes `timelimit` (the original passed `timeframe`,
            # which raises TypeError); 'd' restricts to the last 24 hours.
            search_results = list(ddgs.news(
                query,
                timelimit='d',
                max_results=max_results,
            ))

        # Best-effort stash on the agent for later trend analysis.
        # NOTE(review): `tool` here is the smolagents decorator function; it
        # has no `.agent` attribute, so the original unguarded
        # `tool.agent._search_results = ...` raised AttributeError and the
        # whole tool returned an error string. Guarded so results still flow.
        agent = getattr(tool, "agent", None)
        if agent is not None:
            agent._search_results = search_results

        formatted_results = []
        for idx, result in enumerate(search_results, 1):
            title = result.get('title', '')
            # Current duckduckgo_search news results key the URL as 'url';
            # fall back to the legacy 'link' key just in case.
            link = result.get('url') or result.get('link', '')
            date = result.get('date', '')
            formatted_results.append(f"{idx}. {title}\n   URL: {link}\n   Date: {date}\n")
        if not formatted_results:
            return "No news results found."
        return "\n".join(formatted_results)
    except Exception as e:
        return f"Error searching news: {str(e)}"
@tool
def analyze_article(url: str) -> str:
    """Extract and analyze content from a news article URL.

    Args:
        url: URL of the news article to analyze

    Returns:
        str: Analysis of the article including summary and key points
    """
    try:
        # Extract readable text from the page.
        content = extract_text_from_url(url)

        # Truncate before interpolation to stay within token limits.
        # NOTE(review): the original put a Python comment *inside* the
        # f-string, so "# Limit content length..." leaked into the prompt.
        truncated = content[:3000]
        analysis_prompt = f"""
        Please analyze this article content and provide:
        1. A brief summary (2-3 sentences)
        2. Key points (3-5 main takeaways)
        3. Main topics/themes discussed
        Article content:
        {truncated}
        """

        # NOTE(review): `tool` is the smolagents decorator and has no
        # `.agent` backref, so the original `tool.agent.model(...)` raised
        # AttributeError on every call. Guard and fail with a clear message.
        agent = getattr(tool, "agent", None)
        model = getattr(agent, "model", None)
        if model is None:
            return "Error analyzing article: no agent model available for analysis"
        analysis = model(analysis_prompt)

        # Store a truncated copy for later trend analysis, when possible.
        article_data = {
            'url': url,
            'content': content[:1000],
            'analysis': analysis,
            'date': datetime.now().strftime('%Y-%m-%d'),
        }
        if hasattr(agent, "_articles"):
            agent._articles.append(article_data)
        return analysis
    except Exception as e:
        return f"Error analyzing article: {str(e)}"
@tool
def identify_trends(articles: List[Dict] = None) -> str:
    """Identify common themes and trends across analyzed articles.

    Args:
        articles: List of analyzed article data (optional, uses stored articles if None)

    Returns:
        str: Analysis of trends and patterns found across articles
    """
    # NOTE(review): `tool` is the smolagents decorator and has no `.agent`
    # attribute, so the original `tool.agent._articles` / `tool.agent.model`
    # accesses raised AttributeError. Both are guarded here.
    agent = getattr(tool, "agent", None)
    if articles is None:
        articles = getattr(agent, "_articles", None) or []
    if not articles:
        return "No articles available for trend analysis"

    # Concatenate the per-article analyses for cross-article comparison.
    combined_analyses = "\n".join(article['analysis'] for article in articles)
    trend_prompt = f"""
    Based on the analyses of {len(articles)} articles, please identify:
    1. Common themes or topics across articles
    2. Any notable patterns or trends
    3. Different perspectives or viewpoints presented
    Combined analyses:
    {combined_analyses}
    """

    model = getattr(agent, "model", None)
    if model is None:
        return "Error identifying trends: no agent model available for analysis"
    return model(trend_prompt)
def main():
    """Streamlit entry point: wires the agent to four UI sections."""
    st.title("News Analysis Assistant")
    st.write("Search and analyze recent news articles with natural language interaction.")

    # Create the agent once per browser session; Streamlit reruns this
    # function on every interaction, so session_state keeps it alive.
    if 'agent' not in st.session_state:
        st.session_state['agent'] = NewsAnalysisAgent(
            tools=[search_news, analyze_article, identify_trends],
            model=GroqLLM(),
            additional_authorized_imports=[
                "requests", "bs4", "duckduckgo_search", "pandas"
            ]
        )

    # News search interface: free-text query routed through the agent.
    search_query = st.text_input("Enter news search query:")
    if search_query:
        with st.spinner('Searching news...'):
            search_results = st.session_state['agent'].run(
                f"Use the search_news tool to find recent articles about: {search_query}"
            )
            st.write(search_results)

    # Article analysis interface: single-URL deep dive.
    st.subheader("Article Analysis")
    article_url = st.text_input("Enter article URL to analyze:")
    if article_url:
        with st.spinner('Analyzing article...'):
            analysis = st.session_state['agent'].run(
                f"Use the analyze_article tool to analyze this article: {article_url}"
            )
            st.write(analysis)

    # Trend analysis interface: operates over previously analyzed articles.
    if st.button("Analyze Trends"):
        with st.spinner('Identifying trends...'):
            trends = st.session_state['agent'].run(
                "Use the identify_trends tool to analyze patterns across all articles"
            )
            st.write(trends)

    # Custom analysis interface: unconstrained question to the agent.
    st.subheader("Custom Analysis")
    question = st.text_input("What would you like to know about the news?")
    if question:
        with st.spinner('Analyzing...'):
            result = st.session_state['agent'].run(question)
            st.write(result)
# Script entry guard. (The extracted source had a stray trailing "|" after
# main(), which would be a syntax error; removed here.)
if __name__ == "__main__":
    main()