File size: 8,374 Bytes
98ffa93
 
 
 
 
fd1209e
 
98ffa93
 
 
fd1209e
98ffa93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd1209e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98ffa93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd1209e
98ffa93
 
fd1209e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98ffa93
 
 
 
fd1209e
 
 
98ffa93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd1209e
 
98ffa93
fd1209e
 
 
 
 
98ffa93
fd1209e
 
98ffa93
 
fd1209e
98ffa93
 
 
 
 
 
 
 
 
 
 
fd1209e
98ffa93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
import streamlit as st
import pandas as pd
from smolagents import CodeAgent, tool
from typing import Union, List, Dict
from duckduckgo_search import DDGS
import requests
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
from groq import Groq
import os
import re

class GroqLLM:
    """Compatible LLM interface for smolagents CodeAgent.

    Wraps the Groq chat-completions client behind a plain callable so
    smolagents can use it as a model backend.
    """

    def __init__(self, model_name="llama-3.1-8b-instant"):
        # NOTE: Groq model IDs are lowercase; the previous default
        # "llama-3.1-8B-Instant" is rejected by the API.
        self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
        self.model_name = model_name

    def __call__(self, prompt: Union[str, dict, List[Dict]]) -> str:
        """Generate a completion for *prompt*.

        Args:
            prompt: A plain string, or a dict / list of message dicts,
                which are stringified before being sent as user content.

        Returns:
            The model's reply text, or an "Error ..." string on failure
            (callers expect a string back, never an exception).
        """
        try:
            prompt_str = str(prompt) if isinstance(prompt, (dict, list)) else prompt
            completion = self.client.chat.completions.create(
                model=self.model_name,
                messages=[{"role": "user", "content": prompt_str}],
                temperature=0.7,
                max_tokens=1024,
                stream=False,
            )
            if not completion.choices:
                return "Error: No response generated"
            return completion.choices[0].message.content
        except Exception as e:
            return f"Error generating response: {str(e)}"

class NewsAnalysisAgent(CodeAgent):
    """CodeAgent subclass with bookkeeping for news search and analysis."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Internal stores populated by the news tools.
        self._articles = []
        self._search_results = []

    @property
    def search_results(self) -> List[Dict]:
        """Stored raw search results from the most recent news search."""
        return self._search_results

    @property
    def articles(self) -> List[Dict]:
        """Stored per-article analysis records."""
        return self._articles

    def run(self, prompt: str) -> str:
        """Wrap *prompt* with tool context, then delegate to CodeAgent.run."""
        wrapped = f"""
        You are a news analysis assistant that can:
        - Search for recent news articles
        - Extract and analyze article content
        - Summarize key points
        - Identify trends and patterns
        
        Task: {prompt}
        
        Use the provided tools to search and analyze news content.
        """
        return super().run(wrapped)

def extract_text_from_url(url: str) -> str:
    """Fetch *url* and return its paragraph text as one cleaned string.

    Script/style/navigation chrome is stripped before extraction.  On any
    failure an "Error extracting text: ..." string is returned instead of
    raising, so callers always receive a string.
    """
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        resp = requests.get(url, headers=headers, timeout=10)
        resp.raise_for_status()

        soup = BeautifulSoup(resp.text, 'html.parser')

        # Drop markup that never carries article body text.
        for tag in soup(['script', 'style', 'nav', 'header', 'footer', 'aside']):
            tag.decompose()

        # Gather non-empty paragraph texts, then collapse runs of whitespace.
        chunks = [p.get_text().strip() for p in soup.find_all('p')]
        body = ' '.join(c for c in chunks if c)
        return re.sub(r'\s+', ' ', body)

    except Exception as e:
        return f"Error extracting text: {str(e)}"

@tool
def search_news(query: str, max_results: int = 5) -> str:
    """Search for recent news articles using DuckDuckGo.
    
    Args:
        query: Search query string
        max_results: Maximum number of results to return
    
    Returns:
        str: Formatted string containing search results with titles and URLs
    """
    try:
        with DDGS() as ddgs:
            # DDGS.news takes `timelimit` (not `timeframe`); "d" = last 24h.
            search_results = list(ddgs.news(
                query,
                max_results=max_results,
                timelimit='d'
            ))

        # The smolagents `tool` decorator has no `.agent` attribute, so
        # guard the lookup instead of raising AttributeError on every call.
        agent = getattr(tool, 'agent', None)
        if agent is not None:
            agent._search_results = search_results

        # Format results for display.
        formatted_results = []
        for idx, result in enumerate(search_results, 1):
            # Current duckduckgo_search exposes the article link under
            # 'url'; older versions used 'link' — accept either.
            link = result.get('url') or result.get('link', '')
            formatted_results.append(
                f"{idx}. {result['title']}\n   URL: {link}\n   Date: {result['date']}\n"
            )

        return "\n".join(formatted_results)
    except Exception as e:
        return f"Error searching news: {str(e)}"

@tool
def analyze_article(url: str) -> str:
    """Extract and analyze content from a news article URL.
    
    Args:
        url: URL of the news article to analyze
    
    Returns:
        str: Analysis of the article including summary and key points
    """
    try:
        # Extract text content
        content = extract_text_from_url(url)

        # Truncate up front and keep the note OUT of the prompt literal:
        # the original embedded a "# Limit content length..." comment
        # inside the f-string, so it was sent to the model as prompt text.
        snippet = content[:3000]  # limit content length for token constraints
        analysis_prompt = f"""
        Please analyze this article content and provide:
        1. A brief summary (2-3 sentences)
        2. Key points (3-5 main takeaways)
        3. Main topics/themes discussed
        
        Article content:
        {snippet}
        """

        # `tool` has no `.agent` attribute at runtime; fall back to a
        # fresh GroqLLM so the tool works instead of raising AttributeError.
        agent = getattr(tool, 'agent', None)
        llm = agent.model if agent is not None else GroqLLM()
        analysis = llm(analysis_prompt)

        # Record the article for later trend analysis when an agent exists.
        article_data = {
            'url': url,
            'content': content[:1000],  # store truncated content only
            'analysis': analysis,
            'date': datetime.now().strftime('%Y-%m-%d')
        }
        if agent is not None:
            agent._articles.append(article_data)

        return analysis
    except Exception as e:
        return f"Error analyzing article: {str(e)}"

@tool
def identify_trends(articles: Union[List[Dict], None] = None) -> str:
    """Identify common themes and trends across analyzed articles.
    
    Args:
        articles: List of analyzed article data (optional, uses stored articles if None)
    
    Returns:
        str: Analysis of trends and patterns found across articles
    """
    # `tool` has no `.agent` attribute at runtime; guard the lookup so a
    # missing agent degrades to "no articles" instead of AttributeError.
    agent = getattr(tool, 'agent', None)
    if articles is None:
        articles = agent._articles if agent is not None else []

    if not articles:
        return "No articles available for trend analysis"

    # Combine all analyses for trend identification
    combined_analyses = "\n".join(article['analysis'] for article in articles)

    trend_prompt = f"""
    Based on the analyses of {len(articles)} articles, please identify:
    1. Common themes or topics across articles
    2. Any notable patterns or trends
    3. Different perspectives or viewpoints presented
    
    Combined analyses:
    {combined_analyses}
    """

    # Use the agent-bound model when available, else a fresh GroqLLM.
    llm = agent.model if agent is not None else GroqLLM()
    return llm(trend_prompt)

def main():
    """Streamlit entry point: wires the news-analysis agent to the UI."""
    st.title("News Analysis Assistant")
    st.write("Search and analyze recent news articles with natural language interaction.")

    # Build the agent once per browser session.
    if 'agent' not in st.session_state:
        st.session_state['agent'] = NewsAnalysisAgent(
            tools=[search_news, analyze_article, identify_trends],
            model=GroqLLM(),
            additional_authorized_imports=[
                "requests", "bs4", "duckduckgo_search", "pandas"
            ]
        )
    agent = st.session_state['agent']

    # --- News search ---
    search_query = st.text_input("Enter news search query:")
    if search_query:
        with st.spinner('Searching news...'):
            st.write(agent.run(
                f"Use the search_news tool to find recent articles about: {search_query}"
            ))

    # --- Single-article analysis ---
    st.subheader("Article Analysis")
    article_url = st.text_input("Enter article URL to analyze:")
    if article_url:
        with st.spinner('Analyzing article...'):
            st.write(agent.run(
                f"Use the analyze_article tool to analyze this article: {article_url}"
            ))

    # --- Cross-article trend analysis ---
    if st.button("Analyze Trends"):
        with st.spinner('Identifying trends...'):
            st.write(agent.run(
                "Use the identify_trends tool to analyze patterns across all articles"
            ))

    # --- Free-form questions ---
    st.subheader("Custom Analysis")
    question = st.text_input("What would you like to know about the news?")
    if question:
        with st.spinner('Analyzing...'):
            st.write(agent.run(question))


if __name__ == "__main__":
    main()