File size: 8,374 Bytes
98ffa93
 
 
 
 
fd1209e
 
98ffa93
 
 
fd1209e
98ffa93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd1209e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98ffa93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd1209e
98ffa93
 
fd1209e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98ffa93
 
 
 
fd1209e
 
 
98ffa93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd1209e
 
98ffa93
fd1209e
 
 
 
 
98ffa93
fd1209e
 
98ffa93
 
fd1209e
98ffa93
 
 
 
 
 
 
 
 
 
 
fd1209e
98ffa93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
import streamlit as st
import pandas as pd
from smolagents import CodeAgent, tool
from typing import Union, List, Dict
from duckduckgo_search import DDGS
import requests
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
from groq import Groq
import os
import re

class GroqLLM:
    """Compatible LLM interface for smolagents CodeAgent.

    Wraps the Groq chat-completions client behind a plain callable so
    smolagents can use it as a model backend.
    """

    def __init__(self, model_name="llama-3.1-8b-instant"):
        # NOTE: Groq model IDs are lowercase; the previous default
        # "llama-3.1-8B-Instant" is rejected by the API.
        self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
        self.model_name = model_name

    def __call__(self, prompt: Union[str, dict, List[Dict]]) -> str:
        """Generate a completion for *prompt*.

        Args:
            prompt: A plain string, or a dict / list of message dicts,
                which are stringified before being sent as user content.

        Returns:
            The model's reply text, or an "Error ..." string on failure
            (callers expect a string back, never an exception).
        """
        try:
            prompt_str = str(prompt) if isinstance(prompt, (dict, list)) else prompt
            completion = self.client.chat.completions.create(
                model=self.model_name,
                messages=[{"role": "user", "content": prompt_str}],
                temperature=0.7,
                max_tokens=1024,
                stream=False,
            )
            if not completion.choices:
                return "Error: No response generated"
            return completion.choices[0].message.content
        except Exception as e:
            return f"Error generating response: {str(e)}"

class NewsAnalysisAgent(CodeAgent):
    """CodeAgent subclass with bookkeeping for news search and analysis."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Internal stores populated by the news tools.
        self._articles = []
        self._search_results = []

    @property
    def search_results(self) -> List[Dict]:
        """Stored raw search results from the most recent news search."""
        return self._search_results

    @property
    def articles(self) -> List[Dict]:
        """Stored per-article analysis records."""
        return self._articles

    def run(self, prompt: str) -> str:
        """Wrap *prompt* with tool context, then delegate to CodeAgent.run."""
        wrapped = f"""
        You are a news analysis assistant that can:
        - Search for recent news articles
        - Extract and analyze article content
        - Summarize key points
        - Identify trends and patterns
        
        Task: {prompt}
        
        Use the provided tools to search and analyze news content.
        """
        return super().run(wrapped)

def extract_text_from_url(url: str) -> str:
    """Fetch *url* and return its paragraph text as one cleaned string.

    Script/style/navigation chrome is stripped before extraction.  On any
    failure an "Error extracting text: ..." string is returned instead of
    raising, so callers always receive a string.
    """
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        resp = requests.get(url, headers=headers, timeout=10)
        resp.raise_for_status()

        soup = BeautifulSoup(resp.text, 'html.parser')

        # Drop markup that never carries article body text.
        for tag in soup(['script', 'style', 'nav', 'header', 'footer', 'aside']):
            tag.decompose()

        # Gather non-empty paragraph texts, then collapse runs of whitespace.
        chunks = [p.get_text().strip() for p in soup.find_all('p')]
        body = ' '.join(c for c in chunks if c)
        return re.sub(r'\s+', ' ', body)

    except Exception as e:
        return f"Error extracting text: {str(e)}"

@tool
def search_news(query: str, max_results: int = 5) -> str:
    """Search for recent news articles using DuckDuckGo.
    
    Args:
        query: Search query string
        max_results: Maximum number of results to return
    
    Returns:
        str: Formatted string containing search results with titles and URLs
    """
    try:
        with DDGS() as ddgs:
            # DDGS.news takes `timelimit` (not `timeframe`); "d" = last 24h.
            search_results = list(ddgs.news(
                query,
                max_results=max_results,
                timelimit='d'
            ))

        # The smolagents `tool` decorator has no `.agent` attribute, so
        # guard the lookup instead of raising AttributeError on every call.
        agent = getattr(tool, 'agent', None)
        if agent is not None:
            agent._search_results = search_results

        # Format results for display.
        formatted_results = []
        for idx, result in enumerate(search_results, 1):
            # Current duckduckgo_search exposes the article link under
            # 'url'; older versions used 'link' — accept either.
            link = result.get('url') or result.get('link', '')
            formatted_results.append(
                f"{idx}. {result['title']}\n   URL: {link}\n   Date: {result['date']}\n"
            )

        return "\n".join(formatted_results)
    except Exception as e:
        return f"Error searching news: {str(e)}"

@tool
def analyze_article(url: str) -> str:
    """Extract and analyze content from a news article URL.
    
    Args:
        url: URL of the news article to analyze
    
    Returns:
        str: Analysis of the article including summary and key points
    """
    try:
        # Extract text content
        content = extract_text_from_url(url)

        # Truncate up front and keep the note OUT of the prompt literal:
        # the original embedded a "# Limit content length..." comment
        # inside the f-string, so it was sent to the model as prompt text.
        snippet = content[:3000]  # limit content length for token constraints
        analysis_prompt = f"""
        Please analyze this article content and provide:
        1. A brief summary (2-3 sentences)
        2. Key points (3-5 main takeaways)
        3. Main topics/themes discussed
        
        Article content:
        {snippet}
        """

        # `tool` has no `.agent` attribute at runtime; fall back to a
        # fresh GroqLLM so the tool works instead of raising AttributeError.
        agent = getattr(tool, 'agent', None)
        llm = agent.model if agent is not None else GroqLLM()
        analysis = llm(analysis_prompt)

        # Record the article for later trend analysis when an agent exists.
        article_data = {
            'url': url,
            'content': content[:1000],  # store truncated content only
            'analysis': analysis,
            'date': datetime.now().strftime('%Y-%m-%d')
        }
        if agent is not None:
            agent._articles.append(article_data)

        return analysis
    except Exception as e:
        return f"Error analyzing article: {str(e)}"

@tool
def identify_trends(articles: Union[List[Dict], None] = None) -> str:
    """Identify common themes and trends across analyzed articles.
    
    Args:
        articles: List of analyzed article data (optional, uses stored articles if None)
    
    Returns:
        str: Analysis of trends and patterns found across articles
    """
    # `tool` has no `.agent` attribute at runtime; guard the lookup so a
    # missing agent degrades to "no articles" instead of AttributeError.
    agent = getattr(tool, 'agent', None)
    if articles is None:
        articles = agent._articles if agent is not None else []

    if not articles:
        return "No articles available for trend analysis"

    # Combine all analyses for trend identification
    combined_analyses = "\n".join(article['analysis'] for article in articles)

    trend_prompt = f"""
    Based on the analyses of {len(articles)} articles, please identify:
    1. Common themes or topics across articles
    2. Any notable patterns or trends
    3. Different perspectives or viewpoints presented
    
    Combined analyses:
    {combined_analyses}
    """

    # Use the agent-bound model when available, else a fresh GroqLLM.
    llm = agent.model if agent is not None else GroqLLM()
    return llm(trend_prompt)

def main():
    """Streamlit entry point: wires the news-analysis agent to the UI."""
    st.title("News Analysis Assistant")
    st.write("Search and analyze recent news articles with natural language interaction.")

    # Build the agent once per browser session.
    if 'agent' not in st.session_state:
        st.session_state['agent'] = NewsAnalysisAgent(
            tools=[search_news, analyze_article, identify_trends],
            model=GroqLLM(),
            additional_authorized_imports=[
                "requests", "bs4", "duckduckgo_search", "pandas"
            ]
        )
    agent = st.session_state['agent']

    # --- News search ---
    search_query = st.text_input("Enter news search query:")
    if search_query:
        with st.spinner('Searching news...'):
            st.write(agent.run(
                f"Use the search_news tool to find recent articles about: {search_query}"
            ))

    # --- Single-article analysis ---
    st.subheader("Article Analysis")
    article_url = st.text_input("Enter article URL to analyze:")
    if article_url:
        with st.spinner('Analyzing article...'):
            st.write(agent.run(
                f"Use the analyze_article tool to analyze this article: {article_url}"
            ))

    # --- Cross-article trend analysis ---
    if st.button("Analyze Trends"):
        with st.spinner('Identifying trends...'):
            st.write(agent.run(
                "Use the identify_trends tool to analyze patterns across all articles"
            ))

    # --- Free-form questions ---
    st.subheader("Custom Analysis")
    question = st.text_input("What would you like to know about the news?")
    if question:
        with st.spinner('Analyzing...'):
            st.write(agent.run(question))


if __name__ == "__main__":
    main()