|
import streamlit as st |
|
import pandas as pd |
|
from smolagents import CodeAgent, tool |
|
from typing import Union, List, Dict |
|
from duckduckgo_search import DDGS |
|
import requests |
|
from bs4 import BeautifulSoup |
|
from datetime import datetime, timedelta |
|
from groq import Groq |
|
import os |
|
import re |
|
|
|
class GroqLLM:
    """Callable LLM wrapper compatible with the smolagents model interface.

    smolagents expects the model object to be callable with a prompt and to
    return the generated text as a plain string.
    """

    def __init__(self, model_name: str = "llama-3.1-8b-instant"):
        # Groq model IDs are all-lowercase; the previous default
        # "llama-3.1-8B-Instant" is rejected by the API (model not found).
        self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
        self.model_name = model_name

    def __call__(self, prompt: Union[str, dict, List[Dict]]) -> str:
        """Generate a completion for *prompt*.

        Args:
            prompt: Plain string, or a dict / list of message dicts as
                produced by smolagents; non-strings are stringified.

        Returns:
            str: The model's reply, or an "Error ..." string on failure so
            the agent loop keeps running instead of crashing.
        """
        try:
            # smolagents may pass structured message payloads; coerce to text.
            prompt_str = str(prompt) if isinstance(prompt, (dict, list)) else prompt

            completion = self.client.chat.completions.create(
                model=self.model_name,
                messages=[{"role": "user", "content": prompt_str}],
                temperature=0.7,
                max_tokens=1024,
                stream=False,
            )

            if not completion.choices:
                return "Error: No response generated"
            return completion.choices[0].message.content
        except Exception as e:
            # Surface failures as text rather than raising into the agent.
            return f"Error generating response: {str(e)}"
|
|
|
class NewsAnalysisAgent(CodeAgent):
    """CodeAgent specialized for news search and analysis.

    Keeps per-session state (raw search results and analyzed articles) and
    prepends tool-usage context to every task prompt.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Accumulated article analyses (dicts with url/content/analysis/date).
        self._articles = []
        # Raw results from the most recent news search.
        self._search_results = []

    @property
    def articles(self) -> List[Dict]:
        """Access stored article data."""
        return self._articles

    @property
    def search_results(self) -> List[Dict]:
        """Access stored search results."""
        return self._search_results

    def run(self, prompt: str, **kwargs) -> str:
        """Wrap *prompt* with tool context, then delegate to CodeAgent.run.

        Extra keyword arguments are forwarded to the base implementation so
        callers can still use CodeAgent.run options (the previous override
        silently dropped them).
        """
        enhanced_prompt = f"""
        You are a news analysis assistant that can:
        - Search for recent news articles
        - Extract and analyze article content
        - Summarize key points
        - Identify trends and patterns

        Task: {prompt}

        Use the provided tools to search and analyze news content.
        """
        return super().run(enhanced_prompt, **kwargs)
|
|
|
def extract_text_from_url(url: str) -> str:
    """Fetch a web page and return its visible paragraph text.

    Scripts, styles and page chrome are stripped; the remaining ``<p>`` text
    is joined and whitespace-collapsed. Any failure (network, HTTP status,
    parsing) is reported as an "Error ..." string rather than raised.
    """
    try:
        request_headers = {
            'User-Agent': (
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                'AppleWebKit/537.36 (KHTML, like Gecko) '
                'Chrome/91.0.4472.124 Safari/537.36'
            )
        }
        page = requests.get(url, headers=request_headers, timeout=10)
        page.raise_for_status()

        soup = BeautifulSoup(page.text, 'html.parser')

        # Drop non-content elements before extracting text.
        for tag in soup(['script', 'style', 'nav', 'header', 'footer', 'aside']):
            tag.decompose()

        chunks = []
        for paragraph in soup.find_all('p'):
            chunk = paragraph.get_text().strip()
            if chunk:
                chunks.append(chunk)

        # Collapse runs of whitespace into single spaces.
        return re.sub(r'\s+', ' ', ' '.join(chunks))

    except Exception as e:
        return f"Error extracting text: {str(e)}"
|
|
|
@tool
def search_news(query: str, max_results: int = 5) -> str:
    """Search for recent news articles using DuckDuckGo.

    Args:
        query: Search query string
        max_results: Maximum number of results to return

    Returns:
        str: Formatted string containing search results with titles and URLs
    """
    try:
        with DDGS() as ddgs:
            # DDGS.news takes `timelimit` (not `timeframe`, which raised a
            # TypeError); 'd' restricts results to the past day.
            search_results = list(ddgs.news(
                query,
                max_results=max_results,
                timelimit='d'
            ))

        # Best-effort bookkeeping: `tool` is the smolagents decorator and has
        # no `agent` attribute, so the old unconditional assignment raised
        # AttributeError and discarded the results. Store only when an agent
        # is actually attached.
        agent = getattr(tool, 'agent', None)
        if agent is not None:
            agent._search_results = search_results

        formatted_results = []
        for idx, result in enumerate(search_results, 1):
            formatted_results.append(f"{idx}. {result['title']}\n URL: {result['link']}\n Date: {result['date']}\n")

        return "\n".join(formatted_results)
    except Exception as e:
        return f"Error searching news: {str(e)}"
|
|
|
@tool
def analyze_article(url: str) -> str:
    """Extract and analyze content from a news article URL.

    Args:
        url: URL of the news article to analyze

    Returns:
        str: Analysis of the article including summary and key points
    """
    try:
        content = extract_text_from_url(url)

        # Content is truncated to 3000 chars to respect token limits. (The
        # old code put that note *inside* the f-string, sending it verbatim
        # to the LLM.)
        analysis_prompt = f"""
        Please analyze this article content and provide:
        1. A brief summary (2-3 sentences)
        2. Key points (3-5 main takeaways)
        3. Main topics/themes discussed

        Article content:
        {content[:3000]}
        """

        # `tool` (the decorator) has no `agent` attribute, so the old
        # `tool.agent.model(...)` always raised AttributeError and this tool
        # never worked. Use the attached agent's model when present,
        # otherwise fall back to a fresh GroqLLM instance.
        agent = getattr(tool, 'agent', None)
        model = getattr(agent, 'model', None) if agent is not None else None
        if model is None:
            model = GroqLLM()
        analysis = model(analysis_prompt)

        article_data = {
            'url': url,
            'content': content[:1000],
            'analysis': analysis,
            'date': datetime.now().strftime('%Y-%m-%d')
        }
        # Best-effort: store for trend analysis only if an agent is attached.
        if agent is not None:
            agent._articles.append(article_data)

        return analysis
    except Exception as e:
        return f"Error analyzing article: {str(e)}"
|
|
|
@tool
def identify_trends(articles: List[Dict] = None) -> str:
    """Identify common themes and trends across analyzed articles.

    Args:
        articles: List of analyzed article data (optional, uses stored articles if None)

    Returns:
        str: Analysis of trends and patterns found across articles
    """
    # `tool` is the smolagents decorator and has no `agent` attribute, so the
    # old `tool.agent._articles` fallback raised an *uncaught* AttributeError
    # (this tool has no try/except). Guard the lookup instead.
    agent = getattr(tool, 'agent', None)
    if not articles:
        articles = getattr(agent, '_articles', None) if agent is not None else None

    if not articles:
        return "No articles available for trend analysis"

    combined_analyses = "\n".join(article['analysis'] for article in articles)

    trend_prompt = f"""
    Based on the analyses of {len(articles)} articles, please identify:
    1. Common themes or topics across articles
    2. Any notable patterns or trends
    3. Different perspectives or viewpoints presented

    Combined analyses:
    {combined_analyses}
    """

    # Prefer the attached agent's model; fall back to a fresh GroqLLM so the
    # tool still works when no agent reference is available.
    model = getattr(agent, 'model', None) if agent is not None else None
    if model is None:
        model = GroqLLM()
    return model(trend_prompt)
|
|
|
def main():
    """Streamlit entry point: build the agent once and wire up the UI."""
    st.title("News Analysis Assistant")
    st.write("Search and analyze recent news articles with natural language interaction.")

    # Create the agent a single time per session and keep it in session state.
    if 'agent' not in st.session_state:
        st.session_state['agent'] = NewsAnalysisAgent(
            tools=[search_news, analyze_article, identify_trends],
            model=GroqLLM(),
            additional_authorized_imports=[
                "requests", "bs4", "duckduckgo_search", "pandas"
            ]
        )

    agent = st.session_state['agent']

    # --- News search -------------------------------------------------------
    search_query = st.text_input("Enter news search query:")
    if search_query:
        with st.spinner('Searching news...'):
            search_results = agent.run(
                f"Use the search_news tool to find recent articles about: {search_query}"
            )
            st.write(search_results)

    # --- Single-article analysis ------------------------------------------
    st.subheader("Article Analysis")
    article_url = st.text_input("Enter article URL to analyze:")
    if article_url:
        with st.spinner('Analyzing article...'):
            analysis = agent.run(
                f"Use the analyze_article tool to analyze this article: {article_url}"
            )
            st.write(analysis)

    # --- Cross-article trend analysis --------------------------------------
    if st.button("Analyze Trends"):
        with st.spinner('Identifying trends...'):
            trends = agent.run(
                "Use the identify_trends tool to analyze patterns across all articles"
            )
            st.write(trends)

    # --- Free-form question -------------------------------------------------
    st.subheader("Custom Analysis")
    question = st.text_input("What would you like to know about the news?")
    if question:
        with st.spinner('Analyzing...'):
            result = agent.run(question)
            st.write(result)


if __name__ == "__main__":
    main()