"""
News API Collector
Collects industry news and trends for feature validation
"""

import asyncio
from typing import List, Dict, Any, Optional
from datetime import datetime, timedelta
import os
from newsapi import NewsApiClient
import aiohttp
from textblob import TextBlob
import re
from dotenv import load_dotenv
from utils.logger import get_logger
from utils.nlp_query_enhancer import NLPQueryEnhancer

# Load environment variables (python-dotenv) before anything reads them;
# setup_news_client() looks up the NewsAPI key through os.getenv().
load_dotenv()

# Module-level logger obtained from the project's logging helper.
logger = get_logger(__name__)

class NewsCollector:
    """Collects and analyzes news for market trends"""
    
    def __init__(self):
        """Create the collector and try to configure the NewsAPI client."""
        # No HTTP session exists until the async context manager is entered.
        self.session = None
        self.news_api = None
        self.nlp_enhancer = NLPQueryEnhancer()
        # Replaces self.news_api with a real client when an API key is found.
        self.setup_news_client()
    
    def setup_news_client(self):
        """Build the NewsAPI client from an API key found in the environment.

        The key is read from NEWS_API_KEY, falling back to NEWSAPI_KEY. When no
        key is available, or the client cannot be constructed, ``self.news_api``
        is left as ``None`` and the failure is logged instead of raised.
        """
        step = "NewsAPI client setup"
        logger.log_processing_step(step, "started")

        try:
            # First non-empty variable wins.
            api_key = None
            for env_name in ("NEWS_API_KEY", "NEWSAPI_KEY"):
                api_key = os.getenv(env_name)
                if api_key:
                    break

            if api_key:
                # Only the last four characters of the key reach the log.
                logger.log_api_attempt("NewsAPI", f"with key ending in ...{api_key[-4:]}")
                self.news_api = NewsApiClient(api_key=api_key)
                logger.log_api_success("NewsAPI", "client initialized")
                logger.log_processing_step(step, "completed")
            else:
                logger.log_api_failure(
                    "NewsAPI",
                    "No NewsAPI key found in environment variables (NEWS_API_KEY or NEWSAPI_KEY)",
                )
                self.news_api = None

        except Exception as exc:
            logger.log_api_failure("NewsAPI", f"NewsAPI client setup failed: {str(exc)}")
            logger.log_processing_step(step, "failed")
            self.news_api = None
    
    async def __aenter__(self):
        """Enter the async context: open an aiohttp session and return self.

        The session uses a TCP connector with connection limits and DNS caching
        and a 30 second total timeout per request.
        """
        self.session = aiohttp.ClientSession(
            connector=aiohttp.TCPConnector(
                limit=10,
                limit_per_host=5,
                ttl_dns_cache=300,
                use_dns_cache=True,
                keepalive_timeout=30,
                enable_cleanup_closed=True,
            ),
            timeout=aiohttp.ClientTimeout(total=30),
        )
        return self
    
    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit: release the session via cleanup().

        The exception arguments are not inspected and nothing is returned, so
        an exception raised inside the ``async with`` body is not suppressed.
        """
        await self.cleanup()
    
    async def cleanup(self):
        """Clean up session and connections properly"""
        try:
            if self.session and not self.session.closed:
                await self.session.close()
                logger.debug("News collector session closed properly")
        except Exception as e:
            logger.debug(f"Error during news collector cleanup: {str(e)}")
        finally:
            self.session = None
            # Give time for connections to close
            await asyncio.sleep(0.1)
    
    def clean_text(self, text: str) -> str:
        """Strip URLs and punctuation from text and collapse whitespace.

        Args:
            text: Raw text; ``None`` or an empty string is accepted.

        Returns:
            The text with URL-like tokens removed, every character that is
            neither a word character nor whitespace replaced by a space, and
            runs of whitespace collapsed to single spaces. Empty input yields
            an empty string.
        """
        if not text:
            return ""

        # Raw strings need a SINGLE backslash: r'\S' is the regex class
        # "non-whitespace", while r'\\S' would match a literal backslash + "S".
        # 'http\S+' already covers https URLs.
        without_urls = re.sub(r'http\S+|www\S+', '', text)

        # Punctuation and other symbols become spaces so adjacent words
        # stay separated.
        without_symbols = re.sub(r'[^\w\s]', ' ', without_urls)

        # split()/join collapses whitespace runs and trims both ends.
        return ' '.join(without_symbols.split())
    
    def extract_keywords(self, text: str, min_length: int = 4, max_keywords: int = 10) -> List[str]:
        """Return noun phrases from the text to use as keywords.

        Args:
            text: Text to analyse; empty input yields an empty list.
            min_length: Minimum number of characters a phrase must have.
            max_keywords: Maximum number of phrases returned.

        Returns:
            Up to ``max_keywords`` TextBlob noun phrases, in the order TextBlob
            reports them. An empty list is returned (and the error logged) if
            extraction fails.
        """
        if not text:
            return []

        try:
            long_enough = []
            for phrase in TextBlob(text).noun_phrases:  # type: ignore
                if len(phrase) >= min_length:
                    long_enough.append(phrase)
            return long_enough[:max_keywords]

        except Exception as exc:
            logger.error(f"Error extracting keywords: {str(exc)}")
            return []
    
    def optimize_search_query(self, feature_description: str) -> List[str]:
        """
        Break a feature description into a short list of searchable queries

        Known technology and business terms are detected as whole words (an
        optional plural "s" is allowed) and mapped to canned queries. The
        canned queries are specific to the fitness / nutrition / health
        domain. Short noun phrases from the description are appended as a
        best-effort fallback.

        Args:
            feature_description: Original feature description

        Returns:
            At most 8 unique queries, in priority order. Empty input yields
            an empty list.
        """
        if not feature_description:
            return []

        description_lower = feature_description.lower()

        def mentions(term: str) -> bool:
            # Whole-word match: a plain substring test would find 'ai' inside
            # "training" or "email" and 'app' inside "happy".
            pattern = rf'\b{re.escape(term)}s?\b'
            return re.search(pattern, description_lower) is not None

        # Common technology terms
        tech_terms = ['ai', 'artificial intelligence', 'machine learning', 'ml', 'app', 'mobile', 'web', 'platform', 'software', 'tool', 'system']

        # Common business/industry terms
        business_terms = ['fitness', 'health', 'nutrition', 'training', 'workout', 'exercise', 'diet', 'wellness', 'personal trainer', 'coaching']

        key_terms = {term for term in tech_terms + business_terms if mentions(term)}

        queries: List[str] = []

        if key_terms:
            has_ai = 'ai' in key_terms or 'artificial intelligence' in key_terms
            has_fitness = 'fitness' in key_terms
            has_nutrition = 'nutrition' in key_terms

            # Strategy 1: Core concept combinations
            if has_ai and has_fitness:
                queries.extend([
                    'AI fitness app',
                    'artificial intelligence fitness',
                    'AI personal trainer',
                    'smart fitness technology',
                ])
            if has_ai and has_nutrition:
                queries.extend([
                    'AI nutrition app',
                    'artificial intelligence diet',
                    'smart meal planning',
                ])

            # Strategy 2: Industry-specific terms
            if has_fitness:
                queries.extend([
                    'fitness app market',
                    'digital fitness trends',
                    'fitness technology',
                    'workout app industry',
                ])
            if has_nutrition:
                queries.extend([
                    'nutrition app market',
                    'diet tracking technology',
                    'meal planning apps',
                ])

            # Strategy 3: Broader market terms
            queries.extend([
                'health tech startup',
                'wellness app market',
                'fitness industry trends',
                'health technology news',
            ])

        # Fallback: short noun phrases from the original description.
        try:
            blob = TextBlob(feature_description)
            noun_phrases = [phrase for phrase in blob.noun_phrases if len(phrase.split()) <= 3]  # type: ignore

            for phrase in noun_phrases[:5]:
                if len(phrase) > 3:  # Skip very short phrases
                    queries.append(phrase)
        except Exception:
            # Deliberately best-effort: if noun-phrase extraction is
            # unavailable, keep the queries built so far. Unlike a bare
            # ``except:``, this does not swallow KeyboardInterrupt/SystemExit.
            pass

        # dict.fromkeys removes duplicates while preserving priority order.
        unique_queries = list(dict.fromkeys(queries))

        # Limit to a reasonable number of queries
        return unique_queries[:8]
    
    def create_search_variations(self, base_query: str) -> List[str]:
        """
        Create variations of a search query for better coverage
        
        Args:
            base_query: Base search query
            
        Returns:
            List of query variations
        """
        variations = [base_query]
        
        # Add industry context
        variations.extend([
            f"{base_query} industry",
            f"{base_query} market",
            f"{base_query} trends",
            f"{base_query} startup",
            f"{base_query} technology"
        ])
        
        # Add temporal context
        variations.extend([
            f"{base_query} 2024",
            f"{base_query} latest",
            f"{base_query} new"
        ])
        
        return variations
    
    def _analyze_article_sentiment(self, text: str) -> Dict[str, Any]:
        """
        Score the sentiment of an article's text

        The final polarity blends TextBlob's polarity (weight 0.6) with the
        keyword score from ``_analyze_keyword_sentiment`` (weight 0.4). The
        text is classified as positive at or above 0.05, negative at or below
        -0.05, and neutral in between.

        Args:
            text: Article text (title + description)

        Returns:
            Dict with ``polarity``, ``subjectivity``, ``classification``,
            ``confidence``, ``keyword_sentiment`` and ``debug_info``. Empty
            text or an analysis error yields an all-zero neutral result whose
            ``debug_info`` carries the reason under ``error``.
        """
        def neutral_result(reason: str) -> Dict[str, Any]:
            # Shared shape for the "nothing to report" outcomes.
            return {
                "polarity": 0.0,
                "subjectivity": 0.0,
                "classification": "neutral",
                "confidence": 0.0,
                "keyword_sentiment": 0.0,
                "debug_info": {"error": reason}
            }

        if not text:
            return neutral_result("Empty text")

        try:
            blob_sentiment = TextBlob(text).sentiment  # type: ignore
            base_polarity = blob_sentiment.polarity
            base_subjectivity = blob_sentiment.subjectivity

            keyword_score = self._analyze_keyword_sentiment(text.lower())

            # Weighted blend: 60% TextBlob, 40% keyword score.
            blended = (base_polarity * 0.6) + (keyword_score * 0.4)

            positive_cutoff = 0.05
            negative_cutoff = -0.05

            label = "neutral"
            if blended >= positive_cutoff:
                label = "positive"
            elif blended <= negative_cutoff:
                label = "negative"

            # Confidence grows with distance from zero, capped at 1.0.
            certainty = min(abs(blended) * 2, 1.0)

            return {
                "polarity": blended,
                "subjectivity": base_subjectivity,
                "classification": label,
                "confidence": certainty,
                "keyword_sentiment": keyword_score,
                "debug_info": {
                    "textblob_polarity": base_polarity,
                    "keyword_sentiment": keyword_score,
                    "thresholds": {
                        "positive": positive_cutoff,
                        "negative": negative_cutoff
                    }
                }
            }

        except Exception as exc:
            logger.error(f"Error in enhanced sentiment analysis: {str(exc)}")
            return neutral_result(str(exc))
    
    def _analyze_keyword_sentiment(self, text: str) -> float:
        """
        Analyze sentiment based on business/news-specific keywords

        Each keyword counts at most once and must begin at a word boundary.
        Inflected forms still match ("improved", "profits"), but a keyword
        buried inside an unrelated word does not ("gain" in "again", "rise"
        in "enterprise").

        Args:
            text: Text to analyze; it is lowercased before matching

        Returns:
            Sentiment score from keyword analysis (-1.0 to 1.0); 0.0 when the
            text is empty or contains no known keyword
        """
        if not text:
            return 0.0

        text = text.lower()

        # Negative keywords with weights
        negative_keywords = {
            # Strong negative
            'bankruptcy': -0.9, 'fraud': -0.9, 'scandal': -0.8, 'crisis': -0.8,
            'collapse': -0.8, 'failure': -0.7, 'lawsuit': -0.7, 'breach': -0.7,
            'hack': -0.7, 'cyberattack': -0.7, 'data breach': -0.8,

            # Moderate negative
            'decline': -0.6, 'loss': -0.6, 'drop': -0.5, 'fall': -0.5,
            'concern': -0.5, 'risk': -0.5, 'problem': -0.5, 'issue': -0.4,
            'challenge': -0.4, 'difficulty': -0.4, 'struggle': -0.5,
            'layoff': -0.6, 'shutdown': -0.7, 'closure': -0.6,

            # Business negative
            'recession': -0.7, 'downturn': -0.6, 'crash': -0.8,
            'volatile': -0.5, 'uncertainty': -0.4, 'unstable': -0.5,
            'disappointing': -0.5, 'missed expectations': -0.6,
            'below forecast': -0.5, 'underperform': -0.5
        }

        # Positive keywords with weights
        positive_keywords = {
            # Strong positive
            'breakthrough': 0.8, 'innovation': 0.7, 'revolutionary': 0.8,
            'success': 0.7, 'achievement': 0.6, 'milestone': 0.6,
            'record': 0.6, 'best': 0.5, 'excellent': 0.6,

            # Growth/business positive
            'growth': 0.6, 'increase': 0.5, 'rise': 0.5, 'surge': 0.6,
            'expansion': 0.6, 'profit': 0.5, 'revenue': 0.4, 'gain': 0.5,
            'boost': 0.5, 'improve': 0.5, 'enhance': 0.4,
            'opportunity': 0.4, 'potential': 0.4, 'promising': 0.5,

            # Market positive
            'bullish': 0.6, 'optimistic': 0.5, 'confident': 0.5,
            'strong': 0.4, 'robust': 0.5, 'solid': 0.4,
            'outperform': 0.5, 'exceed expectations': 0.6,
            'beat forecast': 0.6, 'above estimates': 0.5
        }

        sentiment_score = 0.0
        total_weight = 0.0

        for keyword_weights in (negative_keywords, positive_keywords):
            for keyword, weight in keyword_weights.items():
                # Leading \b only: anchors the keyword to the start of a word
                # while still allowing suffixes such as "-s", "-d", "-ment".
                if re.search(r'\b' + re.escape(keyword), text):
                    sentiment_score += weight
                    total_weight += abs(weight)

        # Normalize by the matched weight, but never divide by less than 1.0
        # so a single weak keyword is not inflated to an extreme score.
        if total_weight > 0:
            sentiment_score = sentiment_score / max(total_weight, 1.0)

        # Ensure score is within bounds
        return max(-1.0, min(1.0, sentiment_score))
    
    async def search_news(self, query: str, days_back: int = 30, language: str = "en") -> Dict[str, Any]:
        """
        Search for news articles related to a query with enhanced search strategies

        Up to three query formulations are tried in order (the query as given,
        an OR of its words, the exact quoted phrase) until one returns
        results. Each returned article is enriched with keywords and a
        sentiment analysis, and per-source / per-keyword counts are collected.

        Args:
            query: Search query
            days_back: Number of days to look back
            language: Language code

        Returns:
            On success, a dict with ``query``, ``total_results``,
            ``days_back``, ``articles``, ``sources``, ``keywords`` and
            ``searched_at``. On failure (no client, every strategy raised, or
            an unexpected error), a dict with ``error`` and ``error_type``
            plus ``total_results`` 0 and an empty ``articles`` list. This
            method does not raise.
        """
        step_name = f"News search for '{query}'"
        logger.log_processing_step(step_name, "started")

        if not self.news_api:
            error_msg = "NewsAPI client not initialized - no valid API key available"
            logger.log_api_failure("NewsAPI", error_msg)
            logger.log_processing_step(step_name, "failed")
            return {
                "query": query,
                "error": error_msg,
                "error_type": "no_api_client",
                "total_results": 0,
                "articles": [],
                "searched_at": datetime.now().isoformat()
            }

        try:
            logger.log_api_attempt("NewsAPI", f"searching for '{query}' ({days_back} days back)")

            # Calculate date range from a single "now" so both ends agree.
            now = datetime.now()
            from_date = (now - timedelta(days=days_back)).strftime("%Y-%m-%d")
            to_date = now.strftime("%Y-%m-%d")

            words = query.split()
            search_strategies = [
                # Strategy 1: Exact query
                {"q": query, "sort_by": "relevancy"},
                # Strategy 2: If query has multiple words, try OR search
                {"q": " OR ".join(words) if len(words) > 1 else query, "sort_by": "relevancy"},
                # Strategy 3: Try with quotes for exact phrase (if not already quoted)
                {"q": f'"{query}"' if '"' not in query else query, "sort_by": "publishedAt"}
            ]

            response = None
            last_error = None
            attempted = []

            for number, strategy in enumerate(search_strategies, start=1):
                # For a single-word query strategy 2 equals strategy 1; do not
                # spend another API request on an identical search.
                if strategy in attempted:
                    continue
                attempted.append(strategy)

                try:
                    # NOTE(review): get_everything is called synchronously, so
                    # the event loop is blocked while the request is in flight.
                    response = self.news_api.get_everything(
                        from_param=from_date,
                        to=to_date,
                        language=language,
                        **strategy
                    )

                    # Stop at the first strategy that produced results.
                    if response and response.get("totalResults", 0) > 0:
                        logger.info(f"Search strategy {number} successful for query: {strategy['q']}")
                        break
                    elif number == 1:
                        logger.info("Search strategy 1 returned 0 results, trying alternative strategies")

                except Exception as strategy_error:
                    last_error = strategy_error
                    logger.warning(f"Search strategy {number} failed: {str(strategy_error)}")
                    continue

            if response is None:
                # Every attempt raised: surface that as a failure instead of
                # reporting a successful search with zero results.
                if last_error is not None:
                    raise last_error
                response = {"articles": [], "totalResults": 0}

            articles = response.get("articles") or []
            total_results = response.get("totalResults", 0)

            logger.log_api_success("NewsAPI", f"found {total_results} total results, processing {len(articles)} articles")

            processed_results = {
                "query": query,
                "total_results": total_results,
                "days_back": days_back,
                "articles": [],
                "sources": {},
                "keywords": {},
                "searched_at": datetime.now().isoformat()
            }
            source_counts = processed_results["sources"]
            keyword_counts = processed_results["keywords"]

            for article in articles:
                # The API may return null for any of these fields. ``or``
                # normalises null to a default so that None is never formatted
                # into the analysed text as the literal string "None".
                source_info = article.get("source") or {}
                processed_article = {
                    "title": article.get("title") or "",
                    "source": source_info.get("name") or "Unknown",
                    "author": article.get("author") or "",
                    "published_at": article.get("publishedAt") or "",
                    "description": article.get("description") or "",
                    "url": article.get("url") or "",
                    "content": article.get("content") or ""
                }

                # Keywords and sentiment are derived from title + description.
                full_text = f"{processed_article['title']} {processed_article['description']}".strip()
                keywords = self.extract_keywords(full_text)
                processed_article["keywords"] = keywords
                processed_article["sentiment"] = self._analyze_article_sentiment(full_text)

                processed_results["articles"].append(processed_article)

                # Update source statistics
                source = processed_article["source"]
                source_counts[source] = source_counts.get(source, 0) + 1

                # Update keyword statistics
                for keyword in keywords:
                    keyword_counts[keyword] = keyword_counts.get(keyword, 0) + 1

            # Sentiment distribution, logged for debugging only.
            sentiment_debug = {"positive": 0, "negative": 0, "neutral": 0}
            for article in processed_results["articles"]:
                classification = article.get("sentiment", {}).get("classification", "neutral")
                sentiment_debug[classification] += 1

            logger.info(f"Sentiment distribution for '{query}': {sentiment_debug}")
            logger.log_data_collection(f"NewsAPI articles for '{query}'", len(processed_results['articles']))
            logger.log_processing_step(step_name, "completed")

            return processed_results

        except Exception as e:
            error_msg = f"NewsAPI search failed for '{query}': {str(e)}"
            logger.log_api_failure("NewsAPI", error_msg)
            logger.log_processing_step(step_name, "failed")

            # Return detailed error information
            return {
                "query": query,
                "error": error_msg,
                "error_type": "api_request_failed",
                "error_details": str(e),
                "total_results": 0,
                "articles": [],
                "searched_at": datetime.now().isoformat()
            }
    
    async def get_trend_analysis(self, feature_description: str, target_market: str = "", days_back: int = 30) -> Dict[str, Any]:
        """
        Analyze trends for a specific feature using enhanced NLP

        Queries come from the NLP query enhancer. If it yields none, simple
        queries are built from the longer words of the description. If no
        query returns any article, a fixed list of broad fallback queries
        (fitness / health / wellness oriented) is tried until three of them
        succeed.

        Args:
            feature_description: Description of the feature
            target_market: Target market description
            days_back: Number of days to look back

        Returns:
            Dict with ``feature_description``, ``original_queries``,
            ``successful_queries``, ``total_queries_tried``,
            ``aggregate_stats``, ``query_results``, ``analyzed_at`` and
            ``trends``; or ``{"error": message}`` if anything raises.
        """
        try:
            # Use NLP-enhanced query generation
            enhanced_data = await self.nlp_enhancer.enhance_query(feature_description, target_market)
            optimized_queries = self.nlp_enhancer.get_optimized_queries_for_platform(enhanced_data, 'news')

            # If no optimized queries found, fall back to simpler approach
            if not optimized_queries:
                # Keep words longer than three characters, minus a few
                # fillers (shorter fillers are already excluded by length).
                filler_words = ('with', 'that', 'this')
                key_words = [
                    word for word in feature_description.lower().split()
                    if len(word) > 3 and word not in filler_words
                ]

                if len(key_words) >= 2:
                    optimized_queries = [
                        ' '.join(key_words[:2]),
                        ' '.join(key_words[:3]),
                        f"{key_words[0]} market",
                        f"{key_words[0]} industry"
                    ]
                else:
                    optimized_queries = [feature_description]

            # Drop repeated queries (e.g. with exactly two key words the first
            # two fallback queries are identical) so nothing is searched or
            # reported as successful twice. Order is preserved.
            search_queries = list(dict.fromkeys(optimized_queries))
            logger.info(f"Using optimized search queries: {search_queries}")

            # Collect news for each query
            query_results = {}
            successful_queries = []

            for query in search_queries:
                result = await self.search_news(query, days_back)
                query_results[query] = result

                # Track queries that returned results
                if result.get("total_results", 0) > 0:
                    successful_queries.append(query)

            # If no results from optimized queries, try broader fallback searches
            if not successful_queries:
                logger.info("No results from optimized queries, trying broader fallback searches")
                fallback_queries = [
                    "fitness app",
                    "health technology",
                    "mobile app market",
                    "wellness industry",
                    "AI technology",
                    "personal training",
                    "nutrition app"
                ]

                for query in fallback_queries:
                    if query in query_results:  # Already searched above
                        continue

                    result = await self.search_news(query, days_back)
                    query_results[query] = result

                    if result.get("total_results", 0) > 0:
                        successful_queries.append(query)
                        # Try a few fallback queries, not just one
                        if len(successful_queries) >= 3:
                            break

            # Aggregate results
            trend_analysis = {
                "feature_description": feature_description,
                "original_queries": search_queries,
                "successful_queries": successful_queries,
                "total_queries_tried": len(query_results),
                "aggregate_stats": self._aggregate_news_results(query_results),
                "query_results": query_results,
                "analyzed_at": datetime.now().isoformat()
            }

            # Identify trends
            trend_analysis["trends"] = self._identify_trends(trend_analysis)

            return trend_analysis

        except Exception as e:
            logger.error(f"Error in trend analysis: {str(e)}")
            return {"error": str(e)}
    
    async def get_industry_trends(self, industry: str, timeframe: str = "30d") -> Dict[str, Any]:
        """Search news for an industry and annotate the result with trend data.

        Args:
            industry: Industry name; searched as "<industry> industry trends".
            timeframe: One of "7d", "30d" or "90d"; any other value is treated
                as 30 days.

        Returns:
            The ``search_news`` result extended with ``industry``,
            ``timeframe``, ``trend_score`` and ``top_trends``; or
            ``{"error": message}`` if anything raises.
        """
        try:
            known_timeframes = {"7d": 7, "30d": 30, "90d": 90}
            if timeframe in known_timeframes:
                days_back = known_timeframes[timeframe]
            else:
                days_back = 30

            report = await self.search_news(f"{industry} industry trends", days_back)

            # Annotate the search result in place.
            report.update(industry=industry, timeframe=timeframe)
            report["trend_score"] = self._calculate_trend_score(report)
            report["top_trends"] = self._extract_top_trends(report, 5)

            return report

        except Exception as exc:
            logger.error(f"Error getting industry trends: {str(exc)}")
            return {"error": str(exc)}
    
    def _aggregate_news_results(self, query_results: Dict[str, Any]) -> Dict[str, Any]:
        """Aggregate results from multiple queries

        Articles are de-duplicated by URL across queries. Articles without a
        URL cannot be identified as duplicates, so each one is counted.

        Args:
            query_results: Mapping of query string to a search result dict;
                results without an ``articles`` list are skipped.

        Returns:
            Dict with ``total_articles``, ``total_sources``, ``sources`` and
            ``keywords`` (name -> count), ``sentiment`` (positive / negative /
            neutral counts plus ``average_polarity``), ``top_keywords`` (up to
            10 ``(keyword, count)`` pairs) and ``top_sources`` (up to 5
            ``(source, count)`` pairs).
        """
        sources: Dict[str, int] = {}
        keywords: Dict[str, int] = {}
        sentiment_counts = {"positive": 0, "negative": 0, "neutral": 0}

        seen_urls = set()
        total_articles = 0
        total_polarity = 0

        for result in query_results.values():
            for article in result.get("articles") or []:
                # Only a non-empty URL identifies a duplicate; otherwise every
                # URL-less article after the first would be dropped.
                url = article.get("url") or ""
                if url:
                    if url in seen_urls:
                        continue
                    seen_urls.add(url)

                total_articles += 1

                # Update source statistics
                source = article.get("source", "Unknown")
                sources[source] = sources.get(source, 0) + 1

                # Update keyword statistics
                for keyword in article.get("keywords") or []:
                    keywords[keyword] = keywords.get(keyword, 0) + 1

                # Update sentiment statistics; anything other than
                # "positive"/"negative" counts as neutral.
                sentiment = article.get("sentiment") or {}
                classification = sentiment.get("classification", "neutral")
                if classification not in ("positive", "negative"):
                    classification = "neutral"
                sentiment_counts[classification] += 1

                total_polarity += sentiment.get("polarity") or 0

        average_polarity = total_polarity / total_articles if total_articles else 0

        def top_entries(counts: Dict[str, int], limit: int) -> List[Any]:
            # Highest counts first; ties keep first-seen order.
            return sorted(counts.items(), key=lambda item: item[1], reverse=True)[:limit]

        return {
            "total_articles": total_articles,
            "total_sources": len(sources),
            "sources": sources,
            "keywords": keywords,
            "sentiment": {
                "positive": sentiment_counts["positive"],
                "negative": sentiment_counts["negative"],
                "neutral": sentiment_counts["neutral"],
                "average_polarity": average_polarity
            },
            "top_keywords": top_entries(keywords, 10),
            "top_sources": top_entries(sources, 5)
        }
    
    def _identify_trends(self, analysis: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Turn the most frequent aggregated keywords into topic trends.

        Only the ten most frequent keywords are considered, and of those only
        the ones seen at least twice are reported. No comparison against
        historical data is made.

        Args:
            analysis: Trend analysis dict; keyword counts are read from
                ``analysis["aggregate_stats"]["keywords"]``.

        Returns:
            List of ``{"keyword", "count", "type": "topic"}`` dicts ordered by
            descending count.
        """
        keyword_counts = analysis.get("aggregate_stats", {}).get("keywords", {})
        ranked = sorted(keyword_counts.items(), key=lambda item: item[1], reverse=True)

        return [
            {"keyword": word, "count": hits, "type": "topic"}
            for word, hits in ranked[:10]
            if hits >= 2
        ]
    
    def _calculate_trend_score(self, result: Dict[str, Any]) -> float:
        """Calculate trend score based on article volume and recency"""
        if "articles" not in result or not result["articles"]:
            return 0.0
        
        # Count recent articles (last 7 days)
        recent_count = 0
        total_count = len(result["articles"])
        
        one_week_ago = datetime.now() - timedelta(days=7)
        
        for article in result["articles"]:
            try:
                published_date = datetime.fromisoformat(article.get("published_at", "").replace("Z", "+00:00"))
                if published_date >= one_week_ago:
                    recent_count += 1
            except ValueError:
                pass
        
        # Calculate trend score (0-10 scale)
        recency_ratio = recent_count / total_count if total_count > 0 else 0
        trend_score = min((total_count / 10) * (1 + recency_ratio), 10.0)
        
        return round(trend_score, 2)
    
    def _extract_top_trends(self, result: Dict[str, Any], limit: int = 5) -> List[Dict[str, Any]]:
        """Rank the keywords of a news result by how often they occur.

        Args:
            result: Search result dict whose ``articles`` carry ``keywords``,
                ``sentiment`` and ``url`` entries.
            limit: Maximum number of trends returned.

        Returns:
            Up to ``limit`` dicts with ``keyword``, ``count``, ``sentiment``
            (mean article polarity for that keyword) and ``articles`` (at most
            three URLs), ordered by descending count.
        """
        per_keyword = {}

        for article in result.get("articles", []):
            for keyword in article.get("keywords", []):
                entry = per_keyword.setdefault(
                    keyword,
                    {"keyword": keyword, "count": 0, "sentiment": 0, "articles": []},
                )
                entry["count"] += 1
                # Accumulate polarity now; it is averaged once counting is done.
                entry["sentiment"] += article.get("sentiment", {}).get("polarity", 0)
                entry["articles"].append(article.get("url", ""))

        trends = []
        for entry in per_keyword.values():
            # Every entry was counted at least once, so the division is safe.
            entry["sentiment"] = entry["sentiment"] / entry["count"]
            entry["articles"] = entry["articles"][:3]
            trends.append(entry)

        trends.sort(key=lambda entry: entry["count"], reverse=True)
        return trends[:limit]
    

# Example usage and testing
async def test_news_collector():
    """Manual smoke test: run one trend analysis and one industry lookup.

    Prints a one-line summary for each call and returns both raw results as a
    ``(trend_result, industry_result)`` tuple.
    """
    collector = NewsCollector()

    # Trend analysis for a sample feature over the last 15 days.
    print("Testing trend analysis...")
    trend_result = await collector.get_trend_analysis("AI voice ordering", days_back=15)
    article_total = trend_result.get('aggregate_stats', {}).get('total_articles', 0)
    print(f"Trend analysis: {article_total} articles analyzed")

    # Industry trends over the last 30 days.
    print("Testing industry trends...")
    industry_result = await collector.get_industry_trends("artificial intelligence", "30d")
    trend_score = industry_result.get('trend_score', 0)
    print(f"Industry trends: {trend_score} trend score")

    return trend_result, industry_result

if __name__ == "__main__":
    # Executed as a script: run the manual smoke test on a fresh event loop.
    asyncio.run(test_news_collector())