from typing import List, Dict

from ..actions.utils import stream_output
from ..actions.web_scraping import scrape_urls
from ..scraper.utils import get_image_hash  # Add this import


class BrowserManager:
    """Manages context for the researcher agent."""

    def __init__(self, researcher):
        self.researcher = researcher

    async def browse_urls(self, urls: List[str]) -> List[Dict]:
        """
        Scrape content from a list of URLs.

        Args:
            urls (List[str]): List of URLs to scrape.

        Returns:
            List[Dict]: List of scraped content results.
        """
        if self.researcher.verbose:
            await stream_output(
                "logs",
                "scraping_urls",
                f"🌐 Scraping content from {len(urls)} URLs...",
                self.researcher.websocket,
            )

        scraped_content, images = scrape_urls(urls, self.researcher.cfg)
        self.researcher.add_research_sources(scraped_content)
        new_images = self.select_top_images(images, k=4)  # Select top 2 images
        self.researcher.add_research_images(new_images)

        if self.researcher.verbose:
            await stream_output(
                "logs",
                "scraping_content",
                f"📄 Scraped {len(scraped_content)} pages of content",
                self.researcher.websocket,
            )
            await stream_output(
                "logs",
                "scraping_images",
                f"🖼️ Selected {len(new_images)} new images from {len(images)} total images",
                self.researcher.websocket,
                True,
                new_images
            )
            await stream_output(
                "logs",
                "scraping_complete",
                f"🌐 Scraping complete",
                self.researcher.websocket,
            )

        return scraped_content

    def select_top_images(self, images: List[Dict], k: int = 2) -> List[str]:
        """
        Select most relevant images and remove duplicates based on image content.

        Args:
            images (List[Dict]): List of image dictionaries with 'url' and 'score' keys.
            k (int): Number of top images to select if no high-score images are found.

        Returns:
            List[str]: List of selected image URLs.
        """
        unique_images = []
        seen_hashes = set()
        current_research_images = self.researcher.get_research_images()

        # First, select all score 2 and 3 images
        high_score_images = [img for img in images if img['score'] >= 2]

        for img in high_score_images + images:  # Process high-score images first, then all images
            img_hash = get_image_hash(img['url'])
            if img_hash and img_hash not in seen_hashes and img['url'] not in current_research_images:
                seen_hashes.add(img_hash)
                unique_images.append(img['url'])

                if len(unique_images) == k:
                    break

        return unique_images