import os
import sys
import asyncio

import pytest

# Add the parent directory to the Python path so the local package is importable
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(parent_dir)

from crawl4ai.async_webcrawler import AsyncWebCrawler

@pytest.mark.asyncio
async def test_caching():
    async with AsyncWebCrawler(verbose=True) as crawler:
        url = "https://www.nbcnews.com/business"

        # First crawl (should not use cache)
        start_time = asyncio.get_running_loop().time()
        result1 = await crawler.arun(url=url, bypass_cache=True)
        end_time = asyncio.get_running_loop().time()
        time_taken1 = end_time - start_time
        assert result1.success

        # Second crawl (should use cache)
        start_time = asyncio.get_running_loop().time()
        result2 = await crawler.arun(url=url, bypass_cache=False)
        end_time = asyncio.get_running_loop().time()
        time_taken2 = end_time - start_time
        assert result2.success

        # Cached result should be faster than the live fetch
        assert time_taken2 < time_taken1


@pytest.mark.asyncio
async def test_bypass_cache():
    async with AsyncWebCrawler(verbose=True) as crawler:
        url = "https://www.nbcnews.com/business"

        # First crawl (populates the cache)
        result1 = await crawler.arun(url=url, bypass_cache=False)
        assert result1.success

        # Second crawl with bypass_cache=True fetches the page live again
        result2 = await crawler.arun(url=url, bypass_cache=True)
        assert result2.success

        # The live fetch is not guaranteed to match the cached copy,
        # so at least one representation should differ
        assert result1.html != result2.html or result1.markdown != result2.markdown


@pytest.mark.asyncio
async def test_clear_cache():
    async with AsyncWebCrawler(verbose=True) as crawler:
        url = "https://www.nbcnews.com/business"

        # Crawl and cache the result
        await crawler.arun(url=url, bypass_cache=False)

        # Clear the cache
        await crawler.aclear_cache()

        # The cache should now be empty
        cache_size = await crawler.aget_cache_size()
        assert cache_size == 0


@pytest.mark.asyncio
async def test_flush_cache():
    async with AsyncWebCrawler(verbose=True) as crawler:
        url = "https://www.nbcnews.com/business"

        # Crawl and cache the result
        await crawler.arun(url=url, bypass_cache=False)

        # Flush the cache
        await crawler.aflush_cache()

        # The cache should now be empty
        cache_size = await crawler.aget_cache_size()
        assert cache_size == 0
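

# A minimal companion check, a sketch rather than part of the original suite:
# it assumes aget_cache_size() reports the number of cached entries, so a
# non-bypassed crawl should leave the cache non-empty before any clear/flush.
# Only the crawler methods already exercised above are used here.
@pytest.mark.asyncio
async def test_cache_populated_after_crawl():
    async with AsyncWebCrawler(verbose=True) as crawler:
        url = "https://www.nbcnews.com/business"

        # Start from a known-empty cache
        await crawler.aclear_cache()
        assert await crawler.aget_cache_size() == 0

        # A non-bypassed crawl should write an entry to the cache
        result = await crawler.arun(url=url, bypass_cache=False)
        assert result.success
        assert await crawler.aget_cache_size() >= 1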


# Entry point for debugging
if __name__ == "__main__":
    pytest.main([__file__, "-v"])