import os
import sys
import asyncio

import pytest
import pytest_asyncio

# Add the parent directory to the Python path so the local crawl4ai package is importable
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(parent_dir)

from crawl4ai.async_webcrawler import AsyncWebCrawler
from crawl4ai.utils import InvalidCSSSelectorError


class AsyncCrawlerWrapper:
    """Holds a shared AsyncWebCrawler instance plus its async setup/teardown."""

    def __init__(self):
        self.crawler = None

    async def setup(self):
        self.crawler = AsyncWebCrawler(verbose=True)
        await self.crawler.awarmup()

    async def cleanup(self):
        if self.crawler:
            await self.crawler.aclear_cache()
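
# A minimal standalone usage sketch (kept commented out so it does not affect
# test collection), assuming the same AsyncWebCrawler methods exercised above;
# _demo and the example URL are illustrative only:
#
#     async def _demo():
#         wrapper = AsyncCrawlerWrapper()
#         await wrapper.setup()
#         result = await wrapper.crawler.arun(url="https://example.com", bypass_cache=True)
#         print(result.success)
#         await wrapper.cleanup()
#
#     asyncio.run(_demo())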

# Function-scoped async fixture: setup and cleanup run on the event loop that
# pytest-asyncio provides for the test itself, so the crawler is never bound
# to a different loop than the one the tests run on.
@pytest_asyncio.fixture
async def crawler_wrapper():
    wrapper = AsyncCrawlerWrapper()
    await wrapper.setup()
    yield wrapper
    await wrapper.cleanup()

@pytest.mark.asyncio
async def test_network_error(crawler_wrapper):
    url = "https://www.nonexistentwebsite123456789.com"
    result = await crawler_wrapper.crawler.arun(url=url, bypass_cache=True)
    assert not result.success
    assert "Failed to crawl" in result.error_message

# @pytest.mark.asyncio
# async def test_timeout_error(crawler_wrapper):
#     # Simulating a timeout by using a very short timeout value
#     url = "https://www.nbcnews.com/business"
#     result = await crawler_wrapper.crawler.arun(url=url, bypass_cache=True, timeout=0.001)
#     assert not result.success
#     assert "timeout" in result.error_message.lower()

# @pytest.mark.asyncio
# async def test_invalid_css_selector(crawler_wrapper):
#     url = "https://www.nbcnews.com/business"
#     with pytest.raises(InvalidCSSSelectorError):
#         await crawler_wrapper.crawler.arun(url=url, bypass_cache=True, css_selector="invalid>>selector")

# @pytest.mark.asyncio
# async def test_js_execution_error(crawler_wrapper):
#     url = "https://www.nbcnews.com/business"
#     invalid_js = "This is not valid JavaScript code;"
#     result = await crawler_wrapper.crawler.arun(url=url, bypass_cache=True, js=invalid_js)
#     assert not result.success
#     assert "JavaScript" in result.error_message

# @pytest.mark.asyncio
# async def test_empty_page(crawler_wrapper):
#     # Use a URL that typically returns an empty page
#     url = "http://example.com/empty"
#     result = await crawler_wrapper.crawler.arun(url=url, bypass_cache=True)
#     assert result.success  # The crawl itself should succeed
#     assert not result.markdown.strip()  # The markdown content should be empty or just whitespace

# @pytest.mark.asyncio
# async def test_rate_limiting(crawler_wrapper):
#     # Simulate rate limiting by making multiple rapid requests
#     url = "https://www.nbcnews.com/business"
#     results = await asyncio.gather(*[crawler_wrapper.crawler.arun(url=url, bypass_cache=True) for _ in range(10)])
#     assert any(not result.success and "rate limit" in result.error_message.lower() for result in results)

# Entry point for debugging
if __name__ == "__main__":
    pytest.main([__file__, "-v"])