Spaces:

SoulofSukuna
/

Tor-Search-Api

Sleeping

Tor-Search-Api / helper / html_scraper.py

Update helper/html_scraper.py

46ed472 verified 7 months ago

1.31 kB

	import cloudscraper
	from concurrent.futures import ThreadPoolExecutor
	import os
	import asyncio
	from .asyncioPoliciesFix import decorator_asyncio_fix
	from constants.headers import HEADER_AIO

	# Proxy URL read from the HTTP_PROXY environment variable; None when the
	# variable is unset. The scraper classes in this module pass it to their
	# HTTP clients for outgoing requests.
	HTTP_PROXY = os.getenv("HTTP_PROXY")

	class Scraper:
	    """Fetch a page's HTML asynchronously through a caller-supplied session.

	    The session must provide ``get(url, headers=..., proxy=...)`` usable as
	    an async context manager whose result has an awaitable ``text()``
	    (presumably an ``aiohttp.ClientSession`` -- confirm against callers).
	    """

	    @decorator_asyncio_fix
	    async def _get_html(self, session, url):
	        """Return the body of ``url`` as text, or ``None`` if the fetch fails.

	        Args:
	            session: HTTP session used to issue the GET request.
	            url: Address of the page to download.

	        Returns:
	            The response text, or ``None`` when any ordinary error occurs.
	        """
	        try:
	            async with session.get(url, headers=HEADER_AIO, proxy=HTTP_PROXY) as r:
	                return await r.text()
	        except Exception:
	            # Best-effort fetch: ordinary failures map to None. Only
	            # ``Exception`` is caught (not a bare ``except``) so that
	            # asyncio.CancelledError and KeyboardInterrupt, which derive from
	            # BaseException, still propagate instead of being reported as
	            # an empty result.
	            return None

	    async def get_all_results(self, session, url):
	        """Fetch ``url`` and return a one-element list with its HTML (or ``None``).

	        Args:
	            session: HTTP session forwarded to ``_get_html``.
	            url: Address of the page to download.

	        Returns:
	            ``[html]`` where ``html`` is the page text, or ``[None]`` on failure.
	        """
	        # gather() schedules the coroutine itself, so no explicit
	        # create_task() wrapper is needed; the result is still a list.
	        return await asyncio.gather(self._get_html(session, url))


	class CloudScraper:
	    """Fetch a page's HTML with a ``cloudscraper`` session, callable from async code."""

	    def __init__(self):
	        # A single scraper session is created once and reused for every request.
	        self.scraper = cloudscraper.create_scraper()

	    def _get_html(self, url):
	        """Blocking GET of ``url``; return the body text or ``None`` on failure.

	        Args:
	            url: Address of the page to download.

	        Returns:
	            The response text, or ``None`` if the request raised or the
	            response status check failed (the error is printed).
	        """
	        try:
	            # The same proxy setting is applied to both http and https traffic.
	            response = self.scraper.get(url, headers=HEADER_AIO, proxies={'http': HTTP_PROXY, 'https': HTTP_PROXY})
	            response.raise_for_status()  # Raise an error for bad responses
	            return response.text
	        except Exception as e:
	            print(f"Error fetching {url}: {e}")
	            return None

	    async def get_all_results(self, url):
	        """Run the blocking fetch in a worker thread and return its result.

	        Args:
	            url: Address of the page to download.

	        Returns:
	            Whatever ``_get_html`` returns: the page text, or ``None`` on failure.
	        """
	        # Use the running loop's default executor rather than building a
	        # ThreadPoolExecutor per call: leaving a ``with ThreadPoolExecutor()``
	        # block calls shutdown(wait=True), which would block the event loop
	        # until the HTTP request finishes if this coroutine were cancelled
	        # mid-request, and a fresh pool per call is wasted work.
	        loop = asyncio.get_running_loop()
	        return await loop.run_in_executor(None, self._get_html, url)