Spaces:
Sleeping
Sleeping
File size: 1,311 Bytes
77c14b6 46ed472 77c14b6 46ed472 77c14b6 46ed472 77c14b6 46ed472 77c14b6 46ed472 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
import cloudscraper
from concurrent.futures import ThreadPoolExecutor
import os
import asyncio
from .asyncioPoliciesFix import decorator_asyncio_fix
from constants.headers import HEADER_AIO
# Optional outbound proxy URL; os.environ.get already defaults to None
# when the variable is unset, so no explicit fallback is needed.
HTTP_PROXY = os.environ.get("HTTP_PROXY")
class Scraper:
    """Asynchronous HTML fetcher built on an aiohttp-style session."""

    @decorator_asyncio_fix
    async def _get_html(self, session, url):
        """Return the response body of *url* as text, or None on failure.

        Sends the shared HEADER_AIO headers and routes through the
        module-level HTTP_PROXY (which may be None, i.e. no proxy).
        """
        try:
            async with session.get(url, headers=HEADER_AIO, proxy=HTTP_PROXY) as r:
                return await r.text()
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt,
            # SystemExit and task cancellation are no longer swallowed;
            # ordinary request/decoding errors still degrade to None.
            return None

    async def get_all_results(self, session, url):
        """Fetch *url* and return the body wrapped in a single-item list.

        asyncio.gather preserves the historical list-of-one return shape
        that callers of this method rely on.
        """
        return await asyncio.gather(asyncio.create_task(self._get_html(session, url)))
class CloudScraper:
    """Blocking Cloudflare-aware fetcher with an async wrapper method."""

    def __init__(self):
        # cloudscraper transparently solves Cloudflare anti-bot challenges.
        self.scraper = cloudscraper.create_scraper()

    def _get_html(self, url):
        """Return the body of *url* as text, or None if the request fails.

        Prints a diagnostic message on any error (network failure or a
        4xx/5xx status raised by raise_for_status).
        """
        try:
            # Only build a proxies mapping when a proxy is configured.
            # (requests strips None-valued entries anyway, so this is a
            # behavior-preserving clarification of intent.)
            proxies = {"http": HTTP_PROXY, "https": HTTP_PROXY} if HTTP_PROXY else None
            response = self.scraper.get(url, headers=HEADER_AIO, proxies=proxies)
            response.raise_for_status()  # Raise an error for bad responses
            return response.text
        except Exception as e:
            print(f"Error fetching {url}: {e}")
            return None

    async def get_all_results(self, url):
        """Run the blocking fetch in a worker thread and await its result."""
        # get_running_loop() replaces the deprecated get_event_loop();
        # this coroutine is always executed inside a running loop.
        loop = asyncio.get_running_loop()
        with ThreadPoolExecutor() as pool:
            return await loop.run_in_executor(pool, self._get_html, url)
|