Tor-Search-Api / torrents /pirate_bay.py
import re
import time
import aiohttp
from bs4 import BeautifulSoup
from helper.html_scraper import Scraper
from constants.base_url import PIRATEBAY
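
# Scraper for The Pirate Bay's HTML listings. Each public coroutine builds a
# site URL, fetches it via the project's Scraper helper, and parses the table
# rows into a dict of torrent entries.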

class PirateBay:
    def __init__(self):
        self.BASE_URL = PIRATEBAY
        self.LIMIT = None  # max rows to collect; set per call by search()/trending()/recent()
    def _parser(self, htmls):
        try:
            for html in htmls:
                soup = BeautifulSoup(html, "html.parser")
                my_dict = {"data": []}
                # Skip the header row; each remaining <tr> is one result.
                for tr in soup.find_all("tr")[1:]:
                    td = tr.find_all("td")
                    try:
                        name = td[1].find("a").text
                    except (AttributeError, IndexError):
                        name = None
if name:
url = td[1].find("a")["href"]
magnet = td[3].find_all("a")[0]["href"]
size = td[4].text.strip()
seeders = td[5].text
leechers = td[6].text
category = td[0].find_all("a")[0].text
uploader = td[7].text
dateUploaded = td[2].text
my_dict["data"].append(
{
"name": name,
"size": size,
"seeders": seeders,
"leechers": leechers,
"category": category,
"uploader": uploader,
"url": url,
"date": dateUploaded,
"hash": re.search(
r"([{a-f\d,A-F\d}]{32,40})\b", magnet
).group(0),
"magnet": magnet,
}
)
                    # Stop once the requested result limit is reached.
                    if len(my_dict["data"]) == self.LIMIT:
                        break
                # The last row holds pagination links when more pages exist.
                last_tr = soup.find_all("tr")[-1]
                first_td = last_tr.find("td")
                page_link = first_td.find("a") if first_td else None
                potential_page_link = page_link.get("href") if page_link else None
                if potential_page_link is not None and potential_page_link.startswith("/search/"):
                    my_dict["current_page"] = int(first_td.find("b").text)
                    my_dict["total_pages"] = int(
                        first_td.find_all("a")[-2].text
                    )
                # Returns after parsing the first HTML document in the batch.
                return my_dict
        except Exception:
            return None

    async def search(self, query, page, limit):
async with aiohttp.ClientSession() as session:
start_time = time.time()
self.LIMIT = limit
url = self.BASE_URL + "/search/{}/{}/99/0".format(query, page)
return await self.parser_result(start_time, url, session)

    async def parser_result(self, start_time, url, session):
        html = await Scraper().get_all_results(session, url)
        results = self._parser(html)
        if results is not None:
            results["time"] = time.time() - start_time
            results["total"] = len(results["data"])
        return results
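
    # Shape of a successful parser_result() payload (values illustrative):
    #   {
    #       "data": [{"name", "size", "seeders", "leechers", "category",
    #                 "uploader", "url", "date", "hash", "magnet"}, ...],
    #       "current_page": 1,   # only present when pagination is detected
    #       "total_pages": 10,
    #       "time": 0.42,
    #       "total": 25,
    #   }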

    async def trending(self, category, page, limit):
        async with aiohttp.ClientSession() as session:
            start_time = time.time()
            self.LIMIT = limit
            # category and page are accepted for a uniform interface but
            # unused here: the site serves a single list at /top/all.
            url = self.BASE_URL + "/top/all"
            return await self.parser_result(start_time, url, session)

    async def recent(self, category, page, limit):
async with aiohttp.ClientSession() as session:
start_time = time.time()
self.LIMIT = limit
if not category:
url = self.BASE_URL + "/recent"
else:
url = self.BASE_URL + "/{}/latest/".format(category)
return await self.parser_result(start_time, url, session)
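

# Minimal usage sketch; assumes the parent project's helper.html_scraper and
# constants.base_url modules resolve. "ubuntu" is an arbitrary example query.
if __name__ == "__main__":
    import asyncio

    async def _demo():
        pb = PirateBay()
        results = await pb.search("ubuntu", page=1, limit=5)
        if results:
            for item in results["data"]:
                print(item["name"], item["seeders"], item["magnet"][:60])

    asyncio.run(_demo())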