randydev committed on
Commit 23779ec · verified · 1 Parent(s): b51f32d

Create pirate_bay.py

Files changed (1)
  1. torrents/pirate_bay.py +98 -0
torrents/pirate_bay.py ADDED
@@ -0,0 +1,98 @@
+ import re
+ import time
+ import aiohttp
+ from bs4 import BeautifulSoup
+ from helper.html_scraper import Scraper
+ from constants.base_url import PIRATEBAY
+
+
+ class PirateBay:
+     def __init__(self):
+         self.BASE_URL = PIRATEBAY
+         self.LIMIT = None
+
+     def _parser(self, htmls):
+         try:
+             for html in htmls:
+                 soup = BeautifulSoup(html, "html.parser")
+
+                 my_dict = {"data": []}
+                 for tr in soup.find_all("tr")[1:]:  # skip the table header row
+                     td = tr.find_all("td")
+                     try:
+                         name = td[1].find("a").text
+                     except (AttributeError, IndexError):
+                         name = None
+                     if name:
+                         url = td[1].find("a")["href"]
+                         magnet = td[3].find_all("a")[0]["href"]
+                         size = td[4].text.strip()
+                         seeders = td[5].text
+                         leechers = td[6].text
+                         category = td[0].find_all("a")[0].text
+                         uploader = td[7].text
+                         dateUploaded = td[2].text
+
+                         my_dict["data"].append(
+                             {
+                                 "name": name,
+                                 "size": size,
+                                 "seeders": seeders,
+                                 "leechers": leechers,
+                                 "category": category,
+                                 "uploader": uploader,
+                                 "url": url,
+                                 "date": dateUploaded,
+                                 "hash": re.search(  # 32-40 hex chars (info hash) in the magnet URI
+                                     r"([a-fA-F\d]{32,40})\b", magnet
+                                 ).group(0),
+                                 "magnet": magnet,
+                             }
+                         )
+                     if len(my_dict["data"]) == self.LIMIT:
+                         break
+                 last_tr = soup.find_all("tr")[-1]  # last row may hold pagination links
+                 potential_page_link = last_tr.find("td").find("a").get("href")
+                 check_if_pagination_available = potential_page_link is not None and potential_page_link.startswith("/search/")
+                 if check_if_pagination_available:
+                     current_page = last_tr.find("td").find("b").text
+                     my_dict["current_page"] = int(current_page)
+                     my_dict["total_pages"] = int(
+                         last_tr.find("td").find_all("a")[-2].text
+                     )
+                 return my_dict
+         except Exception:
+             return None
+
+     async def search(self, query, page, limit):
+         async with aiohttp.ClientSession() as session:
+             start_time = time.time()
+             self.LIMIT = limit
+             url = self.BASE_URL + "/search/{}/{}/99/0".format(query, page)
+             return await self.parser_result(start_time, url, session)
+
+     async def parser_result(self, start_time, url, session):
+         html = await Scraper().get_all_results(session, url)
+         results = self._parser(html)
+         if results is not None:
+             results["time"] = time.time() - start_time
+             results["total"] = len(results["data"])
+             return results
+         return results
+
+     async def trending(self, category, page, limit):
+         async with aiohttp.ClientSession() as session:
+             start_time = time.time()
+             self.LIMIT = limit
+             url = self.BASE_URL + "/top/all"  # category and page are currently unused
+             return await self.parser_result(start_time, url, session)
+
+     async def recent(self, category, page, limit):
+         async with aiohttp.ClientSession() as session:
+             start_time = time.time()
+             self.LIMIT = limit
+             if not category:
+                 url = self.BASE_URL + "/recent"
+             else:
+                 url = self.BASE_URL + "/{}/latest/".format(category)
+             return await self.parser_result(start_time, url, session)
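
For reference, a minimal usage sketch of the class added in this commit. This is not part of the commit itself: it assumes the repo's helper.html_scraper.Scraper and constants.base_url.PIRATEBAY imports resolve, and that the file lives at torrents/pirate_bay.py as shown above.

import asyncio

from torrents.pirate_bay import PirateBay


async def main():
    # Hypothetical query values; search() builds "<PIRATEBAY>/search/<query>/<page>/99/0".
    results = await PirateBay().search(query="ubuntu", page=1, limit=10)
    if results is None:
        print("parsing failed")
        return
    print(results["total"], "results in", round(results["time"], 2), "s")
    for torrent in results["data"]:
        print(torrent["name"], torrent["seeders"], torrent["hash"])


if __name__ == "__main__":
    asyncio.run(main())

Note that parser_result returns None whenever _parser swallows an exception, so callers need the None check shown above.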