SoulofSukuna committed
Commit 4f66818 · verified · 1 Parent(s): e371ee8

Update torrents/pirate_bay.py

Files changed (1)
  1. torrents/pirate_bay.py +98 -97
torrents/pirate_bay.py CHANGED
@@ -1,97 +1,98 @@
 import re
 import time
 import aiohttp
 from bs4 import BeautifulSoup
 from helper.html_scraper import Scraper
 from constants.base_url import PIRATEBAY


 class PirateBay:
     def __init__(self):
         self.BASE_URL = PIRATEBAY
         self.LIMIT = None

     def _parser(self, htmls):
         try:
             for html in htmls:
                 soup = BeautifulSoup(html, "html.parser")

                 my_dict = {"data": []}
                 for tr in soup.find_all("tr")[1:]:
                     td = tr.find_all("td")
                     try:
                         name = td[1].find("a").text
                     except:
                         name = None
                     if name:
                         url = td[1].find("a")["href"]
                         magnet = td[3].find_all("a")[0]["href"]
                         size = td[4].text.strip()
                         seeders = td[5].text
                         leechers = td[6].text
                         category = td[0].find_all("a")[0].text
                         uploader = td[7].text
                         dateUploaded = td[2].text

                         my_dict["data"].append(
                             {
                                 "name": name,
                                 "size": size,
                                 "seeders": seeders,
                                 "leechers": leechers,
                                 "category": category,
                                 "uploader": uploader,
                                 "url": url,
                                 "date": dateUploaded,
                                 "hash": re.search(
                                     r"([{a-f\d,A-F\d}]{32,40})\b", magnet
                                 ).group(0),
                                 "magnet": magnet,
                             }
                         )
                     if len(my_dict["data"]) == self.LIMIT:
                         break
                 last_tr = soup.find_all("tr")[-1]
-                check_if_pagination_available = last_tr.find("td").find("center")
-                if not check_if_pagination_available:
+                potential_page_link = last_tr.find("td").find("a").href
+                check_if_pagination_available = potential_page_link is not None and potential_page_link[:len("/search/")] == "/search/"
+                if check_if_pagination_available:
                     current_page = last_tr.find("td").find("b").text
                     my_dict["current_page"] = int(current_page)
                     my_dict["total_pages"] = int(
                         last_tr.find("td").find_all("a")[-2].text
                     )
             return my_dict
         except:
             return None

     async def search(self, query, page, limit):
         async with aiohttp.ClientSession() as session:
             start_time = time.time()
             self.LIMIT = limit
             url = self.BASE_URL + "/search/{}/{}/99/0".format(query, page)
             return await self.parser_result(start_time, url, session)

     async def parser_result(self, start_time, url, session):
         html = await Scraper().get_all_results(session, url)
         results = self._parser(html)
         if results is not None:
             results["time"] = time.time() - start_time
             results["total"] = len(results["data"])
             return results
         return results

     async def trending(self, category, page, limit):
         async with aiohttp.ClientSession() as session:
             start_time = time.time()
             self.LIMIT = limit
             url = self.BASE_URL + "/top/all"
             return await self.parser_result(start_time, url, session)

     async def recent(self, category, page, limit):
         async with aiohttp.ClientSession() as session:
             start_time = time.time()
             self.LIMIT = limit
             if not category:
                 url = self.BASE_URL + "/recent"
             else:
                 url = self.BASE_URL + "/{}/latest/".format(category)
             return await self.parser_result(start_time, url, session)
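
For reference, a minimal, self-contained sketch of the info-hash extraction that _parser applies to each magnet link. The magnet URI below is illustrative (hypothetical hash and tracker), not taken from the live site:

    import re

    # Illustrative magnet URI with a hypothetical 40-character hex info-hash.
    magnet = (
        "magnet:?xt=urn:btih:0123456789abcdef0123456789abcdef01234567"
        "&dn=example&tr=udp%3A%2F%2Ftracker.example.org%3A1337"
    )

    # Same pattern as in _parser: the first run of 32-40 hex-like characters
    # ending at a word boundary is taken as the info-hash.
    info_hash = re.search(r"([{a-f\d,A-F\d}]{32,40})\b", magnet).group(0)
    print(info_hash)  # -> 0123456789abcdef0123456789abcdef01234567

And a hedged usage sketch of the class itself, assuming the project's helper.html_scraper and constants.base_url modules are importable and the mirror configured in PIRATEBAY is reachable; the query, page, and limit values are arbitrary examples:

    import asyncio

    from torrents.pirate_bay import PirateBay


    async def main():
        pirate_bay = PirateBay()
        # search(query, page, limit) opens its own aiohttp session per call.
        results = await pirate_bay.search("ubuntu", 1, 5)
        if results:
            print(results["total"], "results in", results["time"], "seconds")
            for torrent in results["data"]:
                print(torrent["name"], torrent["size"], torrent["seeders"])


    asyncio.run(main())

Note that each public coroutine stores the requested limit on the instance (self.LIMIT) before fetching, so the sketch uses a dedicated instance rather than sharing one across concurrent calls with different limits.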