randydev committed · Commit 1438064 · verified · 1 Parent(s): 23779ec

Create torlock.py

Files changed (1)
  1. torrents/torlock.py +156 -0
torrents/torlock.py ADDED
@@ -0,0 +1,156 @@
import asyncio
import re
import time
import aiohttp
from bs4 import BeautifulSoup
from helper.asyncioPoliciesFix import decorator_asyncio_fix
from helper.html_scraper import Scraper
from constants.base_url import TORLOCK
from constants.headers import HEADER_AIO


class Torlock:
    def __init__(self):
        self.BASE_URL = TORLOCK
        self.LIMIT = None
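
    # Fetch one torrent's detail page and enrich `obj` in place with the
    # magnet link, .torrent file URL, info hash, category, poster, and
    # screenshots. The anchor indices (tm[20], tm[23], tm[25]) are tied to
    # Torlock's current page layout and will break if the markup changes.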
    @decorator_asyncio_fix
    async def _individual_scrap(self, session, url, obj):
        try:
            async with session.get(url, headers=HEADER_AIO) as res:
                html = await res.text(encoding="ISO-8859-1")
                soup = BeautifulSoup(html, "html.parser")
                try:
                    tm = soup.find_all("a")
                    magnet = tm[20]["href"]
                    torrent = tm[23]["href"]
                    try:
                        obj["poster"] = soup.find_all(
                            "img", class_="img-responsive"
                        )[0]["src"]
                    except Exception:
                        ...
                    if str(magnet).startswith("magnet") and str(torrent).endswith(
                        "torrent"
                    ):
                        obj["torrent"] = torrent
                        obj["magnet"] = magnet
                        # Info hashes are 32-40 hex chars; the original class
                        # [{a-f\d,A-F\d}] also matched literal "{", "}" and ",".
                        obj["hash"] = re.search(
                            r"([a-fA-F\d]{32,40})\b", magnet
                        ).group(0)
                        obj["category"] = tm[25].text
                        imgs = soup.select(".tab-content img.img-fluid")
                        if imgs:
                            obj["screenshot"] = [img["src"] for img in imgs]
                    else:
                        # `del obj` only unbinds the local name; the entry
                        # itself remains in the caller's result list.
                        del obj
                except IndexError:
                    ...
        except Exception:
            return None
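
    # Fan out: scrape every result's detail page concurrently. `urls` is
    # index-aligned with result["data"], so result["data"][idx] is the
    # matching entry.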
    async def _get_torrent(self, result, session, urls):
        tasks = []
        for idx, url in enumerate(urls):
            for obj in result["data"]:
                if obj["url"] == url:
                    task = asyncio.create_task(
                        self._individual_scrap(session, url, result["data"][idx])
                    )
                    tasks.append(task)
                    break  # one task per URL; avoid duplicate scrapes
        await asyncio.gather(*tasks)
        return result
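
    # Parse the search-results table into {"data": [...]} plus the list of
    # detail-page URLs; `idx` skips leading non-result table rows.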
    def _parser(self, htmls, idx=0):
        try:
            for html in htmls:
                soup = BeautifulSoup(html, "html.parser")
                list_of_urls = []
                my_dict = {"data": []}

                for tr in soup.find_all("tr")[idx:]:
                    td = tr.find_all("td")
                    if len(td) == 0:
                        continue
                    name = td[0].get_text(strip=True)
                    if name != "":
                        url = td[0].find("a")["href"]
                        if url == "":
                            break
                        url = self.BASE_URL + url
                        list_of_urls.append(url)
                        size = td[2].get_text(strip=True)
                        date = td[1].get_text(strip=True)
                        seeders = td[3].get_text(strip=True)
                        leechers = td[4].get_text(strip=True)
                        my_dict["data"].append(
                            {
                                "name": name,
                                "size": size,
                                "date": date,
                                "seeders": seeders,
                                "leechers": leechers,
                                "url": url,
                            }
                        )
                    if len(my_dict["data"]) == self.LIMIT:
                        break
                try:
                    ul = soup.find("ul", class_="pagination")
                    tpages = ul.find_all("a")[-2].text
                    current_page = (
                        ul.find("li", class_="active").find("span").text.split(" ")[0]
                    )
                    my_dict["current_page"] = int(current_page)
                    my_dict["total_pages"] = int(tpages)
                except Exception:
                    my_dict["current_page"] = None
                    my_dict["total_pages"] = None
                # Returns inside the loop: only the first HTML document is parsed.
                return my_dict, list_of_urls
        except Exception:
            return None, None
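
    # Public entry point: keyword search, sorted by seeders. idx=5 skips the
    # header rows of Torlock's results table.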
    async def search(self, query, page, limit):
        async with aiohttp.ClientSession() as session:
            start_time = time.time()
            self.LIMIT = limit
            url = self.BASE_URL + "/all/torrents/{}.html?sort=seeds&page={}".format(
                query, page
            )
            return await self.parser_result(start_time, url, session, idx=5)
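
    # Shared pipeline: download the listing, parse it, then enrich each hit
    # from its detail page and attach timing/total metadata.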
    async def parser_result(self, start_time, url, session, idx=0):
        htmls = await Scraper().get_all_results(session, url)
        result, urls = self._parser(htmls, idx)
        if result is not None:
            results = await self._get_torrent(result, session, urls)
            results["time"] = time.time() - start_time
            results["total"] = len(results["data"])
            return results
        return result
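
    # Trending torrents for a category (site homepage when none is given).
    # `page` is accepted for interface symmetry but is not used here.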
    async def trending(self, category, page, limit):
        async with aiohttp.ClientSession() as session:
            start_time = time.time()
            self.LIMIT = limit
            if not category:
                url = self.BASE_URL
            else:
                if category == "books":
                    category = "ebooks"
                url = self.BASE_URL + "/{}.html".format(category)
            return await self.parser_result(start_time, url, session)
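
    # Recently added torrents, newest first; falls back to /fresh.html when
    # no category is given.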
    async def recent(self, category, page, limit):
        async with aiohttp.ClientSession() as session:
            start_time = time.time()
            self.LIMIT = limit
            if not category:
                url = self.BASE_URL + "/fresh.html"
            else:
                if category == "books":
                    category = "ebooks"
                url = self.BASE_URL + "/{}/{}/added/desc.html".format(category, page)
            return await self.parser_result(start_time, url, session)

    #! Maybe implement search by category in the future
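

# Minimal usage sketch (illustrative, not part of this commit); assumes the
# helper/ and constants/ packages resolve and an event loop is started via
# asyncio.run:
#
#   import asyncio
#
#   async def main():
#       results = await Torlock().search("ubuntu", page=1, limit=10)
#       if results:
#           print(results["total"], "results in", results["time"], "seconds")
#
#   asyncio.run(main())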