randydev committed on
Commit fe38f2a · verified · 1 Parent(s): 4d37bef

Create limetorrents.py

Files changed (1)
  1. torrents/limetorrents.py +133 -0
torrents/limetorrents.py ADDED
@@ -0,0 +1,133 @@
+ import asyncio
+ import re
+ import time
+ import aiohttp
+ from bs4 import BeautifulSoup
+ from helper.asyncioPoliciesFix import decorator_asyncio_fix
+ from helper.html_scraper import Scraper
+ from constants.base_url import LIMETORRENT
+ from constants.headers import HEADER_AIO
+
+
+ class Limetorrent:
+     def __init__(self):
+         self.BASE_URL = LIMETORRENT
+         self.LIMIT = None
+
+     @decorator_asyncio_fix
+     async def _individual_scrap(self, session, url, obj):
+         try:
+             async with session.get(url, headers=HEADER_AIO) as res:
+                 html = await res.text(encoding="ISO-8859-1")
+                 soup = BeautifulSoup(html, "html.parser")
+                 try:
+                     a_tag = soup.find_all("a", class_="csprite_dltorrent")
+                     obj["torrent"] = a_tag[0]["href"]
+                     obj["magnet"] = a_tag[-1]["href"]
+                     obj["hash"] = re.search(
+                         r"([{a-f\d,A-F\d}]{32,40})\b", obj["magnet"]
+                     ).group(0)
+                 except:
+                     ...
+         except:
+             return None
+
+     async def _get_torrent(self, result, session, urls):
+         tasks = []
+         for idx, url in enumerate(urls):
+             for obj in result["data"]:
+                 if obj["url"] == url:
+                     task = asyncio.create_task(
+                         self._individual_scrap(session, url, result["data"][idx])
+                     )
+                     tasks.append(task)
+         await asyncio.gather(*tasks)
+         return result
+
+     def _parser(self, htmls, idx=0):
+         try:
+             for html in htmls:
+                 soup = BeautifulSoup(html, "html.parser")
+                 list_of_urls = []
+                 my_dict = {"data": []}
+
+                 for tr in soup.find_all("tr")[idx:]:
+                     td = tr.find_all("td")
+                     if len(td) == 0:
+                         continue
+                     name = td[0].get_text(strip=True)
+                     url = self.BASE_URL + td[0].find_all("a")[-1]["href"]
+                     list_of_urls.append(url)
+                     added_on_and_category = td[1].get_text(strip=True)
+                     date = (added_on_and_category.split("-")[0]).strip()
+                     category = (added_on_and_category.split("in")[-1]).strip()
+                     size = td[2].text
+                     seeders = td[3].text
+                     leechers = td[4].text
+                     my_dict["data"].append(
+                         {
+                             "name": name,
+                             "size": size,
+                             "date": date,
+                             "category": category if category != date else None,
+                             "seeders": seeders,
+                             "leechers": leechers,
+                             "url": url,
+                         }
+                     )
+                     if len(my_dict["data"]) == self.LIMIT:
+                         break
+                 try:
+                     div = soup.find("div", class_="search_stat")
+                     current_page = int(div.find("span", class_="active").text)
+                     total_page = int((div.find_all("a"))[-2].text)
+                     if current_page > total_page:
+                         total_page = current_page
+                     my_dict["current_page"] = current_page
+                     my_dict["total_pages"] = total_page
+                 except:
+                     ...
+                 return my_dict, list_of_urls
+         except:
+             return None, None
+
+     async def search(self, query, page, limit):
+         async with aiohttp.ClientSession() as session:
+             start_time = time.time()
+             self.LIMIT = limit
+             url = self.BASE_URL + "/search/all/{}//{}".format(query, page)
+             return await self.parser_result(start_time, url, session, idx=5)
+
+     async def parser_result(self, start_time, url, session, idx=0):
+         htmls = await Scraper().get_all_results(session, url)
+         result, urls = self._parser(htmls, idx)
+         if result is not None:
+             results = await self._get_torrent(result, session, urls)
+             results["time"] = time.time() - start_time
+             results["total"] = len(results["data"])
+             return results
+         return result
+
+     async def trending(self, category, page, limit):
+         async with aiohttp.ClientSession() as session:
+             start_time = time.time()
+             self.LIMIT = limit
+             url = self.BASE_URL + "/top100"
+             return await self.parser_result(start_time, url, session)
+
+     async def recent(self, category, page, limit):
+         async with aiohttp.ClientSession() as session:
+             start_time = time.time()
+             self.LIMIT = limit
+             if not category:
+                 url = self.BASE_URL + "/latest100"
+             else:
+                 category = (category).capitalize()
+                 if category == "Apps":
+                     category = "Applications"
+                 elif category == "Tv":
+                     category = "TV-shows"
+                 url = self.BASE_URL + "/browse-torrents/{}/date/{}/".format(
+                     category, page
+                 )
+             return await self.parser_result(start_time, url, session)