randydev committed on
Commit c6f93b8 · verified · 1 Parent(s): 1438064

Create torrentProject.py

Files changed (1)
  1. torrents/torrentProject.py +99 -0
torrents/torrentProject.py ADDED
@@ -0,0 +1,99 @@
+ import asyncio
+ import time
+ import aiohttp
+ import requests
+ from bs4 import BeautifulSoup
+ from helper.asyncioPoliciesFix import decorator_asyncio_fix
+ from helper.html_scraper import Scraper
+ from constants.base_url import TORRENTPROJECT
+ from constants.headers import HEADER_AIO
+
+
+ class TorrentProject:
+     def __init__(self):
+         self.BASE_URL = TORRENTPROJECT
+         self.LIMIT = None
+
+     @decorator_asyncio_fix
+     async def _individual_scrap(self, session, url, obj, sem):
+         # Fetch a single torrent's detail page and attach its magnet link
+         # to the corresponding result entry.
+         async with sem:
+             try:
+                 async with session.get(
+                     url,
+                     headers=HEADER_AIO,
+                 ) as res:
+                     html = await res.text(encoding="ISO-8859-1")
+                     soup = BeautifulSoup(html, "html.parser")
+                     try:
+                         magnet = soup.select_one(
+                             "#download > div:nth-child(2) > div > a"
+                         )["href"]
+                         # The href embeds the magnet URI; cut everything
+                         # before "magnet" and URL-decode the remainder.
+                         index_of_magnet = magnet.index("magnet")
+                         magnet = requests.utils.unquote(magnet[index_of_magnet:])
+                         obj["magnet"] = magnet
+                     except Exception:
+                         # No magnet link on the page; leave the entry as-is.
+                         pass
+             except Exception:
+                 return None
+
+     async def _get_torrent(self, result, session, urls):
+         # urls and result["data"] are built in parallel by _parser, so pair
+         # them directly and scrape all detail pages concurrently, with the
+         # semaphore capping in-flight requests at 3.
+         tasks = []
+         sem = asyncio.Semaphore(3)
+         for url, obj in zip(urls, result["data"]):
+             task = asyncio.create_task(
+                 self._individual_scrap(session, url, obj, sem)
+             )
+             tasks.append(task)
+         await asyncio.gather(*tasks)
+         return result
+
+     def _parser(self, htmls):
+         try:
+             for html in htmls:
+                 soup = BeautifulSoup(html, "html.parser")
+                 list_of_urls = []
+                 my_dict = {"data": []}
+                 # The first two divs are table headers, not results.
+                 for div in soup.select("div#similarfiles div")[2:]:
+                     span = div.find_all("span")
+                     name = span[0].find("a").text
+                     url = self.BASE_URL + span[0].find("a")["href"]
+                     list_of_urls.append(url)
+                     seeders = span[2].text
+                     leechers = span[3].text
+                     date = span[4].text
+                     size = span[5].text
+
+                     my_dict["data"].append(
+                         {
+                             "name": name,
+                             "size": size,
+                             "date": date,
+                             "seeders": seeders,
+                             "leechers": leechers,
+                             "url": url,
+                         }
+                     )
+                     if len(my_dict["data"]) == self.LIMIT:
+                         break
+                 return my_dict, list_of_urls
+         except Exception:
+             return None, None
+
+     async def search(self, query, page, limit):
+         async with aiohttp.ClientSession() as session:
+             start_time = time.time()
+             self.LIMIT = limit
+             # TorrentProject pages are zero-indexed, hence page - 1.
+             url = self.BASE_URL + "/?t={}&p={}".format(query, page - 1)
+             return await self.parser_result(start_time, url, session)
+
+     async def parser_result(self, start_time, url, session):
+         htmls = await Scraper().get_all_results(session, url)
+         result, urls = self._parser(htmls)
+         if result is not None:
+             results = await self._get_torrent(result, session, urls)
+             results["time"] = time.time() - start_time
+             results["total"] = len(results["data"])
+             return results
+         return result
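For reference, a minimal sketch of how this class could be driven outside the API server. It assumes the repo's helper and constants modules are importable (i.e. it runs from the project root); the query, page, and limit values are purely illustrative.

import asyncio

from torrents.torrentProject import TorrentProject


async def main():
    # Hypothetical call: first results page for "ubuntu", capped at 10 entries.
    results = await TorrentProject().search(query="ubuntu", page=1, limit=10)
    if results is not None:
        print(f"{results['total']} results in {results['time']:.2f}s")
        for torrent in results["data"]:
            # "magnet" is only present when the detail-page scrape succeeded.
            print(torrent["name"], torrent.get("magnet", "<no magnet>"))


if __name__ == "__main__":
    asyncio.run(main())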