randydev committed
Commit b2166c3 · verified · 1 Parent(s): 46ff3d8

Create your_bittorrent.py

Files changed (1)
  1. torrents/your_bittorrent.py +128 -0
torrents/your_bittorrent.py ADDED
@@ -0,0 +1,128 @@
+ import asyncio
+ import time
+ import aiohttp
+ from bs4 import BeautifulSoup
+ from helper.asyncioPoliciesFix import decorator_asyncio_fix
+ from helper.html_scraper import Scraper
+ from constants.base_url import YOURBITTORRENT
+ from constants.headers import HEADER_AIO
+
+
+ class YourBittorrent:
+     def __init__(self):
+         self.BASE_URL = YOURBITTORRENT
+         self.LIMIT = None
+
+     @decorator_asyncio_fix
+     async def _individual_scrap(self, session, url, obj):
+         # Fetch one torrent's detail page and attach its poster image and
+         # .torrent link to `obj` in place.
+         try:
+             async with session.get(url, headers=HEADER_AIO) as res:
+                 html = await res.text(encoding="ISO-8859-1")
+                 soup = BeautifulSoup(html, "html.parser")
+                 try:
+                     container = soup.select_one("div.card-body.container")
+                     poster = (
+                         container.find("div")
+                         .find_all("div")[0]
+                         .find("picture")
+                         .find("img")["src"]
+                     )
+                     clearfix = soup.find("div", class_="clearfix")
+                     torrent = clearfix.find("div").find_all("div")[1].find("a")["href"]
+                     obj["torrent"] = torrent
+                     obj["poster"] = poster
+                 except Exception:
+                     # Detail-page markup changed or fields missing; skip them.
+                     pass
+         except Exception:
+             return None
+
+     async def _get_torrent(self, result, session, urls):
+         # Enrich every result concurrently with data from its detail page.
+         tasks = []
+         for url in urls:
+             for obj in result["data"]:
+                 if obj["url"] == url:
+                     # Pass the matched object itself (the original passed
+                     # result["data"][idx], which could mis-assign data when
+                     # the loop index and the matching entry diverged).
+                     task = asyncio.create_task(
+                         self._individual_scrap(session, url, obj)
+                     )
+                     tasks.append(task)
+                     break
+         await asyncio.gather(*tasks)
+         return result
+
+     def _parser(self, htmls, idx=1):
+         # Parse a listing page into a results dict plus the detail-page URLs.
+         # `idx` skips the leading table rows (headers/ads) of each page type.
+         try:
+             for html in htmls:
+                 soup = BeautifulSoup(html, "html.parser")
+                 list_of_urls = []
+                 my_dict = {"data": []}
+
+                 for tr in soup.find_all("tr")[idx:]:
+                     td = tr.find_all("td")
+                     name = td[1].find("a").get_text(strip=True)
+                     url = self.BASE_URL + td[1].find("a")["href"]
+                     list_of_urls.append(url)
+                     size = td[2].text
+                     date = td[3].text
+                     seeders = td[4].text
+                     leechers = td[5].text
+                     my_dict["data"].append(
+                         {
+                             "name": name,
+                             "size": size,
+                             "date": date,
+                             "seeders": seeders,
+                             "leechers": leechers,
+                             "url": url,
+                         }
+                     )
+                     if len(my_dict["data"]) == self.LIMIT:
+                         break
+                 # NOTE: returns after parsing the first document in `htmls`.
+                 return my_dict, list_of_urls
+         except Exception:
+             return None, None
+
+     async def search(self, query, page, limit):
+         async with aiohttp.ClientSession() as session:
+             start_time = time.time()
+             self.LIMIT = limit
+             url = self.BASE_URL + "/?v=&c=&q={}".format(query)
+             # idx=6 skips the first six table rows of the search listing.
+             return await self.parser_result(start_time, url, session, idx=6)
+
+     async def parser_result(self, start_time, url, session, idx=1):
+         htmls = await Scraper().get_all_results(session, url)
+         result, urls = self._parser(htmls, idx)
+         if result is not None:
+             results = await self._get_torrent(result, session, urls)
+             results["time"] = time.time() - start_time
+             results["total"] = len(results["data"])
+             return results
+         return result
+
+     async def trending(self, category, page, limit):
+         async with aiohttp.ClientSession() as session:
+             start_time = time.time()
+             self.LIMIT = limit
+             if not category:
+                 url = self.BASE_URL + "/top.html"
+                 idx = 1
+             else:
+                 # The site uses "ebooks" where the API exposes "books".
+                 if category == "books":
+                     category = "ebooks"
+                 url = self.BASE_URL + f"/{category}.html"
+                 idx = 4
+             return await self.parser_result(start_time, url, session, idx)
+
+     async def recent(self, category, page, limit):
+         async with aiohttp.ClientSession() as session:
+             start_time = time.time()
+             self.LIMIT = limit
+             if not category:
+                 url = self.BASE_URL + "/new.html"
+                 idx = 1
+             else:
+                 if category == "books":
+                     category = "ebooks"
+                 url = self.BASE_URL + f"/{category}/latest.html"
+                 idx = 4
+             return await self.parser_result(start_time, url, session, idx)